// libosmscout 1.1.1 - GenNumericIndex.h
// Source listing taken from the generated API documentation of this file.
1 #ifndef OSMSCOUT_IMPORT_GENNUMERICINDEX_H
2 #define OSMSCOUT_IMPORT_GENNUMERICINDEX_H
3 
4 /*
5  This source is part of the libosmscout library
6  Copyright (C) 2009 Tim Teulings
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22 
23 #include <vector>
24 
25 
26 #include <osmscout/util/Cache.h>
27 #include <osmscout/util/File.h>
30 #include <osmscout/util/Number.h>
31 #include <osmscout/util/Progress.h>
32 
33 #include <osmscout/import/Import.h>
34 
35 namespace osmscout {
36 
37  template <class N,class T>
39  {
40  private:
41  std::string description;
42  std::string datafile;
43  std::string indexfile;
44 
45  private:
46  void ReadData(const TypeConfig& typeConfig,
47  FileScanner& scanner,
48  T& data) const;
49 
50  public:
51  NumericIndexGenerator(const std::string& description,
52  const std::string& datafile,
53  const std::string& indexfile);
54 
55  ~NumericIndexGenerator() override;
56 
57  bool Import(const TypeConfigRef& typeConfig,
58  const ImportParameter& parameter,
59  Progress& progress) override;
60  };
61 
62  template <class N,class T>
63  NumericIndexGenerator<N,T>::NumericIndexGenerator(const std::string& description,
64  const std::string& datafile,
65  const std::string& indexfile)
66  : description(description),
67  datafile(datafile),
68  indexfile(indexfile)
69  {
70  // no code
71  }
72 
73  template <class N,class T>
75  {
76  // no code
77  }
78 
79  template <class N,class T>
80  void NumericIndexGenerator<N,T>::ReadData(const TypeConfig& typeConfig,
81  FileScanner& scanner,
82  T& data) const
83  {
84  data.Read(typeConfig,
85  scanner);
86  }
87 
88  template <class N,class T>
90  const ImportParameter& parameter,
91  Progress& progress)
92  {
93  FileScanner scanner;
94  FileWriter writer;
95 
96  uint32_t dataCount;
97 
98  std::vector<N> startingIds;
99  std::vector<FileOffset> pageStarts;
100 
101  std::vector<uint32_t> indexPageCounts;
102 
103  FileOffset levelsOffset;
104  FileOffset lastLevelPageStartOffset;
105 
106  FileOffset indexPageCountsOffset;
107  uint32_t pageSize=(uint32_t)parameter.GetNumericIndexPageSize();
108 
109  //
110  // Writing index file
111  //
112 
113  progress.SetAction(std::string("Generating '")+indexfile+"'");
114 
115  try {
116  writer.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
117  indexfile));
118 
119  scanner.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
120  datafile),
121  FileScanner::Sequential,true);
122 
123  dataCount=scanner.ReadUInt32();
124 
125  writer.WriteNumber(pageSize); // Size of one index page in bytes
126  writer.WriteNumber(dataCount); // Number of entries in data file
127 
128  levelsOffset=writer.GetPos();
129  writer.Write((uint32_t)0); // Number of levels
130 
131  lastLevelPageStartOffset=writer.GetPos();
132  writer.WriteFileOffset((FileOffset)0); // Write the starting position of the last page
133 
134  indexPageCountsOffset=writer.GetPos();
135  writer.WriteFileOffset((FileOffset)0); // Write the starting position of list of sizes of each index level
136 
137  writer.FlushCurrentBlockWithZeros(pageSize);
138 
139  progress.Info(std::string("Writing level ")+std::to_string(1)+" ("+std::to_string(dataCount)+" entries)");
140 
141  N lastId=0;
142  FileOffset lastPos=0;
143  uint32_t currentPageSize=0;
144 
145  for (uint32_t d=0; d<dataCount; d++) {
146  progress.SetProgress(d,dataCount);
147 
148  FileOffset readPos;
149  T data;
150 
151  readPos=scanner.GetPos();
152 
153  ReadData(*typeConfig,
154  scanner,
155  data);
156 
157  if (d>0) {
158  if (data.GetId()<=lastId) {
159  progress.Error("Current id "+std::to_string(data.GetId())+" <= last id "+std::to_string(lastId));
160  }
161  assert(data.GetId()>lastId);
162  assert(readPos>lastPos);
163  }
164 
165  if (currentPageSize>0) {
166  std::array<char,10> b1;
167  std::array<char,10> b2;
168  N b1val=data.GetId()-lastId;
169  FileOffset b2val=readPos-lastPos;
170  unsigned int b1size;
171  unsigned int b2size;
172 
173 
174  b1size=EncodeNumber(b1val,b1);
175  b2size=EncodeNumber(b2val,b2);
176 
177  assert(b1size<=10);
178  assert(b2size<=10);
179 
180  if (currentPageSize+b1size+b2size>pageSize) {
181  // Next entry does not fit, fill rest of index page with zeros
182  writer.FlushCurrentBlockWithZeros(pageSize);
183 
184  currentPageSize=0;
185  }
186  else {
187  writer.Write(b1.data(),b1size);
188  writer.Write(b2.data(),b2size);
189 
190  currentPageSize+=b1size+b2size;
191  }
192  }
193 
194  if (currentPageSize==0) {
195  FileOffset writePos=writer.GetPos();
196 
197  startingIds.push_back(data.GetId());
198  pageStarts.push_back(writePos);
199 
200  writer.WriteNumber(data.GetId());
201  writer.WriteNumber(readPos);
202 
203  writePos=writer.GetPos();
204  currentPageSize=writePos%pageSize;
205  }
206 
207  lastId=data.GetId();
208  lastPos=readPos;
209  }
210 
211  writer.FlushCurrentBlockWithZeros(pageSize);
212  indexPageCounts.push_back((uint32_t)pageStarts.size());
213 
214  while (pageStarts.size()>1) {
215  std::vector<N> si(startingIds);
216  std::vector<FileOffset> po(pageStarts);
217 
218  startingIds.clear();
219  pageStarts.clear();
220 
221  progress.Info(std::string("Writing level ")+std::to_string(indexPageCounts.size()+1)+" ("+std::to_string(si.size())+" entries)");
222 
223  size_t currentPageSize=0;
224 
225  for (size_t i=0; i<si.size(); i++) {
226  if (currentPageSize>0) {
227  std::array<char,10> b1;
228  std::array<char,10> b2;
229  N b1val=si[i]-si[i-1];
230  FileOffset b2val=po[i]-po[i-1];
231  unsigned int b1size;
232  unsigned int b2size;
233 
234  b1size=EncodeNumber(b1val,b1);
235  b2size=EncodeNumber(b2val,b2);
236 
237  assert(b1size<=10);
238  assert(b2size<=10);
239 
240  if (currentPageSize+b1size+b2size>pageSize) {
241  // Fill rest of first index page with zeros
242  writer.FlushCurrentBlockWithZeros(pageSize);
243 
244  currentPageSize=0;
245  }
246  else {
247  writer.Write(b1.data(),b1size);
248  writer.Write(b2.data(),b2size);
249 
250  currentPageSize+=b1size+b2size;
251  }
252  }
253 
254  if (currentPageSize==0) {
255  FileOffset writePos;
256 
257  writePos=writer.GetPos();
258 
259  startingIds.push_back(si[i]);
260  pageStarts.push_back(writePos);
261 
262  writer.WriteNumber(si[i]);
263  writer.WriteNumber(po[i]);
264 
265  writePos=writer.GetPos();
266  currentPageSize=writePos%pageSize;
267  }
268  }
269 
270  writer.FlushCurrentBlockWithZeros(pageSize);
271  indexPageCounts.push_back((uint32_t)pageStarts.size());
272  }
273 
274  // If we have data to index, we should have at least one root level index page
275  if (dataCount>0) {
276  assert(pageStarts.size()==1);
277 
278  FileOffset indexPageCountsPos;
279 
280  indexPageCountsPos=writer.GetPos();
281 
282  writer.SetPos(levelsOffset);
283  writer.Write((uint32_t)indexPageCounts.size());
284 
285  writer.SetPos(lastLevelPageStartOffset);
286  writer.WriteFileOffset(pageStarts[0]);
287 
288  writer.SetPos(indexPageCountsOffset);
289  writer.WriteFileOffset(indexPageCountsPos);
290 
291  writer.SetPos(indexPageCountsPos);
292  }
293 
294  progress.Info(std::string("Index for ")+std::to_string(dataCount)+" data elements will be stored in "+std::to_string(indexPageCounts.size())+ " levels");
295  for (size_t level=0; level<indexPageCounts.size(); level++) {
296  size_t levelIndex=indexPageCounts.size()-level-1;
297 
298  progress.Info(std::string("Page count for level ")+std::to_string(level)+" is "+std::to_string(indexPageCounts[levelIndex]));
299  writer.WriteNumber(indexPageCounts[levelIndex]);
300  }
301 
302  scanner.Close();
303  writer.Close();
304  }
305  catch (IOException& e) {
306  progress.Error(e.GetDescription());
307 
308  scanner.CloseFailsafe();
309  writer.CloseFailsafe();
310 
311  return false;
312  }
313 
314  return true;
315  }
316 }
317 
318 #endif
// Cross-references from the generated documentation for symbols used in this
// file. A "Definition:" line names the file and line where a symbol is
// defined; for some of these lines the symbol name was not preserved.
//
// NumericIndexGenerator(const std::string &description, const std::string &datafile, const std::string &indexfile)
// Definition: GenNumericIndex.h:63
// Definition: Exception.h:72
// OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
// Definition: GenNumericIndex.h:38
// Definition: Area.h:38
// virtual void Error(const std::string &text)
// virtual void SetAction(const std::string &action)
// std::string GetDescription() const override
// Definition: ImportModule.h:100
// unsigned int EncodeNumber(N number, char *buffer)
// Definition: Number.h:145
// virtual void Info(const std::string &text)
// virtual void SetProgress(double current, double total, const std::string &label="")
// uint64_t FileOffset
// Definition: OSMScoutTypes.h:47
// bool Import(const TypeConfigRef &typeConfig, const ImportParameter &parameter, Progress &progress) override
// Definition: GenNumericIndex.h:89
// Definition: Progress.h:34
// std::shared_ptr< TypeConfig > TypeConfigRef
// Definition: TypeConfig.h:1227
// ~NumericIndexGenerator() override
// Definition: GenNumericIndex.h:74