libosmscout  1.1.1
NumericIndex.h
Go to the documentation of this file.
1 #ifndef OSMSCOUT_NUMERICINDEX_H
2 #define OSMSCOUT_NUMERICINDEX_H
3 
4 /*
5  This source is part of the libosmscout library
6  Copyright (C) 2009 Tim Teulings
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22 
23 #include <mutex>
24 #include <vector>
25 
26 #include <osmscout/util/Cache.h>
27 #include <osmscout/util/File.h>
29 #include <osmscout/util/Logger.h>
30 #include <osmscout/util/Number.h>
31 #include <osmscout/util/String.h>
32 
33 namespace osmscout {
34 
40  template <class N>
42  {
43  private:
47  struct Entry
48  {
49  N startId;
50  FileOffset fileOffset;
51  };
52 
53  struct Page
54  {
55  std::vector<Entry> entries;
56 
57  inline bool IndexIsValid(size_t index) const
58  {
59  return index<entries.size();
60  }
61  };
62 
63  using PageRef = std::shared_ptr<Page>;
65  using PageSimpleCache = std::unordered_map<N, PageRef>;
66 
70  struct NumericIndexCacheValueSizer : public PageCache::ValueSizer
71  {
72  size_t GetSize(const PageRef& value) const
73  {
74  return sizeof(value)+sizeof(Page)+sizeof(Entry)*value->entries.size();
75  }
76  };
77 
78  private:
79  std::string filepart;
80  std::string filename;
81 
82  mutable FileScanner scanner;
83 
84  size_t cacheSize;
85  uint32_t pageSize;
86  uint32_t levels;
87  std::vector<uint32_t> pageCounts;
88  char *buffer;
89 
90  PageRef root;
91  size_t simpleCacheMaxLevel;
92  mutable std::vector<PageSimpleCache> simplePageCache;
93  mutable std::vector<PageCache> pageCaches;
94 
95  mutable std::mutex accessMutex;
96 
97  private:
98  size_t GetPageIndex(const Page& page, N id) const;
99  void ReadPage(FileOffset offset, PageRef& page) const;
100  void InitializeCache();
101 
102  public:
103  NumericIndex(const std::string& filename,
104  size_t cacheSize);
105  virtual ~NumericIndex();
106 
107  bool Open(const std::string& path,
108  bool memoryMapped);
109  bool Close();
110 
111  bool IsOpen() const;
112 
113  bool GetOffset(const N& id, FileOffset& offset) const;
114 
115  template<typename IteratorIn>
116  bool GetOffsets(IteratorIn begin,
117  IteratorIn end,
118  size_t size,
119  std::vector<FileOffset>& offsets) const;
120 
121  void DumpStatistics() const;
122  };
123 
124  template <class N>
125  NumericIndex<N>::NumericIndex(const std::string& filename,
126  size_t cacheSize)
127  : filepart(filename),
128  cacheSize(cacheSize),
129  pageSize(0),
130  levels(0),
131  buffer(nullptr)
132  {
133  // no code
134  }
135 
136  template <class N>
138  {
139  Close();
140 
141  delete [] buffer;
142  }
143 
147  template <class N>
148  inline size_t NumericIndex<N>::GetPageIndex(const Page& page, N id) const
149  {
150  size_t size=page.entries.size();
151 
152  //std::cout << "Lookup in page: " << page.entries[0].startId << " " << id << " " << page.entries[page.entries.size()-1].startId << std::endl;
153 
154  if (size>0) {
155  size_t left=0;
156  size_t right=size-1;
157 
158  while (left<=right) {
159  size_t mid=(left+right)/2;
160 
161  if (page.entries[mid].startId<=id &&
162  (mid+1>=size || page.entries[mid+1].startId>id)) {
163  return mid;
164  }
165 
166  if (page.entries[mid].startId<id) {
167  left=mid+1;
168  }
169  else {
170  if (mid==0) {
171  return size;
172  }
173 
174  right=mid-1;
175  }
176  }
177  }
178 
179  return size;
180  }
181 
182  template <class N>
183  inline void NumericIndex<N>::ReadPage(FileOffset offset, PageRef& page) const
184  {
185  if (!page) {
186  page=std::make_shared<Page>();
187  }
188  else {
189  page->entries.clear();
190  }
191 
192  page->entries.reserve(pageSize/4);
193 
194  scanner.SetPos(offset);
195 
196  scanner.Read(buffer,
197  pageSize);
198 
199  size_t currentPos=0;
200  N prevId=0;
201  FileOffset prefFileOffset=0;
202 
203  //std::cout << "Page: " << offset << std::endl;
204 
205  while (currentPos<pageSize &&
206  buffer[currentPos]!=0) {
207  unsigned int idBytes;
208  unsigned int fileOffsetBytes;
209  N curId;
210  FileOffset curFileOffset;
211  Entry entry;
212 
213  idBytes=DecodeNumber(&buffer[currentPos],
214  curId);
215 
216  currentPos+=idBytes;
217 
218  fileOffsetBytes=DecodeNumber(&buffer[currentPos],
219  curFileOffset);
220 
221  currentPos+=fileOffsetBytes;
222 
223  entry.startId=prevId+curId;
224  entry.fileOffset=prefFileOffset+curFileOffset;
225 
226  //std::cout << "- " << entry.startId << " " << idBytes << " " << entry.fileOffset << " " << fileOffsetBytes << std::endl;
227 
228  prevId=entry.startId;
229  prefFileOffset=entry.fileOffset;
230 
231  page->entries.push_back(entry);
232  }
233  }
234 
235  template <class N>
236  void NumericIndex<N>::InitializeCache()
237  {
238  size_t currentCacheSize=cacheSize; // Available free space in cache
239  size_t requiredCacheSize=0; // Space needed for caching everything
240 
241  for (const auto count : pageCounts) {
242  requiredCacheSize+=count;
243  }
244 
245  if (requiredCacheSize>cacheSize) {
246  log.Warn() << "Warning: Index " << filepart << " has cache size " << cacheSize<< ", but requires cache size " << requiredCacheSize << " to load index completely into cache!";
247  }
248 
249  simpleCacheMaxLevel=0;
250  for (size_t level=1; level<pageCounts.size(); level++) {
251  size_t resultingCacheSize; // Cache size we actually use for this level
252 
253  simplePageCache.push_back(PageSimpleCache());
254 
255  if (pageCounts[level]>currentCacheSize) {
256  resultingCacheSize=currentCacheSize;
257  currentCacheSize=0;
258 
259  pageCaches.push_back(PageCache(resultingCacheSize));
260  }
261  else {
262  resultingCacheSize=pageCounts[level];
263  currentCacheSize-=pageCounts[level];
264 
265  simpleCacheMaxLevel=level;
266 
267  pageCaches.push_back(PageCache(0));
268  }
269  }
270  }
271 
272  template <class N>
273  bool NumericIndex<N>::Open(const std::string& path,
274  bool memoryMapped)
275  {
276  filename=AppendFileToDir(path,filepart);
277 
278  try {
279  scanner.Open(filename,
280  FileScanner::FastRandom,
281  memoryMapped);
282 
283  pageSize=scanner.ReadUInt32Number(); // Size of one index page
284  /*uint32_t entries=*/scanner.ReadUInt32Number(); // Number of entries in data file
285 
286  levels=scanner.ReadUInt32(); // Number of levels
287  pageCounts.resize(levels);
288 
289  FileOffset lastLevelPageStart=scanner.ReadFileOffset(); // Start of top level index page
290  FileOffset indexPageCountsOffset=scanner.ReadFileOffset(); // Start of list of sizes of index levels
291 
292  scanner.SetPos(indexPageCountsOffset);
293  for (size_t level=0; level<levels; level++) {
294  pageCounts[level]=scanner.ReadUInt32Number();
295  }
296 
297  delete [] buffer;
298  buffer=new char[pageSize];
299 
300  //std::cout << "Index " << filename << ": " << entries << " entries to index, " << levels << " levels, pageSize " << pageSize << ", cache size " << cacheSize << std::endl;
301 
302  ReadPage(lastLevelPageStart,root);
303 
304  InitializeCache();
305  }
306  catch (IOException& e) {
307  log.Error() << e.GetDescription();
308  scanner.CloseFailsafe();
309  return false;
310  }
311 
312  return !scanner.HasError();
313  }
314 
315  template <class N>
317  {
318  try {
319  if (scanner.IsOpen()) {
320  scanner.Close();
321  }
322  }
323  catch (IOException& e) {
324  log.Error() << e.GetDescription();
325  scanner.CloseFailsafe();
326  return false;
327  }
328 
329  return true;
330  }
331 
332  template <class N>
334  {
335  return scanner.IsOpen();
336  }
337 
343  template <class N>
344  bool NumericIndex<N>::GetOffset(const N& id,
345  FileOffset& offset) const
346  {
347  try
348  {
349  std::lock_guard<std::mutex> lock(accessMutex);
350 
351  //std::cout << "Looking up " << id << " in index...." << std::endl;
352 
353  size_t r=GetPageIndex(*root,id);
354  PageRef pageRef;
355 
356  if (!root->IndexIsValid(r)) {
357  //std::cerr << "Id " << id << " not found in root index, " << root->entries.front().startId << "-" << root->entries.back().startId << std::endl;
358  return false;
359  }
360 
361  const Entry& rootEntry=root->entries[r];
362 
363  //std::cout << "Root entry index: " << r << " " << rootEntry.startId << " " << rootEntry.fileOffset << std::endl;
364 
365  offset=rootEntry.fileOffset;
366 
367  N startId=rootEntry.startId;
368  for (size_t level=0; level+2<=levels; level++) {
369  //std::cout << "Level " << level << "/" << levels << std::endl;
370  if (level<=simpleCacheMaxLevel) {
371  auto cacheRef=simplePageCache[level].find(startId);
372 
373  if (cacheRef==simplePageCache[level].end()) {
374  pageRef=nullptr; // Make sure, that we allocate a new page and not reuse an old one
375 
376  ReadPage(offset,pageRef);
377 
378  simplePageCache[level].insert(std::make_pair(startId,pageRef));
379  }
380  else {
381  pageRef=cacheRef->second;
382  }
383  }
384  else {
385  typename PageCache::CacheRef cacheRef;
386 
387  if (!pageCaches[level].GetEntry(startId,cacheRef)) {
388  typename PageCache::CacheEntry cacheEntry(startId);
389 
390  cacheRef=pageCaches[level].SetEntry(cacheEntry);
391 
392  ReadPage(offset,cacheRef->value);
393  }
394 
395  pageRef=cacheRef->value;
396  }
397 
398  Page& page=*pageRef;
399 
400  size_t i=GetPageIndex(page,id);
401 
402  if (!page.IndexIsValid(i)) {
403  //std::cerr << "Id " << id << " not found in index level " << level+2 << "!" << std::endl;
404  return false;
405  }
406 
407  const Entry& entry=page.entries[i];
408 
409  //std::cout << "Sub entry index: " << i << " " << entry.startId << " " << entry.fileOffset << std::endl;
410 
411  startId=entry.startId;
412  offset=entry.fileOffset;
413  }
414 
415  if (startId!=id) {
416  //std::cerr << "Id " << id << " not found in leaf index level (" << levels << " levels)" << std::endl;
417  }
418 
419  return startId==id;
420  }
421  catch (IOException& e) {
422  log.Error() << e.GetDescription();
423  return false;
424  }
425  }
426 
432  template <class N>
433  template<typename IteratorIn>
434  bool NumericIndex<N>::GetOffsets(IteratorIn begin,
435  IteratorIn end,
436  size_t size,
437  std::vector<FileOffset>& offsets) const
438  {
439  offsets.clear();
440  offsets.reserve(size);
441 
442  for (IteratorIn idIter=begin; idIter!=end; ++idIter) {
443  FileOffset offset;
444 
445  if (GetOffset(*idIter,
446  offset)) {
447  offsets.push_back(offset);
448  }
449  }
450 
451  return true;
452  }
453 
454  template <class N>
456  {
457  size_t memory=0;
458  size_t pages=0;
459 
460  pages+=1;
461  memory+=root->entries.size()*sizeof(Entry);
462 
463 
464  for (size_t i=0; i<pageCaches.size(); i++) {
465  pages+=pageCaches[i].GetSize();
466  memory+=sizeof(pageCaches[i])+pageCaches[i].GetMemory(NumericIndexCacheValueSizer());
467  }
468 
469  log.Info() << "Index " << filepart << ": " << pages << " pages, memory " << memory;
470  }
471 }
472 
473 #endif
OSMSCOUT_API Log log
Definition: Cache.h:58
bool GetOffset(const N &id, FileOffset &offset) const
Definition: NumericIndex.h:344
bool Open(const std::string &path, bool memoryMapped)
Definition: NumericIndex.h:273
bool Close()
Definition: NumericIndex.h:316
NumericIndex(const std::string &filename, size_t cacheSize)
Definition: NumericIndex.h:125
Definition: NumericIndex.h:41
Prints details for debugging, if debug flag (performance, data) is set in renderer parameter...
Definition: MapPainter.h:58
Log & Info(bool state)
Definition: Logger.h:455
Definition: Exception.h:72
virtual ~NumericIndex()
Definition: NumericIndex.h:137
Log & Error(bool state)
Definition: Logger.h:469
unsigned int DecodeNumber(const char *buffer, N &number)
Definition: Number.h:294
Definition: Cache.h:89
bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size, std::vector< FileOffset > &offsets) const
Definition: NumericIndex.h:434
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
Definition: Area.h:38
void DumpStatistics() const
Definition: NumericIndex.h:455
std::string GetDescription() const override
bool IsOpen() const
Definition: NumericIndex.h:333
uint64_t FileOffset
Definition: OSMScoutTypes.h:47
Log & Warn(bool state)
Definition: Logger.h:462
typename OrderList::iterator CacheRef
Definition: Cache.h:98
Definition: Cache.h:64