libosmscout 1.1.1
Loading...
Searching...
No Matches
NumericIndex.h
Go to the documentation of this file.
1#ifndef OSMSCOUT_NUMERICINDEX_H
2#define OSMSCOUT_NUMERICINDEX_H
3
4/*
5 This source is part of the libosmscout library
6 Copyright (C) 2009 Tim Teulings
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21*/
22
23#include <mutex>
24#include <vector>
25
26#include <osmscout/util/Cache.h>
27#include <osmscout/log/Logger.h>
30
31#include <osmscout/io/File.h>
33
34namespace osmscout {
35
41 template <class N>
43 {
44 private:
48 struct Entry
49 {
50 N startId;
51 FileOffset fileOffset;
52 };
53
54 struct Page
55 {
56 std::vector<Entry> entries;
57
58 inline bool IndexIsValid(size_t index) const
59 {
60 return index<entries.size();
61 }
62 };
63
64 using PageRef = std::shared_ptr<Page>;
65 using PageCache = Cache<N, PageRef>;
66 using PageSimpleCache = std::unordered_map<N, PageRef>;
67
71 struct NumericIndexCacheValueSizer : public PageCache::ValueSizer
72 {
73 size_t GetSize(const PageRef& value) const
74 {
75 return sizeof(value)+sizeof(Page)+sizeof(Entry)*value->entries.size();
76 }
77 };
78
79 private:
80 std::string filepart;
81 std::string filename;
82
83 mutable FileScanner scanner;
84
85 size_t cacheSize;
86 uint32_t pageSize=0;
87 uint32_t levels=0;
88 std::vector<uint32_t> pageCounts;
89 char *buffer=nullptr;
90
91 PageRef root;
92 size_t simpleCacheMaxLevel;
93 mutable std::vector<PageSimpleCache> simplePageCache;
94 mutable std::vector<PageCache> pageCaches;
95
96 mutable std::mutex accessMutex;
97
98 private:
99 size_t GetPageIndex(const Page& page, N id) const;
100 void ReadPage(FileOffset offset, PageRef& page) const;
101 void InitializeCache();
102
103 public:
104 NumericIndex(const std::string& filename,
105 size_t cacheSize);
106 virtual ~NumericIndex();
107
108 bool Open(const std::string& path,
109 bool memoryMapped);
110 bool Close();
111
112 bool IsOpen() const;
113
114 bool GetOffset(const N& id, FileOffset& offset) const;
115
116 template<typename IteratorIn>
117 bool GetOffsets(IteratorIn begin,
118 IteratorIn end,
119 size_t size,
120 std::vector<FileOffset>& offsets) const;
121
122 void DumpStatistics() const;
123 };
124
125 template <class N>
126 NumericIndex<N>::NumericIndex(const std::string& filename,
127 size_t cacheSize)
128 : filepart(filename),
129 cacheSize(cacheSize)
130 {
131 // no code
132 }
133
134 template <class N>
136 {
137 Close();
138
139 delete [] buffer;
140 }
141
145 template <class N>
146 inline size_t NumericIndex<N>::GetPageIndex(const Page& page, N id) const
147 {
148 size_t size=page.entries.size();
149
150 //std::cout << "Lookup in page: " << page.entries[0].startId << " " << id << " " << page.entries[page.entries.size()-1].startId << std::endl;
151
152 if (size>0) {
153 size_t left=0;
154 size_t right=size-1;
155
156 while (left<=right) {
157 size_t mid=(left+right)/2;
158
159 if (page.entries[mid].startId<=id &&
160 (mid+1>=size || page.entries[mid+1].startId>id)) {
161 return mid;
162 }
163
164 if (page.entries[mid].startId<id) {
165 left=mid+1;
166 }
167 else {
168 if (mid==0) {
169 return size;
170 }
171
172 right=mid-1;
173 }
174 }
175 }
176
177 return size;
178 }
179
180 template <class N>
181 inline void NumericIndex<N>::ReadPage(FileOffset offset, PageRef& page) const
182 {
183 if (!page) {
184 page=std::make_shared<Page>();
185 }
186 else {
187 page->entries.clear();
188 }
189
190 page->entries.reserve(pageSize/4);
191
192 scanner.SetPos(offset);
193
194 scanner.Read(buffer,
195 pageSize);
196
197 size_t currentPos=0;
198 N prevId=0;
199 FileOffset prefFileOffset=0;
200
201 //std::cout << "Page: " << offset << std::endl;
202
203 while (currentPos<pageSize &&
204 buffer[currentPos]!=0) {
205 unsigned int idBytes;
206 unsigned int fileOffsetBytes;
207 N curId;
208 FileOffset curFileOffset;
209 Entry entry;
210
211 idBytes=DecodeNumber(&buffer[currentPos],
212 curId);
213
214 currentPos+=idBytes;
215
216 fileOffsetBytes=DecodeNumber(&buffer[currentPos],
217 curFileOffset);
218
219 currentPos+=fileOffsetBytes;
220
221 entry.startId=prevId+curId;
222 entry.fileOffset=prefFileOffset+curFileOffset;
223
224 //std::cout << "- " << entry.startId << " " << idBytes << " " << entry.fileOffset << " " << fileOffsetBytes << std::endl;
225
226 prevId=entry.startId;
227 prefFileOffset=entry.fileOffset;
228
229 page->entries.push_back(entry);
230 }
231 }
232
233 template <class N>
234 void NumericIndex<N>::InitializeCache()
235 {
236 size_t currentCacheSize=cacheSize; // Available free space in cache
237 size_t requiredCacheSize=0; // Space needed for caching everything
238
239 for (const auto count : pageCounts) {
240 requiredCacheSize+=count;
241 }
242
243 if (requiredCacheSize>cacheSize) {
244 log.Warn() << "Warning: Index " << filepart << " has cache size " << cacheSize<< ", but requires cache size " << requiredCacheSize << " to load index completely into cache!";
245 }
246
247 simpleCacheMaxLevel=0;
248 for (size_t level=1; level<pageCounts.size(); level++) {
249 size_t resultingCacheSize; // Cache size we actually use for this level
250
251 simplePageCache.push_back(PageSimpleCache());
252
253 if (pageCounts[level]>currentCacheSize) {
254 resultingCacheSize=currentCacheSize;
255 currentCacheSize=0;
256
257 pageCaches.push_back(PageCache(resultingCacheSize));
258 }
259 else {
260 resultingCacheSize=pageCounts[level];
261 currentCacheSize-=pageCounts[level];
262
263 simpleCacheMaxLevel=level;
264
265 pageCaches.push_back(PageCache(0));
266 }
267 }
268 }
269
270 template <class N>
271 bool NumericIndex<N>::Open(const std::string& path,
272 bool memoryMapped)
273 {
274 filename=AppendFileToDir(path,filepart);
275
276 try {
277 scanner.Open(filename,
278 FileScanner::FastRandom,
279 memoryMapped);
280
281 pageSize=scanner.ReadUInt32Number(); // Size of one index page
282 /*uint32_t entries=*/scanner.ReadUInt32Number(); // Number of entries in data file
283
284 levels=scanner.ReadUInt32(); // Number of levels
285 pageCounts.resize(levels);
286
287 FileOffset lastLevelPageStart=scanner.ReadFileOffset(); // Start of top level index page
288 FileOffset indexPageCountsOffset=scanner.ReadFileOffset(); // Start of list of sizes of index levels
289
290 scanner.SetPos(indexPageCountsOffset);
291 for (size_t level=0; level<levels; level++) {
292 pageCounts[level]=scanner.ReadUInt32Number();
293 }
294
295 delete [] buffer;
296 buffer=new char[pageSize];
297
298 //std::cout << "Index " << filename << ": " << entries << " entries to index, " << levels << " levels, pageSize " << pageSize << ", cache size " << cacheSize << std::endl;
299
300 ReadPage(lastLevelPageStart,root);
301
302 InitializeCache();
303 }
304 catch (IOException& e) {
305 log.Error() << e.GetDescription();
306 scanner.CloseFailsafe();
307 return false;
308 }
309
310 return !scanner.HasError();
311 }
312
313 template <class N>
315 {
316 try {
317 if (scanner.IsOpen()) {
318 scanner.Close();
319 }
320 }
321 catch (IOException& e) {
322 log.Error() << e.GetDescription();
323 scanner.CloseFailsafe();
324 return false;
325 }
326
327 return true;
328 }
329
330 template <class N>
332 {
333 return scanner.IsOpen();
334 }
335
341 template <class N>
343 FileOffset& offset) const
344 {
345 try
346 {
347 std::lock_guard<std::mutex> lock(accessMutex);
348
349 //std::cout << "Looking up " << id << " in index...." << std::endl;
350
351 size_t r=GetPageIndex(*root,id);
352 PageRef pageRef;
353
354 if (!root->IndexIsValid(r)) {
355 //std::cerr << "Id " << id << " not found in root index, " << root->entries.front().startId << "-" << root->entries.back().startId << std::endl;
356 return false;
357 }
358
359 const Entry& rootEntry=root->entries[r];
360
361 //std::cout << "Root entry index: " << r << " " << rootEntry.startId << " " << rootEntry.fileOffset << std::endl;
362
363 offset=rootEntry.fileOffset;
364
365 N startId=rootEntry.startId;
366 for (size_t level=0; level+2<=levels; level++) {
367 //std::cout << "Level " << level << "/" << levels << std::endl;
368 if (level<=simpleCacheMaxLevel) {
369 auto cacheRef=simplePageCache[level].find(startId);
370
371 if (cacheRef==simplePageCache[level].end()) {
372 pageRef=nullptr; // Make sure, that we allocate a new page and not reuse an old one
373
374 ReadPage(offset,pageRef);
375
376 simplePageCache[level].emplace(startId,pageRef);
377 }
378 else {
379 pageRef=cacheRef->second;
380 }
381 }
382 else {
383 typename PageCache::CacheRef cacheRef;
384
385 if (!pageCaches[level].GetEntry(startId,cacheRef)) {
386 typename PageCache::CacheEntry cacheEntry(startId);
387
388 cacheRef=pageCaches[level].SetEntry(cacheEntry);
389
390 ReadPage(offset,cacheRef->value);
391 }
392
393 pageRef=cacheRef->value;
394 }
395
396 Page& page=*pageRef;
397
398 size_t i=GetPageIndex(page,id);
399
400 if (!page.IndexIsValid(i)) {
401 //std::cerr << "Id " << id << " not found in index level " << level+2 << "!" << std::endl;
402 return false;
403 }
404
405 const Entry& entry=page.entries[i];
406
407 //std::cout << "Sub entry index: " << i << " " << entry.startId << " " << entry.fileOffset << std::endl;
408
409 startId=entry.startId;
410 offset=entry.fileOffset;
411 }
412
413 if (startId!=id) {
414 //std::cerr << "Id " << id << " not found in leaf index level (" << levels << " levels)" << std::endl;
415 }
416
417 return startId==id;
418 }
419 catch (IOException& e) {
420 log.Error() << e.GetDescription();
421 return false;
422 }
423 }
424
430 template <class N>
431 template<typename IteratorIn>
432 bool NumericIndex<N>::GetOffsets(IteratorIn begin,
433 IteratorIn end,
434 size_t size,
435 std::vector<FileOffset>& offsets) const
436 {
437 offsets.clear();
438 offsets.reserve(size);
439
440 for (IteratorIn idIter=begin; idIter!=end; ++idIter) {
441 FileOffset offset;
442
443 if (GetOffset(*idIter,
444 offset)) {
445 offsets.push_back(offset);
446 }
447 }
448
449 return true;
450 }
451
452 template <class N>
454 {
455 size_t memory=0;
456 size_t pages=0;
457
458 pages+=1;
459 memory+=root->entries.size()*sizeof(Entry);
460
461
462 for (size_t i=0; i<pageCaches.size(); i++) {
463 pages+=pageCaches[i].GetSize();
464 memory+=sizeof(pageCaches[i])+pageCaches[i].GetMemory(NumericIndexCacheValueSizer());
465 }
466
467 log.Info() << "Index " << filepart << ": " << pages << " pages, memory " << memory;
468 }
469}
470
471#endif
Definition Cache.h:59
typename OrderList::iterator CacheRef
Definition Cache.h:98
Definition Exception.h:73
std::string GetDescription() const override
bool IsOpen() const
Definition NumericIndex.h:331
bool Close()
Definition NumericIndex.h:314
bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size, std::vector< FileOffset > &offsets) const
Definition NumericIndex.h:432
virtual ~NumericIndex()
Definition NumericIndex.h:135
bool GetOffset(const N &id, FileOffset &offset) const
Definition NumericIndex.h:342
void DumpStatistics() const
Definition NumericIndex.h:453
bool Open(const std::string &path, bool memoryMapped)
Definition NumericIndex.h:271
NumericIndex(const std::string &filename, size_t cacheSize)
Definition NumericIndex.h:126
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
OSMSCOUT_API Log log
Definition LoggerImpl.h:95
unsigned int DecodeNumber(const char *buffer, N &number)
Definition Number.h:294
uint64_t FileOffset
Definition OSMScoutTypes.h:46
Definition Area.h:39