libosmscout 1.1.1
Loading...
Searching...
No Matches
AreaIndexGenerator.h
Go to the documentation of this file.
1#ifndef LIBOSMSCOUT_AREAINDEXGENERATOR_H
2#define LIBOSMSCOUT_AREAINDEXGENERATOR_H
3
4/*
5 This source is part of the libosmscout library
6 Copyright (C) 2011 Tim Teulings
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21*/
22
24
25#include <list>
26#include <map>
27#include <utility>
28
29#include <osmscout/Pixel.h>
30
32
33#include <osmscout/io/File.h>
35
38
40
41namespace osmscout {
42
46 template <typename Object>
48 {
49 protected:
50 using CoordCountMap = std::map<TileId, size_t>;
51 using CoordOffsetsMap = std::map<TileId, std::list<FileOffset>>;
52
53 struct TypeData
54 {
55 MagnificationLevel indexLevel{0};
56 size_t indexCells=0;
57 size_t indexEntries=0;
58
59 TileIdBox tileBox{TileId(0,0),TileId(0,0)};
60
62
63 inline bool HasEntries()
64 {
65 return indexCells>0 &&
67 }
68 };
69
70 private:
71 std::string typeName;
72 std::string typeNamePlural;
73 std::string dataFile;
74 std::string indexFile;
75
76 protected:
77 AreaIndexGenerator(const std::string& typeName,
78 const std::string& typeNamePlural,
79 const std::string& dataFile,
80 const std::string& indexFile):
81 typeName(typeName),
82 typeNamePlural(typeNamePlural),
83 dataFile(dataFile),
84 indexFile(indexFile)
85 {}
86
87 virtual bool FitsIndexCriteria(Progress& progress,
88 const TypeInfo& typeInfo,
89 const CoordCountMap& cellFillCount) const;
90
99 void CalculateStatistics(const MagnificationLevel& level,
100 TypeData& typeData,
101 const CoordCountMap& cellFillCount) const;
102
103 bool CalculateDistribution(const TypeConfig& typeConfig,
104 const ImportParameter& parameter,
105 Progress& progress,
106 const std::vector<TypeInfoRef>& types,
107 std::vector<TypeData>& typeData,
108 const MagnificationLevel& minLevelParam,
109 const MagnificationLevel& maxLevelParam,
110 bool useMmap,
111 MagnificationLevel& maxLevel) const;
112
124 void WriteBitmap(Progress& progress,
125 FileWriter& writer,
126 const TypeInfo& typeInfo,
127 const TypeData& typeData,
128 const CoordOffsetsMap& typeCellOffsets);
129
130 virtual void WriteTypeId(const TypeConfigRef& typeConfig,
131 const TypeInfoRef &type,
132 FileWriter &writer) const = 0;
133
134 bool MakeAreaIndex(const TypeConfigRef& typeConfig,
135 const ImportParameter& parameter,
136 Progress& progress,
137 const std::vector<TypeInfoRef> &types,
138 const MagnificationLevel &areaIndexMinMag,
139 const MagnificationLevel &areaIndexMaxMag,
140 bool useMmap);
141 };
142
143 template <typename Object>
145 FileWriter& writer,
146 const TypeInfo& typeInfo,
147 const TypeData& typeData,
148 const CoordOffsetsMap& typeCellOffsets)
149 {
150 size_t dataSize=0;
151 std::array<char,10> buffer;
152
153 //
154 // Calculate the number of entries and the overall size of the data in the bitmap entries
155 // We need the overall size of the bitmap entry data, because we would store the file offset only with
156 // that much bytes we need to address the last data entry.
157
158 for (const auto& cell : typeCellOffsets) {
159 dataSize+=EncodeNumber(cell.second.size(),
160 buffer);
161
162 FileOffset previousOffset=0;
163
164 for (const auto& offset : cell.second) {
165 FileOffset data=offset-previousOffset;
166
167 dataSize+=EncodeNumber(data,
168 buffer);
169
170 previousOffset=offset;
171 }
172 }
173
174 // "+1" because we add +1 to every offset, to generate offset > 0
175 uint8_t dataOffsetBytes=BytesNeededToEncodeNumber(dataSize+1);
176
177
178 GeoBox boundingBox=typeData.tileBox.GetCenter().GetBoundingBox(typeData.indexLevel);
179
180 progress.Info("Writing map for "+
181 typeInfo.GetName()+
182 " ("+
183 ByteSizeToString(1.0*dataOffsetBytes*typeData.tileBox.GetCount()+dataSize)+", "+
184 GetEllipsoidalDistance(boundingBox.GetTopLeft(),boundingBox.GetBottomRight()).AsString()+", "+
185 std::to_string(typeData.indexEntries/typeData.indexCells)+"/cell"+
186 ")");
187
188 FileOffset bitmapOffset=writer.GetPos();
189
190 assert(typeData.indexOffset!=0);
191
192 writer.SetPos(typeData.indexOffset);
193
194 writer.WriteFileOffset(bitmapOffset);
195 writer.Write(dataOffsetBytes);
196
197 writer.SetPos(bitmapOffset);
198
199 // Write the bitmap with offsets for each cell
200 // We prefill with zero and only overwrite cells that have data
201 // So zero means "no data for this cell"
202 for (size_t i=0; i<typeData.tileBox.GetCount(); i++) {
203 writer.WriteFileOffset(0,
204 dataOffsetBytes);
205 }
206
207 FileOffset dataStartOffset=writer.GetPos();
208
209 // Now write the list of offsets of objects for every cell with content
210 for (const auto& cell : typeCellOffsets) {
211 FileOffset bitmapCellOffset=bitmapOffset+
212 ((cell.first.GetY()-typeData.tileBox.GetMinY())*typeData.tileBox.GetWidth()+
213 cell.first.GetX()-typeData.tileBox.GetMinX())*(FileOffset)dataOffsetBytes;
214 FileOffset previousOffset=0;
215
216 assert(bitmapCellOffset>=bitmapOffset);
217
218 FileOffset cellOffset=writer.GetPos();
219
220 writer.SetPos(bitmapCellOffset);
221
222 assert(cellOffset>bitmapCellOffset);
223
224 // We add +1 to make sure, that we can differentiate between "0" as "no entry" and "0" as first data entry.
225 writer.WriteFileOffset(cellOffset-dataStartOffset+1,dataOffsetBytes);
226
227 writer.SetPos(cellOffset);
228
229 writer.WriteNumber((uint32_t)cell.second.size());
230
231 // FileOffsets are already in increasing order, since
232 // File is scanned from start to end
233 for (const auto& offset : cell.second) {
234 assert(offset>previousOffset);
235
236 writer.WriteNumber((FileOffset)(offset-previousOffset));
237
238 previousOffset=offset;
239 }
240 }
241 }
242
243 template <typename Object>
245 const TypeInfo& typeInfo,
246 const CoordCountMap& cellFillCount) const
247 {
248 if (cellFillCount.empty()) {
249 return true;
250 }
251
252 size_t overallCount=0;
253 size_t maxCellCount=0;
254
255 for (const auto& cell : cellFillCount) {
256 overallCount+=cell.second;
257 maxCellCount=std::max(maxCellCount,cell.second);
258 }
259
260 // Average number of entries per tile cell
261 double average=double(overallCount)/double(cellFillCount.size());
262
263 size_t emptyCount=0;
264 size_t tooLowCount=0;
265 size_t tooHighCount=0;
266 size_t muchTooHighCount=0;
267 size_t okCount=0;
268 size_t allCount=0;
269
270 size_t tooLowValue=4*average/10;
271 size_t tooHighValue=64+32;
272 size_t muchTooHighValue=128+64;
273
274 for (const auto& cell : cellFillCount) {
275 allCount++;
276
277 if (cell.second==0) {
278 emptyCount++;
279 }
280 else if (cell.second<tooLowValue) {
281 tooLowCount++;
282 }
283 else if (cell.second>muchTooHighValue) {
284 muchTooHighCount++;
285 }
286 else if (cell.second>tooHighValue) {
287 tooHighCount++;
288 }
289 else {
290 okCount++;
291 }
292 }
293
294 progress.Info(typeInfo.GetName()+" "+
295 std::to_string(emptyCount)+" | "+
296 std::to_string(tooLowCount)+" < "+
297 std::to_string(okCount)+" < "+
298 std::to_string(tooHighCount)+" *"+
299 std::to_string(muchTooHighCount)+"* - "+
300 std::to_string(allCount));
301
302 if (double(muchTooHighCount) / double(allCount) >= 0.01) {
303 progress.Warning(typeInfo.GetName() + " has more than 1% cells with much too high entry count, will use smaller tile size");
304 return false;
305 }
306
307 if (double(tooHighCount) / double(allCount) >= 0.05) {
308 progress.Warning(typeInfo.GetName() + " has more than 5% cells with too high entry count, will use smaller tile size");
309 return false;
310 }
311
312 if (double(tooLowCount) / double(allCount) >= 0.2) {
313 progress.Warning(typeInfo.GetName() + " has more than 20% cells with <40% of average filling");
314 }
315
316 /*
317 // If the fill rate of the index is too low, we use this index level anyway
318 if (fillRate<parameter.GetAreaWayIndexMinFillRate()) {
319 progress.Warning(typeInfo.GetName()+" is not well distributed");
320 return true;
321 }
322
323 // If average fill size and max fill size for tile cells
324 // is within limits, store it now.
325 if (maxCellCount<=parameter.GetAreaWayIndexCellSizeMax() &&
326 average<=parameter.GetAreaWayIndexCellSizeAverage()) {
327 return true;
328 }*/
329
330 return true;
331 }
332
333 template <typename Object>
335 const ImportParameter& parameter,
336 Progress& progress,
337 const std::vector<TypeInfoRef> &types,
338 const MagnificationLevel &areaIndexMinMag,
339 const MagnificationLevel &areaIndexMaxMag,
340 bool useMmap)
341 {
342 using namespace std::string_literals;
343
344 FileScanner scanner;
345 FileWriter writer;
346 std::vector<TypeData> typeData;
347 MagnificationLevel maxLevel;
348
349 progress.Info("Minimum magnification: "s + areaIndexMinMag);
350
351 //
352 // Scanning distribution
353 //
354
355 progress.SetAction("Scanning level distribution of "s + typeName + " types"s);
356
357 if (!CalculateDistribution(*typeConfig,
358 parameter,
359 progress,
360 types,
361 typeData,
362 areaIndexMinMag,
363 areaIndexMaxMag,
364 useMmap,
365 maxLevel)) {
366 return false;
367 }
368
369 // Calculate number of types which have data
370
371 auto indexEntries=std::count_if(types.begin(),
372 types.end(),
373 [&typeData](const TypeInfoRef& type) {
374 return typeData[type->GetIndex()].HasEntries();
375 });
376
377 //
378 // Writing index file
379 //
380
381 progress.SetAction("Generating '"s + indexFile + "'"s);
382
383 try {
384 writer.Open(AppendFileToDir(parameter.GetDestinationDirectory(),indexFile));
385
386 writer.Write((uint32_t)indexEntries);
387
388 for (const auto &type : types) {
389 size_t i=type->GetIndex();
390
391 if (typeData[i].HasEntries()) {
392 uint8_t dataOffsetBytes=0;
393 FileOffset bitmapOffset=0;
394
395 WriteTypeId(typeConfig,
396 type,
397 writer);
398
399 typeData[i].indexOffset=writer.GetPos();
400
401 writer.WriteFileOffset(bitmapOffset);
402 writer.Write(dataOffsetBytes);
403 writer.WriteNumber(typeData[i].indexLevel);
404 writer.WriteNumber(typeData[i].tileBox.GetMinX());
405 writer.WriteNumber(typeData[i].tileBox.GetMaxX());
406 writer.WriteNumber(typeData[i].tileBox.GetMinY());
407 writer.WriteNumber(typeData[i].tileBox.GetMaxY());
408 }
409 }
410
411 scanner.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
412 dataFile),
413 FileScanner::Sequential,
414 useMmap);
415
416 for (MagnificationLevel l=areaIndexMinMag; l <= maxLevel; l++) {
417 Magnification magnification(l);
418 TypeInfoSet indexTypes(*typeConfig);
419
420 scanner.GotoBegin();
421
422 for (const auto &type : types) {
423 if (typeData[type->GetIndex()].HasEntries() &&
424 typeData[type->GetIndex()].indexLevel==l) {
425 indexTypes.Set(type);
426 }
427 }
428
429 if (indexTypes.Empty()) {
430 continue;
431 }
432
433 progress.Info("Scanning "s + typeNamePlural + " for index level "s + l);
434
435 std::vector<CoordOffsetsMap> typeCellOffsets(typeConfig->GetTypeCount());
436
437 uint32_t objectCount=scanner.ReadUInt32();
438
439 Object obj;
440
441 for (uint32_t w=1; w <= objectCount; w++) {
442 progress.SetProgress(w, objectCount);
443
444 FileOffset offset=scanner.GetPos();
445
446 obj.Read(*typeConfig,
447 scanner);
448
449 if (!indexTypes.IsSet(obj.GetType())) {
450 continue;
451 }
452
453 TileIdBox box(magnification, obj.GetBoundingBox());
454
455 for (const auto& tileId : box) {
456 typeCellOffsets[obj.GetType()->GetIndex()][tileId].push_back(offset);
457 }
458 }
459
460 for (const auto &type : indexTypes) {
461 size_t index=type->GetIndex();
462
463 WriteBitmap(progress,
464 writer,
465 *typeConfig->GetTypeInfo(index),
466 typeData[index],
467 typeCellOffsets[index]);
468 }
469 }
470
471 scanner.Close();
472 writer.Close();
473 }
474 catch (IOException& e) {
475 progress.Error(e.GetDescription());
476
477 scanner.CloseFailsafe();
478 writer.CloseFailsafe();
479
480 return false;
481 }
482
483 return true;
484 }
485
486 template <typename Object>
487 void AreaIndexGenerator<Object>::CalculateStatistics(const MagnificationLevel& level,
488 TypeData& typeData,
489 const CoordCountMap& cellFillCount) const
490 {
491 // Initialize/reset data structure
492 typeData.indexLevel=level;
493 typeData.indexCells=cellFillCount.size();
494 typeData.indexEntries=0;
495
496 // If we do not have any entries, we are done ;-)
497 if (cellFillCount.empty()) {
498 return;
499 }
500
501 typeData.tileBox=TileIdBox(cellFillCount.begin()->first,cellFillCount.begin()->first);
502
503 for (const auto& cell : cellFillCount) {
504 typeData.indexEntries+=cell.second;
505
506 typeData.tileBox=typeData.tileBox.Include(cell.first);
507 }
508 }
509
510 template <typename Object>
511 bool AreaIndexGenerator<Object>::CalculateDistribution(const TypeConfig& typeConfig,
512 const ImportParameter& parameter,
513 Progress& progress,
514 const std::vector<TypeInfoRef>& types,
515 std::vector<TypeData>& typeData,
516 const MagnificationLevel& minLevelParam,
517 const MagnificationLevel& maxLevelParam,
518 bool useMmap,
519 MagnificationLevel& maxLevel) const
520 {
521 FileScanner scanner;
522 TypeInfoSet remainingObjectTypes;
523 MagnificationLevel level=minLevelParam;
524
525 maxLevel=MagnificationLevel(0);
526 typeData.resize(typeConfig.GetTypeCount());
527
528 try {
529 scanner.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
530 dataFile),
531 FileScanner::Sequential,
532 useMmap);
533
534 remainingObjectTypes.Set(types);
535
536 while (!remainingObjectTypes.Empty() &&
537 level <= maxLevelParam) {
538 Magnification magnification(level);
539 TypeInfoSet currentObjectTypes(remainingObjectTypes);
540 std::vector<CoordCountMap> cellFillCount(typeConfig.GetTypeCount());
541
542 progress.Info("Scanning Level " + level + " (" + std::to_string(remainingObjectTypes.Size()) + " types remaining)");
543
544 scanner.GotoBegin();
545
546 uint32_t objectCount=scanner.ReadUInt32();
547
548 Object obj;
549
550 for (uint32_t objI=1; objI <= objectCount; objI++) {
551 progress.SetProgress(objI, objectCount);
552
553 obj.Read(typeConfig,
554 scanner);
555
556 // Count number of entries per current type and coordinate
557 if (!currentObjectTypes.IsSet(obj.GetType())) {
558 continue;
559 }
560
561 GeoBox boundingBox=obj.GetBoundingBox();
562
563 TileIdBox box(TileId::GetTile(magnification,boundingBox.GetMinCoord()),
564 TileId::GetTile(magnification,boundingBox.GetMaxCoord()));
565
566 for (const auto& tileId : box) {
567 cellFillCount[obj.GetType()->GetIndex()][tileId]++;
568 }
569 }
570
571 // Check if cell fill for current type is in defined limits
572 for (const auto &type : currentObjectTypes) {
573 size_t typeIndex=type->GetIndex();
574
575 if (!FitsIndexCriteria(progress,
576 *typeConfig.GetTypeInfo(typeIndex),
577 cellFillCount[typeIndex])) {
578 if (level < maxLevelParam) {
579 currentObjectTypes.Remove(type);
580 }
581 else {
582 progress.Warning(typeConfig.GetTypeInfo(typeIndex)->GetName()+" still does not fit good index criteria");
583 }
584 }
585 }
586
587 for (const auto &type : currentObjectTypes) {
588 size_t typeIndex=type->GetIndex();
589
591 typeData[typeIndex],
592 cellFillCount[typeIndex]);
593
594 maxLevel=std::max(maxLevel,level);
595
596 progress.Info("Type " + type->GetName() + ", " +
597 std::to_string(typeData[type->GetIndex()].indexCells) + " cells, " +
598 std::to_string(typeData[type->GetIndex()].indexEntries) + " objects");
599
600 remainingObjectTypes.Remove(type);
601 }
602
603 level++;
604 }
605
606 scanner.Close();
607 }
608 catch (IOException& e) {
609 progress.Error(e.GetDescription());
610 return false;
611 }
612
613 return true;
614 }
615
616}
617
618#endif //LIBOSMSCOUT_AREAINDEXGENERATOR_H
std::map< TileId, size_t > CoordCountMap
Definition AreaIndexGenerator.h:50
AreaIndexGenerator(const std::string &typeName, const std::string &typeNamePlural, const std::string &dataFile, const std::string &indexFile)
Definition AreaIndexGenerator.h:77
void CalculateStatistics(const MagnificationLevel &level, TypeData &typeData, const CoordCountMap &cellFillCount) const
Definition AreaIndexGenerator.h:487
std::map< TileId, std::list< FileOffset > > CoordOffsetsMap
Definition AreaIndexGenerator.h:51
bool CalculateDistribution(const TypeConfig &typeConfig, const ImportParameter &parameter, Progress &progress, const std::vector< TypeInfoRef > &types, std::vector< TypeData > &typeData, const MagnificationLevel &minLevelParam, const MagnificationLevel &maxLevelParam, bool useMmap, MagnificationLevel &maxLevel) const
Definition AreaIndexGenerator.h:511
virtual bool FitsIndexCriteria(Progress &progress, const TypeInfo &typeInfo, const CoordCountMap &cellFillCount) const
Definition AreaIndexGenerator.h:244
bool MakeAreaIndex(const TypeConfigRef &typeConfig, const ImportParameter &parameter, Progress &progress, const std::vector< TypeInfoRef > &types, const MagnificationLevel &areaIndexMinMag, const MagnificationLevel &areaIndexMaxMag, bool useMmap)
Definition AreaIndexGenerator.h:334
virtual void WriteTypeId(const TypeConfigRef &typeConfig, const TypeInfoRef &type, FileWriter &writer) const =0
void WriteBitmap(Progress &progress, FileWriter &writer, const TypeInfo &typeInfo, const TypeData &typeData, const CoordOffsetsMap &typeCellOffsets)
Definition AreaIndexGenerator.h:144
Definition Exception.h:73
std::string GetDescription() const override
Definition ImportModule.h:101
Definition Progress.h:34
virtual void Error(const std::string &text)
virtual void Info(const std::string &text)
virtual void Warning(const std::string &text)
virtual void SetAction(const std::string &action)
virtual void SetProgress(double current, double total, const std::string &label="")
Definition TileId.h:46
static TileId GetTile(const Magnification &magnification, const GeoCoord &coord)
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
OSMSCOUT_API Distance GetEllipsoidalDistance(double aLon, double aLat, double bLon, double bLat)
uint8_t BytesNeededToEncodeNumber(N number)
Definition Number.h:318
OSMSCOUT_API std::string ByteSizeToString(FileOffset size, const Locale &locale=Locale::ByEnvironmentSafe())
uint64_t FileOffset
Definition OSMScoutTypes.h:46
unsigned int EncodeNumber(N number, char *buffer)
Definition Number.h:145
std::shared_ptr< TypeConfig > TypeConfigRef
Definition TypeConfig.h:1396
Definition Area.h:39
std::shared_ptr< TypeInfo > TypeInfoRef
Definition TypeConfig.h:61
Definition AreaIndexGenerator.h:54
MagnificationLevel indexLevel
Definition AreaIndexGenerator.h:55
TileIdBox tileBox
Number of entries over all cells.
Definition AreaIndexGenerator.h:59
FileOffset indexOffset
Definition AreaIndexGenerator.h:61
size_t indexCells
magnification level of index
Definition AreaIndexGenerator.h:56
size_t indexEntries
Number of filled cells in index.
Definition AreaIndexGenerator.h:57
bool HasEntries()
Position in file where the offset of the bitmap is written to.
Definition AreaIndexGenerator.h:63