libosmscout  1.1.1
AreaIndexGenerator.h
Go to the documentation of this file.
1 #ifndef LIBOSMSCOUT_AREAINDEXGENERATOR_H
2 #define LIBOSMSCOUT_AREAINDEXGENERATOR_H
3 
4 /*
5  This source is part of the libosmscout library
6  Copyright (C) 2011 Tim Teulings
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22 
23 #include <osmscout/import/Import.h>
24 
25 #include <list>
26 #include <map>
27 
28 #include <osmscout/Pixel.h>
29 
31 #include <osmscout/util/TileId.h>
32 #include <osmscout/util/String.h>
33 #include <osmscout/util/File.h>
35 #include <osmscout/TypeInfoSet.h>
36 
37 namespace osmscout {
38 
// Template for import steps that build a tile based area index for objects of
// type Object. Object is default constructed and must offer Read(typeConfig,
// scanner), GetType() and GetBoundingBox(), as used by MakeAreaIndex() and
// CalculateDistribution().
// NOTE(review): the class head (listing line 43) is not part of this listing.
42  template <typename Object>
44  {
45  protected:
// Number of objects counted per tile (filled in CalculateDistribution()).
46  using CoordCountMap = std::map<TileId, size_t>;
// File offsets of the objects that touch a tile (filled in MakeAreaIndex()).
47  using CoordOffsetsMap = std::map<TileId, std::list<FileOffset>>;
48 
// Per type result of the distribution scan.
49  struct TypeData
50  {
// Magnification level of the index for this type (set by CalculateStatistics()).
51  MagnificationLevel indexLevel{0};
// Number of filled cells in the index.
52  size_t indexCells=0;
// Number of entries summed over all cells.
53  size_t indexEntries=0;
54 
// Box of tiles enclosing all filled cells (grown via Include() in CalculateStatistics()).
55  TileIdBox tileBox{TileId(0,0),TileId(0,0)};
56 
// NOTE(review): listing line 57 is missing here. WriteBitmap() and
// MakeAreaIndex() access a member 'indexOffset' (the file position where the
// bitmap offset gets written), which is presumably declared on that line.
58 
// Returns true if at least one cell and at least one entry were counted.
// NOTE(review): does not modify the object, could be declared const.
59  inline bool HasEntries()
60  {
61  return indexCells>0 &&
62  indexEntries>0;
63  }
64  };
65 
// Singular type name, used in the "Scanning level distribution" action text.
66  std::string typeName;
// Plural type name, used in the per level scan progress text.
67  std::string typeNamePlural;
// Name of the data file (below the destination directory) that is scanned.
68  std::string dataFile;
// Name of the index file (below the destination directory) that is written.
69  std::string indexFile;
70 
71  protected:
// Constructor taking the names stored in the members above.
// NOTE(review): the member initializer list (listing lines 76-79) is not part
// of this listing.
72  AreaIndexGenerator(const std::string &typeName,
73  const std::string &typeNamePlural,
74  const std::string &dataFile,
75  const std::string &indexFile):
80  {}
81 
// Decides whether the cell fill counts collected for one type at the current
// level are acceptable. Virtual, so derived generators can replace the policy.
82  virtual bool FitsIndexCriteria(Progress& progress,
83  const TypeInfo& typeInfo,
84  const CoordCountMap& cellFillCount) const;
85 
// Stores level, cell count, entry count and tile box for one type in typeData.
94  void CalculateStatistics(const MagnificationLevel& level,
95  TypeData& typeData,
96  const CoordCountMap& cellFillCount) const;
97 
// Scans the data file level by level and assigns an index level to every type.
// typeData is resized to the type count; maxLevel receives the highest level
// assigned. Returns false if an IOException was caught.
98  bool CalculateDistribution(const TypeConfig& typeConfig,
99  const ImportParameter& parameter,
100  Progress& progress,
101  const std::vector<TypeInfoRef>& types,
102  std::vector<TypeData>& typeData,
103  const MagnificationLevel& minLevelParam,
104  const MagnificationLevel& maxLevelParam,
105  bool useMmap,
106  MagnificationLevel& maxLevel) const;
107 
// Writes the cell bitmap and the per cell offset lists of one type and patches
// the bitmap offset stored at typeData.indexOffset.
120  bool WriteBitmap(Progress& progress,
121  FileWriter& writer,
122  const TypeInfo& typeInfo,
123  const TypeData& typeData,
124  const CoordOffsetsMap& typeCellOffsets);
125 
// Writes the identifier of the given type to the index file. Called by
// MakeAreaIndex() directly before the per type index header; must be
// implemented by the derived class.
126  virtual void WriteTypeId(const TypeConfigRef& typeConfig,
127  const TypeInfoRef &type,
128  FileWriter &writer) const = 0;
129 
// Entry point: calculates the distribution, then writes the complete index
// file. Returns false on failure.
130  bool MakeAreaIndex(const TypeConfigRef& typeConfig,
131  const ImportParameter& parameter,
132  Progress& progress,
133  const std::vector<TypeInfoRef> &types,
134  const MagnificationLevel &areaIndexMinMag,
135  const MagnificationLevel &areaIndexMaxLevel,
136  bool useMmap);
137  };
138 
139  template <typename Object>
141  FileWriter& writer,
142  const TypeInfo& typeInfo,
143  const TypeData& typeData,
144  const CoordOffsetsMap& typeCellOffsets)
145  {
146  size_t indexEntries=0;
147  size_t dataSize=0;
148  std::array<char,10> buffer;
149 
150  //
151  // Calculate the number of entries and the overall size of the data in the bitmap entries
152  // We need the overall size of the bitmap entry data, because we would store the file offset only with
153  // that much bytes we need to address the last data entry.
154 
155  for (const auto& cell : typeCellOffsets) {
156  indexEntries+=cell.second.size();
157 
158  dataSize+=EncodeNumber(cell.second.size(),
159  buffer);
160 
161  FileOffset previousOffset=0;
162 
163  for (const auto& offset : cell.second) {
164  FileOffset data=offset-previousOffset;
165 
166  dataSize+=EncodeNumber(data,
167  buffer);
168 
169  previousOffset=offset;
170  }
171  }
172 
173  // "+1" because we add +1 to every offset, to generate offset > 0
174  uint8_t dataOffsetBytes=BytesNeededToEncodeNumber(dataSize+1);
175 
176  progress.Info("Writing map for "+
177  typeInfo.GetName()+" , "+
178  ByteSizeToString(1.0*dataOffsetBytes*typeData.tileBox.GetCount()+dataSize));
179 
180  FileOffset bitmapOffset;
181 
182  bitmapOffset=writer.GetPos();
183 
184  assert(typeData.indexOffset!=0);
185 
186  writer.SetPos(typeData.indexOffset);
187 
188  writer.WriteFileOffset(bitmapOffset);
189  writer.Write(dataOffsetBytes);
190 
191  writer.SetPos(bitmapOffset);
192 
193  // Write the bitmap with offsets for each cell
194  // We prefill with zero and only overwrite cells that have data
195  // So zero means "no data for this cell"
196  for (size_t i=0; i<typeData.tileBox.GetCount(); i++) {
197  writer.WriteFileOffset(0,
198  dataOffsetBytes);
199  }
200 
201  FileOffset dataStartOffset;
202 
203  dataStartOffset=writer.GetPos();
204 
205  // Now write the list of offsets of objects for every cell with content
206  for (const auto& cell : typeCellOffsets) {
207  FileOffset bitmapCellOffset=bitmapOffset+
208  ((cell.first.GetY()-typeData.tileBox.GetMinY())*typeData.tileBox.GetWidth()+
209  cell.first.GetX()-typeData.tileBox.GetMinX())*(FileOffset)dataOffsetBytes;
210  FileOffset previousOffset=0;
211  FileOffset cellOffset;
212 
213  assert(bitmapCellOffset>=bitmapOffset);
214 
215  cellOffset=writer.GetPos();
216 
217  writer.SetPos(bitmapCellOffset);
218 
219  assert(cellOffset>bitmapCellOffset);
220 
221  // We add +1 to make sure, that we can differentiate between "0" as "no entry" and "0" as first data entry.
222  writer.WriteFileOffset(cellOffset-dataStartOffset+1,dataOffsetBytes);
223 
224  writer.SetPos(cellOffset);
225 
226  writer.WriteNumber((uint32_t)cell.second.size());
227 
228  // FileOffsets are already in increasing order, since
229  // File is scanned from start to end
230  for (const auto& offset : cell.second) {
231  assert(offset>previousOffset);
232 
233  writer.WriteNumber((FileOffset)(offset-previousOffset));
234 
235  previousOffset=offset;
236  }
237  }
238 
239  return true;
240  }
241 
242  template <typename Object>
244  const TypeInfo& typeInfo,
245  const CoordCountMap& cellFillCount) const
246  {
247  if (cellFillCount.empty()) {
248  return true;
249  }
250 
251  size_t overallCount=0;
252  size_t maxCellCount=0;
253 
254  for (const auto& cell : cellFillCount) {
255  overallCount+=cell.second;
256  maxCellCount=std::max(maxCellCount,cell.second);
257  }
258 
259  // Average number of entries per tile cell
260  double average=overallCount*1.0/cellFillCount.size();
261 
262  size_t emptyCount=0;
263  size_t toLowCount=0;
264  size_t toHighCount=0;
265  size_t inCount=0;
266  size_t allCount=0;
267 
268  for (const auto& cell : cellFillCount) {
269  if (cell.second==0) {
270  emptyCount++;
271  }
272  else if (cell.second<0.4*average) {
273  toLowCount++;
274  }
275  else if (cell.second>128){
276  toHighCount++;
277  }
278  else {
279  inCount++;
280  }
281 
282  allCount++;
283  }
284 
285  if (toHighCount*1.0/allCount>=0.05) {
286  return false;
287  }
288 
289  if (toLowCount*1.0/allCount>=0.2) {
290  progress.Warning(typeInfo.GetName()+" has more than 20% cells with <40% of average filling ("+std::to_string(toLowCount)+"/"+std::to_string(allCount)+")");
291  }
292 
293  /*
294  // If the fill rate of the index is too low, we use this index level anyway
295  if (fillRate<parameter.GetAreaWayIndexMinFillRate()) {
296  progress.Warning(typeInfo.GetName()+" is not well distributed");
297  return true;
298  }
299 
300  // If average fill size and max fill size for tile cells
301  // is within limits, store it now.
302  if (maxCellCount<=parameter.GetAreaWayIndexCellSizeMax() &&
303  average<=parameter.GetAreaWayIndexCellSizeAverage()) {
304  return true;
305  }*/
306 
307  return true;
308  }
309 
310  template <typename Object>
312  const ImportParameter& parameter,
313  Progress& progress,
314  const std::vector<TypeInfoRef> &types,
315  const MagnificationLevel &areaIndexMinMag,
316  const MagnificationLevel &areaIndexMaxLevel,
317  bool useMmap)
318  {
319  using namespace std::string_literals;
320 
321  FileScanner scanner;
322  FileWriter writer;
323  std::vector<TypeData> typeData;
324  MagnificationLevel maxLevel;
325 
326  progress.Info("Minimum magnification: "s + areaIndexMinMag);
327 
328  //
329  // Scanning distribution
330  //
331 
332  progress.SetAction("Scanning level distribution of "s + typeName + " types"s);
333 
334  if (!CalculateDistribution(*typeConfig,
335  parameter,
336  progress,
337  types,
338  typeData,
339  areaIndexMinMag,
340  areaIndexMaxLevel,
341  useMmap,
342  maxLevel)) {
343  return false;
344  }
345 
346  // Calculate number of types which have data
347 
348  auto indexEntries=std::count_if(types.begin(),
349  types.end(),
350  [&typeData](const TypeInfoRef& type) {
351  return typeData[type->GetIndex()].HasEntries();
352  });
353 
354  //
355  // Writing index file
356  //
357 
358  progress.SetAction("Generating '"s + indexFile + "'"s);
359 
360  try {
361  writer.Open(AppendFileToDir(parameter.GetDestinationDirectory(),indexFile));
362 
363  writer.Write((uint32_t)indexEntries);
364 
365  for (const auto &type : types) {
366  size_t i=type->GetIndex();
367 
368  if (typeData[i].HasEntries()) {
369  uint8_t dataOffsetBytes=0;
370  FileOffset bitmapOffset=0;
371 
372  WriteTypeId(typeConfig,
373  type,
374  writer);
375 
376  typeData[i].indexOffset=writer.GetPos();
377 
378  writer.WriteFileOffset(bitmapOffset);
379  writer.Write(dataOffsetBytes);
380  writer.WriteNumber(typeData[i].indexLevel);
381  writer.WriteNumber(typeData[i].tileBox.GetMinX());
382  writer.WriteNumber(typeData[i].tileBox.GetMaxX());
383  writer.WriteNumber(typeData[i].tileBox.GetMinY());
384  writer.WriteNumber(typeData[i].tileBox.GetMaxY());
385  }
386  }
387 
388  scanner.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
389  dataFile),
390  FileScanner::Sequential,
391  useMmap);
392 
393  for (MagnificationLevel l=areaIndexMinMag; l<=maxLevel; l++) {
394  Magnification magnification(l);
395  TypeInfoSet indexTypes(*typeConfig);
396 
397  scanner.GotoBegin();
398 
399  for (const auto &type : types) {
400  if (typeData[type->GetIndex()].HasEntries() &&
401  typeData[type->GetIndex()].indexLevel==l) {
402  indexTypes.Set(type);
403  }
404  }
405 
406  if (indexTypes.Empty()) {
407  continue;
408  }
409 
410  progress.Info("Scanning "s + typeNamePlural + " for index level "s + l);
411 
412  std::vector<CoordOffsetsMap> typeCellOffsets(typeConfig->GetTypeCount());
413 
414  uint32_t objectCount=scanner.ReadUInt32();
415 
416  Object obj;
417 
418  for (uint32_t w=1; w <= objectCount; w++) {
419  progress.SetProgress(w, objectCount);
420 
421  FileOffset offset;
422 
423  offset=scanner.GetPos();
424 
425  obj.Read(*typeConfig,
426  scanner);
427 
428  if (!indexTypes.IsSet(obj.GetType())) {
429  continue;
430  }
431 
432  TileIdBox box(magnification, obj.GetBoundingBox());
433 
434  for (const auto& tileId : box) {
435  typeCellOffsets[obj.GetType()->GetIndex()][tileId].push_back(offset);
436  }
437  }
438 
439  for (const auto &type : indexTypes) {
440  size_t index=type->GetIndex();
441 
442  if (!WriteBitmap(progress,
443  writer,
444  *typeConfig->GetTypeInfo(index),
445  typeData[index],
446  typeCellOffsets[index])) {
447  return false;
448  }
449  }
450  }
451 
452  scanner.Close();
453  writer.Close();
454  }
455  catch (IOException& e) {
456  progress.Error(e.GetDescription());
457 
458  scanner.CloseFailsafe();
459  writer.CloseFailsafe();
460 
461  return false;
462  }
463 
464  return true;
465  }
466 
467  template <typename Object>
468  void AreaIndexGenerator<Object>::CalculateStatistics(const MagnificationLevel& level,
469  TypeData& typeData,
470  const CoordCountMap& cellFillCount) const
471  {
472  // Initialize/reset data structure
473  typeData.indexLevel=level;
474  typeData.indexCells=cellFillCount.size();
475  typeData.indexEntries=0;
476 
477  // If we do not have any entries, we are done ;-)
478  if (cellFillCount.empty()) {
479  return;
480  }
481 
482  typeData.tileBox=TileIdBox(cellFillCount.begin()->first,cellFillCount.begin()->first);
483 
484  for (const auto& cell : cellFillCount) {
485  typeData.indexEntries+=cell.second;
486 
487  typeData.tileBox=typeData.tileBox.Include(cell.first);
488  }
489  }
490 
491  template <typename Object>
492  bool AreaIndexGenerator<Object>::CalculateDistribution(const TypeConfig& typeConfig,
493  const ImportParameter& parameter,
494  Progress& progress,
495  const std::vector<TypeInfoRef>& types,
496  std::vector<TypeData>& typeData,
497  const MagnificationLevel& minLevelParam,
498  const MagnificationLevel& maxLevelParam,
499  bool useMmap,
500  MagnificationLevel& maxLevel) const
501  {
502  FileScanner scanner;
503  TypeInfoSet remainingObjectTypes;
504  MagnificationLevel level=minLevelParam;
505 
506  maxLevel=MagnificationLevel(0);
507  typeData.resize(typeConfig.GetTypeCount());
508 
509  try {
510  scanner.Open(AppendFileToDir(parameter.GetDestinationDirectory(),
511  dataFile),
512  FileScanner::Sequential,
513  useMmap);
514 
515  remainingObjectTypes.Set(types);
516 
517  while (!remainingObjectTypes.Empty() &&
518  level <= maxLevelParam) {
519  Magnification magnification(level);
520  TypeInfoSet currentObjectTypes(remainingObjectTypes);
521  std::vector<CoordCountMap> cellFillCount(typeConfig.GetTypeCount());
522 
523  progress.Info("Scanning Level " + level + " (" + std::to_string(remainingObjectTypes.Size()) + " types remaining)");
524 
525  scanner.GotoBegin();
526 
527  uint32_t objectCount=scanner.ReadUInt32();
528 
529  Object obj;
530 
531  for (uint32_t objI=1; objI <= objectCount; objI++) {
532  progress.SetProgress(objI, objectCount);
533 
534  obj.Read(typeConfig,
535  scanner);
536 
537  // Count number of entries per current type and coordinate
538  if (!currentObjectTypes.IsSet(obj.GetType())) {
539  continue;
540  }
541 
542  GeoBox boundingBox=obj.GetBoundingBox();
543 
544  TileIdBox box(TileId::GetTile(magnification,boundingBox.GetMinCoord()),
545  TileId::GetTile(magnification,boundingBox.GetMaxCoord()));
546 
547  for (const auto& tileId : box) {
548  cellFillCount[obj.GetType()->GetIndex()][tileId]++;
549  }
550  }
551 
552  // Check if cell fill for current type is in defined limits
553  for (const auto &type : currentObjectTypes) {
554  size_t typeIndex=type->GetIndex();
555 
556  if (!FitsIndexCriteria(progress,
557  *typeConfig.GetTypeInfo(typeIndex),
558  cellFillCount[typeIndex])) {
559  if (level < maxLevelParam) {
560  currentObjectTypes.Remove(type);
561  }
562  else {
563  progress.Warning(typeConfig.GetTypeInfo(typeIndex)->GetName()+" has too many index cells, that area filled over the limit");
564  }
565  }
566  }
567 
568  for (const auto &type : currentObjectTypes) {
569  size_t typeIndex=type->GetIndex();
570 
571  CalculateStatistics(level,
572  typeData[typeIndex],
573  cellFillCount[typeIndex]);
574 
575  maxLevel=std::max(maxLevel,level);
576 
577  progress.Info("Type " + type->GetName() + ", " +
578  std::to_string(typeData[type->GetIndex()].indexCells) + " cells, " +
579  std::to_string(typeData[type->GetIndex()].indexEntries) + " objects");
580 
581  remainingObjectTypes.Remove(type);
582  }
583 
584  level++;
585  }
586 
587  scanner.Close();
588  }
589  catch (IOException& e) {
590  progress.Error(e.GetDescription());
591  return false;
592  }
593 
594  return true;
595  }
596 
597 }
598 
599 #endif //LIBOSMSCOUT_AREAINDEXGENERATOR_H
std::string indexFile
Definition: AreaIndexGenerator.h:69
bool WriteBitmap(Progress &progress, FileWriter &writer, const TypeInfo &typeInfo, const TypeData &typeData, const CoordOffsetsMap &typeCellOffsets)
Definition: AreaIndexGenerator.h:140
virtual bool FitsIndexCriteria(Progress &progress, const TypeInfo &typeInfo, const CoordCountMap &cellFillCount) const
Definition: AreaIndexGenerator.h:243
bool HasEntries()
Position in file where the offset of the bitmap is written to.
Definition: AreaIndexGenerator.h:59
TileIdBox tileBox
Number of entries over all cells.
Definition: AreaIndexGenerator.h:55
OSMSCOUT_API std::string ByteSizeToString(FileOffset size)
std::string dataFile
Definition: AreaIndexGenerator.h:68
uint8_t BytesNeededToEncodeNumber(N number)
Definition: Number.h:318
Definition: Exception.h:72
std::map< TileId, std::list< FileOffset > > CoordOffsetsMap
Definition: AreaIndexGenerator.h:47
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
FileOffset indexOffset
Definition: AreaIndexGenerator.h:57
bool MakeAreaIndex(const TypeConfigRef &typeConfig, const ImportParameter &parameter, Progress &progress, const std::vector< TypeInfoRef > &types, const MagnificationLevel &areaIndexMinMag, const MagnificationLevel &areaIndexMaxLevel, bool useMmap)
Definition: AreaIndexGenerator.h:311
MagnificationLevel indexLevel
Definition: AreaIndexGenerator.h:51
void CalculateStatistics(const MagnificationLevel &level, TypeData &typeData, const CoordCountMap &cellFillCount) const
Definition: AreaIndexGenerator.h:468
Definition: Area.h:38
std::map< TileId, size_t > CoordCountMap
Definition: AreaIndexGenerator.h:46
virtual void Error(const std::string &text)
virtual void SetAction(const std::string &action)
std::string typeNamePlural
Definition: AreaIndexGenerator.h:67
Definition: AreaIndexGenerator.h:49
std::string GetDescription() const override
Definition: ImportModule.h:100
unsigned int EncodeNumber(N number, char *buffer)
Definition: Number.h:145
virtual void Info(const std::string &text)
AreaIndexGenerator(const std::string &typeName, const std::string &typeNamePlural, const std::string &dataFile, const std::string &indexFile)
Definition: AreaIndexGenerator.h:72
size_t indexEntries
Number of filled cells in index.
Definition: AreaIndexGenerator.h:53
virtual void SetProgress(double current, double total, const std::string &label="")
std::shared_ptr< TypeInfo > TypeInfoRef
Definition: TypeConfig.h:58
uint64_t FileOffset
Definition: OSMScoutTypes.h:47
size_t indexCells
magnification level of index
Definition: AreaIndexGenerator.h:52
bool CalculateDistribution(const TypeConfig &typeConfig, const ImportParameter &parameter, Progress &progress, const std::vector< TypeInfoRef > &types, std::vector< TypeData > &typeData, const MagnificationLevel &minLevelParam, const MagnificationLevel &maxLevelParam, bool useMmap, MagnificationLevel &maxLevel) const
Definition: AreaIndexGenerator.h:492
Definition: Progress.h:34
static TileId GetTile(const Magnification &magnification, const GeoCoord &coord)
std::shared_ptr< TypeConfig > TypeConfigRef
Definition: TypeConfig.h:1227
virtual void WriteTypeId(const TypeConfigRef &typeConfig, const TypeInfoRef &type, FileWriter &writer) const =0
virtual void Warning(const std::string &text)
Definition: AreaIndexGenerator.h:43
Definition: TileId.h:45
std::string typeName
Definition: AreaIndexGenerator.h:66