libosmscout  1.1.1
GenLocationIndex.h
Go to the documentation of this file.
1 #ifndef OSMSCOUT_IMPORT_GENLOCATIONINDEX_H
2 #define OSMSCOUT_IMPORT_GENLOCATIONINDEX_H
3 
4 /*
5  This source is part of the libosmscout library
6  Copyright (C) 2009 Tim Teulings
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22 
23 #include <map>
24 #include <memory>
25 #include <unordered_map>
26 #include <unordered_set>
27 
28 #include <osmscout/Node.h>
29 #include <osmscout/Area.h>
30 #include <osmscout/Way.h>
31 
32 #include <osmscout/ObjectRef.h>
33 
34 #include <osmscout/TypeInfoSet.h>
35 
36 #include <osmscout/import/Import.h>
37 
39 
40 namespace osmscout {
41 
42  class LocationIndexGenerator CLASS_FINAL : public ImportModule
43  {
44  public:
45  static const char* const FILENAME_LOCATION_REGION_TXT;
46  static const char* const FILENAME_LOCATION_FULL_TXT;
47  static const char* const FILENAME_LOCATION_METRICS_TXT;
48 
49  private:
50  struct RegionMetrics CLASS_FINAL
51  {
52  uint32_t minRegionChars;
53  uint32_t maxRegionChars;
54  uint32_t minRegionWords;
55  uint32_t maxRegionWords;
56  uint32_t maxPOIWords;
57  uint32_t minLocationChars;
58  uint32_t maxLocationChars;
59  uint32_t minLocationWords;
60  uint32_t maxLocationWords;
61  uint32_t maxAddressWords;
62 
63  RegionMetrics();
64  };
65 
71  struct RegionAlias CLASS_FINAL
72  {
74  std::string name;
75  std::string altName;
76  };
77 
81  struct RegionPOI CLASS_FINAL
82  {
83  std::string name;
84  ObjectFileRef object;
85 
86  RegionPOI(const std::string& name,
87  const ObjectFileRef& object)
88  : name(name),
89  object(object)
90  {
91  // no code
92  }
93 
94  bool operator<(const RegionPOI& other) const
95  {
96  return object.GetFileOffset()<other.object.GetFileOffset();
97  }
98  };
99 
100  struct RegionAddress CLASS_FINAL
101  {
102  std::string name;
103  ObjectFileRef object;
104 
105  RegionAddress(const std::string& name,
106  const ObjectFileRef& object)
107  : name(name),
108  object(object)
109  {
110  // no code
111  }
112 
113  bool operator<(const RegionAddress& other) const
114  {
115  return object.GetFileOffset()<other.object.GetFileOffset();
116  }
117  };
118 
119  struct RegionLocation CLASS_FINAL
120  {
121  std::unordered_map<std::string,
122  size_t> names;
124  std::list<ObjectFileRef> objects;
125  std::list<RegionAddress> addresses;
126 
127  std::string GetName() const;
128  };
129 
// Forward declaration: PostalArea and the alias below are declared before
// the full definition of Region.
class Region;

//! Shared-ownership handle to a Region.
using RegionRef = std::shared_ptr<Region>;
133 
134  struct PostalArea CLASS_FINAL
135  {
136  std::string name;
137  FileOffset dataOffsetOffset;
138  std::map<std::string,RegionLocation> locations;
139 
140  explicit PostalArea(const std::string& name)
141  : name(name)
142  {
143  // no code
144  }
145 
146  void AddLocationObject(const std::string& name,
147  const ObjectFileRef& objectRef);
148  };
149 
156  class Region CLASS_FINAL
157  {
158  public:
159  typedef std::unordered_map<std::string,PostalArea> PostalAreaMap;
160 
161  private:
162  std::vector<GeoBox> boundingBoxes;
163  GeoBox boundingBox;
164  std::vector<GeoCoord> probePoints;
165 
166  public:
169 
170  ObjectFileRef reference;
171  std::string name;
172  std::string altName;
173  std::string isIn;
174  std::list<RegionAlias> aliases;
175  int8_t level{-1};
176 
177  std::vector<std::vector<GeoCoord>> areas;
178  std::list<RegionPOI> pois;
180  PostalAreaMap::iterator defaultPostalArea;
181 
182  std::list<RegionRef> regions;
183 
184  public:
185  Region();
186 
187  void CalculateMinMax();
188  bool CouldContain(const GeoBox& boundingBox) const;
189  bool CouldContain(const Region& region, bool strict) const;
190 
191  bool Contains(Region& child) const;
192 
193  inline GeoBox GetBoundingBox() const
194  {
195  return boundingBox;
196  }
197 
198  inline const std::vector<GeoBox> GetAreaBoundingBoxes() const
199  {
200  return boundingBoxes;
201  }
202 
203  void AddLocationObject(const std::string& name,
204  const std::string& postalCode,
205  const ObjectFileRef& objectRef);
206 
207  protected:
208  void CalculateProbePoints();
209  void CalculateProbePointsForArea(size_t areaIndex,
210  size_t refinement=0);
211  };
212 
213  class RegionIndex CLASS_FINAL
214  {
215  public:
216  std::map<Pixel,std::list<RegionRef> > index;
217  double cellWidth;
218  double cellHeight;
219 
220  public:
221  RegionRef GetRegionForNode(RegionRef& rootRegion,
222  const GeoCoord& coord) const;
223  };
224 
225  private:
226  uint8_t bytesForNodeFileOffset;
227  uint8_t bytesForAreaFileOffset;
228  uint8_t bytesForWayFileOffset;
229 
230  ImportErrorReporterRef errorReporter;
231 
232  private:
233  void Write(FileWriter& writer,
234  const ObjectFileRef& object);
235 
236  void AnalyseStringForIgnoreTokens(const std::string& string,
237  std::unordered_map<std::string,size_t>& ignoreTokens,
238  std::unordered_set<std::string>& blacklist);
239 
240  void CalculateRegionNameIgnoreTokens(const Region& parent,
241  std::unordered_map<std::string,size_t>& ignoreTokens,
242  std::unordered_set<std::string>& blacklist);
243 
244  void CalculatePOINameIgnoreTokens(const Region& parent,
245  std::unordered_map<std::string,size_t>& ignoreTokens,
246  std::unordered_set<std::string>& blacklist);
247 
248  void CalculateLocationNameIgnoreTokens(const Region& parent,
249  std::unordered_map<std::string,size_t>& ignoreTokens,
250  std::unordered_set<std::string>& blacklist);
251 
252  bool CalculateIgnoreTokens(const Region& rootRegion,
253  std::list<std::string>& regionTokens,
254  std::list<std::string>& poiTokens,
255  std::list<std::string>& locationTokens);
256 
257 
258  void CalculateRegionMetrics(const Region& region,
259  RegionMetrics& metrics);
260 
261  void DumpRegion(const Region& parent,
262  size_t indent,
263  std::ostream& out);
264 
265  void DumpRegionAndData(const Region& parent,
266  size_t indent,
267  std::ostream& out);
268 
269  bool DumpRegionTree(Progress& progress,
270  const Region& rootRegion,
271  const std::string& filename);
272 
273  bool DumpLocationTree(Progress& progress,
274  const Region& rootRegion,
275  const std::string& filename);
276 
277  bool DumpLocationMetrics(Progress& progress,
278  const std::string& filename,
279  const LocationIndexGenerator::RegionMetrics& metrics,
280  const std::list<std::string>& regionIgnoreTokens,
281  const std::list<std::string>& poiIgnoreTokens,
282  const std::list<std::string>& locationIgnoreTokens);
283 
284  bool AddRegion(Region& parent,
285  const RegionRef& region,
286  bool assume_contains=true);
287 
288  bool GetBoundaryAreas(const ImportParameter& parameter,
289  Progress& progress,
290  const TypeConfigRef& typeConfig,
291  const TypeInfoSet& boundaryTypes,
292  std::vector<std::list<RegionRef>>& boundaryAreas);
293 
294  void SortInBoundaries(Progress& progress,
295  Region& rootRegion,
296  std::list<RegionRef>& boundaryAreas);
297 
298  bool GetRegionAreas(const TypeConfig& typeConfig,
299  const ImportParameter& parameter,
300  Progress& progress,
301  std::list<RegionRef>& regionAreas);
302 
303  bool SortInRegionAreas(Progress& progress,
304  Region& rootRegion,
305  std::list<RegionRef>& regionAreas);
306 
307  void SortInRegion(RegionRef& area,
308  std::vector<std::list<RegionRef> >& regionTree,
309  unsigned long level);
310 
311  unsigned long GetRegionTreeDepth(const Region& rootRegion);
312 
313  void IndexRegions(const std::vector<std::list<RegionRef> >& regionTree,
314  RegionIndex& regionIndex);
315 
316  void AddAliasToRegion(Region& region,
317  const RegionAlias& location,
318  const GeoCoord& node);
319 
320  bool IndexRegionNodes(const TypeConfigRef& typeConfig,
321  const ImportParameter& parameter,
322  Progress& progress,
323  RegionRef& rootRegion,
324  const RegionIndex& regionIndex);
325 
326  bool AddLocationAreaToRegion(Region& region,
327  const Area& area,
328  const std::vector<Point>& nodes,
329  const std::string& name,
330  const std::string& postalCode,
331  const GeoBox& boundingBox);
332 
333  void AddLocationAreaToRegion(RegionRef& rootRegion,
334  const Area& area,
335  const Area::Ring& ring,
336  const std::string& name,
337  const std::string& postalCode,
338  const RegionIndex& regionIndex);
339 
340  bool IndexLocationAreas(const TypeConfig& typeConfig,
341  const ImportParameter& parameter,
342  Progress& progress,
343  RegionRef& rootRegion,
344  const RegionIndex& regionIndex);
345 
346  bool AddLocationWayToRegion(Region& region,
347  const Way& way,
348  const std::string& name,
349  const std::string& postalCode,
350  const GeoBox& boundingBox);
351 
352  bool IndexLocationWays(const TypeConfig& typeConfig,
353  const ImportParameter& parameter,
354  Progress& progress,
355  RegionRef& rootRegion,
356  const RegionIndex& regionIndex);
357 
358  void AddAddressToRegion(Progress& progress,
359  Region& region,
360  const ObjectFileRef& object,
361  const std::string& location,
362  const std::string& address,
363  const std::string &postalCode,
364  bool allowDuplicates,
365  bool& added);
366 
367  void AddAddressAreaToRegion(Progress& progress,
368  Region& region,
369  const FileOffset& fileOffset,
370  const std::string& location,
371  const std::string& address,
372  const std::string &postalCode,
373  const std::vector<Point>& nodes,
374  const GeoBox& boundingBox,
375  bool& added);
376 
377  void AddPOIAreaToRegion(Progress& progress,
378  Region& region,
379  const FileOffset& fileOffset,
380  const std::string& name,
381  const std::vector<Point>& nodes,
382  const GeoBox& boundingBox,
383  bool& added);
384 
385  bool IndexAddressAreas(const TypeConfig& typeConfig,
386  const ImportParameter& parameter,
387  Progress& progress,
388  RegionRef& rootRegion,
389  const RegionIndex& regionIndex);
390 
391  bool AddAddressWayToRegion(Progress& progress,
392  Region& region,
393  const FileOffset& fileOffset,
394  const std::string& location,
395  const std::string& address,
396  const std::vector<Point>& nodes,
397  const GeoBox& boundingBox,
398  bool& added);
399 
400  bool AddPOIWayToRegion(Progress& progress,
401  Region& region,
402  const FileOffset& fileOffset,
403  const std::string& name,
404  const std::vector<Point>& nodes,
405  const GeoBox& boundingBox,
406  bool& added);
407 
408  bool IndexAddressWays(const TypeConfig& typeConfig,
409  const ImportParameter& parameter,
410  Progress& progress,
411  RegionRef& rootRegion,
412  const RegionIndex& regionIndex);
413 
426  std::map<std::string,RegionLocation>::iterator FindLocation(Progress& progress,
427  Region& region,
428  PostalArea& postalArea,
429  const std::string &locationName);
430 
431  void AddAddressNodeToRegion(Progress& progress,
432  Region& region,
433  const FileOffset& fileOffset,
434  const std::string& location,
435  const std::string& address,
436  const std::string& postalCode,
437  bool& added);
438 
439  void AddPOINodeToRegion(Region& region,
440  const FileOffset& fileOffset,
441  const std::string& name,
442  bool& added);
443 
444  bool IndexAddressNodes(const TypeConfig& typeConfig,
445  const ImportParameter& parameter,
446  Progress& progress,
447  RegionRef& rootRegion,
448  const RegionIndex& regionIndex);
449 
450  void CleanupPostalAreas(Region& region);
451 
452  void WriteIgnoreTokens(FileWriter& writer,
453  const std::list<std::string>& regionIgnoreTokens,
454  const std::list<std::string>& poiIgnoreTokens,
455  const std::list<std::string>& locationIgnoreTokens);
456 
457  void WriteRegionMetrics(FileWriter& writer,
458  const RegionMetrics& metrics);
459 
460  void WriteRegionIndexEntry(FileWriter& writer,
461  const Region& parentRegion,
462  Region& region);
463 
464  void WriteRegionIndex(FileWriter& writer,
465  Region& root);
466 
467  void WriteRegionDataEntry(FileWriter& writer,
468  Region& region);
469 
470  void WriteRegionData(FileWriter& writer,
471  Region& root);
472 
473  void WritePostalArea(FileWriter& writer,
474  PostalArea& postalArea);
475 
476  void WriteAddressDataEntry(FileWriter& writer,
477  Region& region);
478 
479  void WriteAddressData(FileWriter& writer,
480  Region& root);
481 
482  public:
483  void GetDescription(const ImportParameter& parameter,
484  ImportModuleDescription& description) const override;
485 
486  bool Import(const TypeConfigRef& typeConfig,
487  const ImportParameter& parameter,
488  Progress& progress) override;
489  };
490 }
491 
492 #endif
GeoBox GetBoundingBox() const
Checks whether child is within this.
Definition: GenLocationIndex.h:193
uint32_t minLocationWords
Definition: GenLocationIndex.h:59
FileOffset indexOffset
Offset into the index file.
Definition: GenLocationIndex.h:167
PostalAreaMap::iterator defaultPostalArea
PostalArea for postal code "".
Definition: GenLocationIndex.h:180
std::shared_ptr< ImportErrorReporter > ImportErrorReporterRef
Definition: ImportErrorReporter.h:137
std::list< RegionRef > regions
A list of sub regions.
Definition: GenLocationIndex.h:182
std::map< std::string, RegionLocation > locations
list of indexed objects in this region
Definition: GenLocationIndex.h:138
double cellWidth
Definition: GenLocationIndex.h:217
std::map< Pixel, std::list< RegionRef > > index
Definition: GenLocationIndex.h:216
bool operator<(const RegionPOI &other) const
Definition: GenLocationIndex.h:94
Definition: Area.h:86
std::list< RegionAddress > addresses
Addresses at this location.
Definition: GenLocationIndex.h:125
std::list< RegionPOI > pois
A list of POIs in this region.
Definition: GenLocationIndex.h:178
uint32_t maxRegionChars
Definition: GenLocationIndex.h:53
std::string altName
Definition: GenLocationIndex.h:75
uint32_t maxRegionWords
Definition: GenLocationIndex.h:55
const std::vector< GeoBox > GetAreaBoundingBoxes() const
Definition: GenLocationIndex.h:198
uint32_t minRegionChars
Definition: GenLocationIndex.h:52
RegionAddress(const std::string &name, const ObjectFileRef &object)
Definition: GenLocationIndex.h:105
RegionPOI(const std::string &name, const ObjectFileRef &object)
Definition: GenLocationIndex.h:86
ObjectFileRef object
Object.
Definition: GenLocationIndex.h:84
static const char *const FILENAME_LOCATION_REGION_TXT
Definition: GenLocationIndex.h:45
Definition: Area.h:38
Definition: Location.h:38
uint32_t minLocationChars
Definition: GenLocationIndex.h:57
double cellHeight
Definition: GenLocationIndex.h:218
std::unordered_map< std::string, PostalArea > PostalAreaMap
Definition: GenLocationIndex.h:159
static const char *const FILENAME_LOCATION_METRICS_TXT
Definition: GenLocationIndex.h:47
bool operator<(const RegionAddress &other) const
Definition: GenLocationIndex.h:113
FileOffset dataOffsetOffset
Offset of place where the address list offset is stored.
Definition: GenLocationIndex.h:123
uint32_t maxPOIWords
Definition: GenLocationIndex.h:56
#define CLASS_FINAL
Definition: Compiler.h:26
Definition: LaneAgent.h:60
std::string isIn
Name of the parent region as stated in OSM (is_in tag)
Definition: GenLocationIndex.h:173
FileOffset reference
Reference to the node that is the alias.
Definition: GenLocationIndex.h:73
ObjectFileRef reference
Reference to the object this area is based on.
Definition: GenLocationIndex.h:170
std::unordered_map< std::string, size_t > names
map of names in different case used for this location and their use count
Definition: GenLocationIndex.h:122
uint32_t minRegionWords
Definition: GenLocationIndex.h:54
uint64_t FileOffset
Definition: OSMScoutTypes.h:47
uint32_t maxLocationChars
Definition: GenLocationIndex.h:58
uint32_t maxAddressWords
Definition: GenLocationIndex.h:61
std::list< RegionAlias > aliases
Locations that are represented by this region.
Definition: GenLocationIndex.h:174
Definition: Progress.h:34
std::list< ObjectFileRef > objects
Objects that represent this location.
Definition: GenLocationIndex.h:124
std::shared_ptr< TypeConfig > TypeConfigRef
Definition: TypeConfig.h:1227
PostalArea(const std::string &name)
Definition: GenLocationIndex.h:140
FileOffset dataOffset
Offset into the index file.
Definition: GenLocationIndex.h:168
std::vector< std::vector< GeoCoord > > areas
the geometric area of this region
Definition: GenLocationIndex.h:177
PostalAreaMap postalAreas
Collection of objects without a postal code.
Definition: GenLocationIndex.h:179
static const char *const FILENAME_LOCATION_FULL_TXT
Definition: GenLocationIndex.h:46
uint32_t maxLocationWords
Definition: GenLocationIndex.h:60