libosmscout  1.1.1
DataFile.h
Go to the documentation of this file.
1 #ifndef OSMSCOUT_DATAFILE_H
2 #define OSMSCOUT_DATAFILE_H
3 
4 /*
5  This source is part of the libosmscout library
6  Copyright (C) 2010 Tim Teulings
7 
8  This library is free software; you can redistribute it and/or
9  modify it under the terms of the GNU Lesser General Public
10  License as published by the Free Software Foundation; either
11  version 2.1 of the License, or (at your option) any later version.
12 
13  This library is distributed in the hope that it will be useful,
14  but WITHOUT ANY WARRANTY; without even the implied warranty of
15  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16  Lesser General Public License for more details.
17 
18  You should have received a copy of the GNU Lesser General Public
19  License along with this library; if not, write to the Free Software
20  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 */
22 
23 #include <memory>
24 #include <mutex>
25 #include <set>
26 #include <unordered_map>
27 #include <vector>
28 
29 #include <osmscout/NumericIndex.h>
30 #include <osmscout/TypeConfig.h>
31 
32 #include <osmscout/util/Cache.h>
34 #include <osmscout/util/Logger.h>
35 
36 //#include <map>
37 namespace osmscout {
38 
45  {
47  uint32_t count;
48 
49  bool operator<(const DataBlockSpan& other) const
50  {
51  return startOffset<other.startOffset;
52  }
53 
54  bool operator==(const DataBlockSpan& other) const
55  {
56  return startOffset==other.startOffset && count==other.count;
57  }
58 
59  bool operator!=(const DataBlockSpan& other) const
60  {
61  return startOffset!=other.startOffset || count!=other.count;
62  }
63  };
64 
72  template <class N>
73  class DataFile
74  {
75  public:
76  using ValueType = std::shared_ptr<N>;
78 
81 
82  private:
83  std::string datafile;
84  std::string datafilename;
85 
86  mutable ValueCache cache;
87 
88  mutable FileScanner scanner;
89 
90  mutable std::mutex accessMutex;
91 
92  protected:
94 
95  private:
96  bool ReadData(N& data) const;
97  bool ReadData(FileOffset offset,
98  N& data) const;
99 
100  public:
101  DataFile(const std::string& datafile,
102  size_t cacheSize);
103 
104  // disable copy and move
105  DataFile(const DataFile&) = delete;
106  DataFile(DataFile&&) = delete;
107  DataFile& operator=(const DataFile&) = delete;
108  DataFile& operator=(DataFile&&) = delete;
109 
110  virtual ~DataFile();
111 
112  bool Open(const TypeConfigRef& typeConfig,
113  const std::string& path,
114  bool memoryMappedData);
115  virtual bool IsOpen() const;
116  virtual bool Close();
117 
118  void FlushCache();
119 
120  std::string GetFilename() const
121  {
122  return datafilename;
123  }
124 
125  bool GetByOffset(FileOffset offset,
126  ValueType& entry) const;
127 
128  bool GetByBlockSpan(const DataBlockSpan& span,
129  std::vector<ValueType>& data) const;
130 
131  template<typename IteratorIn>
132  bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
133  std::vector<ValueType>& data) const;
134 
135  template<typename IteratorIn>
136  bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
137  const GeoBox& boundingBox,
138  std::vector<ValueType>& data) const;
139 
140  template<typename IteratorIn>
141  bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
142  std::unordered_map<FileOffset,ValueType>& dataMap) const;
143 
144  template<typename IteratorIn>
145  bool GetByBlockSpans(IteratorIn begin, IteratorIn end,
146  std::vector<ValueType>& data) const;
147  };
148 
149  template <class N>
150  DataFile<N>::DataFile(const std::string& datafile, size_t cacheSize)
151  : datafile(datafile),cache(cacheSize)
152  {
153  // no code
154  }
155 
156  template <class N>
158  {
159  if (IsOpen()) {
160  Close();
161  }
162  }
163 
169  template <class N>
170  bool DataFile<N>::ReadData(FileOffset offset,
171  N& data) const
172  {
173  try {
174  scanner.SetPos(offset);
175 
176  data.Read(*typeConfig,
177  scanner);
178  }
179  catch (const IOException& e) {
180  log.Error() << e.GetDescription();
181  return false;
182  }
183 
184  return true;
185  }
186 
192  template <class N>
193  bool DataFile<N>::ReadData(N& data) const
194  {
195  try {
196  data.Read(*typeConfig,
197  scanner);
198  }
199  catch (const IOException& e) {
200  log.Error() << e.GetDescription();
201  return false;
202  }
203 
204  return true;
205  }
206 
212  template <class N>
213  bool DataFile<N>::Open(const TypeConfigRef& typeConfig,
214  const std::string& path,
215  bool memoryMappedData)
216  {
217  this->typeConfig=typeConfig;
218 
219  datafilename=AppendFileToDir(path,datafile);
220 
221  try {
222  scanner.Open(datafilename,
223  FileScanner::LowMemRandom,
224  memoryMappedData);
225  }
226  catch (const IOException& e) {
227  log.Error() << e.GetDescription();
228  scanner.CloseFailsafe();
229  return false;
230  }
231 
232  return true;
233  }
234 
240  template <class N>
241  bool DataFile<N>::IsOpen() const
242  {
243  return scanner.IsOpen();
244  }
245 
251  template <class N>
253  {
254  typeConfig=nullptr;
255  cache.Flush();
256 
257  try {
258  if (scanner.IsOpen()) {
259  scanner.Close();
260  }
261  }
262  catch (const IOException& e) {
263  log.Error() << e.GetDescription();
264  scanner.CloseFailsafe();
265  return false;
266  }
267 
268  return true;
269  }
270 
271  template <class N>
273  {
274  std::scoped_lock<std::mutex> lock(accessMutex);
275  cache.Flush();
276  }
277 
301  template <class N>
302  template <typename IteratorIn>
303  bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
304  size_t size,
305  std::vector<ValueType>& data) const
306  {
307  if (size==0) {
308  return true;
309  }
310 
311  data.reserve(data.size()+size);
312  std::scoped_lock<std::mutex> lock(accessMutex);
313 
314  if (cache.GetMaxSize()>0 &&
315  size>cache.GetMaxSize()){
316  log.Warn() << "Cache size (" << cache.GetMaxSize() << ") for file " << datafile << " is smaller than current request (" << size << ")";
317  }
318 
319  for (IteratorIn offsetIter=begin; offsetIter!=end; ++offsetIter) {
320  ValueCacheRef entryRef;
321 
322  if (cache.GetEntry(*offsetIter,entryRef)) {
323  data.push_back(entryRef->value);
324  }
325  else {
326  ValueType value=std::make_shared<N>();
327 
328  if (!ReadData(*offsetIter,
329  *value)) {
330  log.Error() << "Error while reading data from offset " << *offsetIter << " of file " << datafilename << "!";
331  return false;
332  }
333 
334  cache.SetEntry(ValueCacheEntry(*offsetIter,value));
335  data.push_back(value);
336  }
337  }
338 
339  return true;
340  }
341 
347  template <class N>
348  template<typename IteratorIn>
349  bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
350  size_t size,
351  const GeoBox& boundingBox,
352  std::vector<ValueType>& data) const
353  {
354  if (size==0) {
355  return true;
356  }
357 
358  data.reserve(data.size()+size);
359  std::scoped_lock<std::mutex> lock(accessMutex);
360 
361  if (cache.GetMaxSize()>0 &&
362  size>cache.GetMaxSize()){
363  log.Warn() << "Cache size (" << cache.GetMaxSize() << ") for file " << datafile << " is smaller than current request (" << size << ")";
364  }
365 
366  //std::map<std::string,size_t> hitRateTypes;
367  //std::map<std::string,size_t> missRateTypes;
368  size_t inBoxCount=0;
369  for (IteratorIn offsetIter=begin; offsetIter!=end; ++offsetIter) {
370  ValueType value=std::make_shared<N>();
371 
372  ValueCacheRef entryRef;
373  if (cache.GetEntry(*offsetIter,entryRef)){
374  value=entryRef->value;
375  }else{
376  if (!ReadData(*offsetIter,
377  *value)) {
378  log.Error() << "Error while reading data from offset " << *offsetIter << " of file " << datafilename << "!";
379  return false;
380  }
381 
382  cache.SetEntry(ValueCacheEntry(*offsetIter,value));
383  }
384 
385  if (!value->Intersects(boundingBox)) {
386  //missRateTypes[value->GetType()->GetName()]++;
387  continue;
388  }
389  /*else {
390  hitRateTypes[value->GetType()->GetName()]++;
391  }*/
392 
393  inBoxCount++;
394 
395  data.push_back(value);
396  }
397 
398  size_t hitRate=inBoxCount*100/size;
399  if (size>100 && hitRate<50) {
400  log.Warn() << "Bounding box hit rate for file " << datafile << " is only " << hitRate << "% (" << inBoxCount << "/" << size << ")";
401  /*
402  for (const auto& missRateType: missRateTypes) {
403  log.Warn() << "- " << missRateType.first << " " << missRateType.second;
404  }
405  for (const auto& hitRateType: hitRateTypes) {
406  log.Warn() << "+ " << hitRateType.first << " " << hitRateType.second;
407  }*/
408  }
409 
410  return true;
411  }
412 
418  template <class N>
419  template<typename IteratorIn>
420  bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
421  size_t size,
422  std::unordered_map<FileOffset,ValueType>& dataMap) const
423  {
424  if (size==0) {
425  return true;
426  }
427 
428  std::vector<ValueType> data;
429 
430  if (!GetByOffset(begin,
431  end,
432  size,
433  data)) {
434  return false;
435  }
436 
437  for (const auto& entry : data) {
438  dataMap.insert(std::make_pair(entry->GetFileOffset(),entry));
439  }
440 
441  return true;
442  }
443 
449  template <class N>
451  ValueType& entry) const
452  {
453  std::scoped_lock<std::mutex> lock(accessMutex);
454 
455  ValueCacheRef entryRef;
456  if (cache.GetEntry(offset,entryRef)){
457  entry=entryRef->value;
458  }else{
459  ValueType value=std::make_shared<N>();
460 
461  if (!ReadData(offset,
462  *value)) {
463  log.Error() << "Error while reading data from offset " << offset << " of file " << datafilename << "!";
464  // TODO: Remove broken entry from cache
465  return false;
466  }
467 
468  cache.SetEntry(ValueCacheEntry(offset,value));
469  entry=value;
470  }
471 
472  return true;
473  }
474 
480  template <class N>
482  std::vector<ValueType>& data) const
483  {
484  if (span.count==0) {
485  return true;
486  }
487 
488  std::scoped_lock<std::mutex> lock(accessMutex);
489 
490  try {
491  bool offsetSetup=false;
492  FileOffset offset=span.startOffset;
493 
494  data.reserve(data.size()+span.count);
495 
496  for (uint32_t i=1; i<=span.count; i++) {
497  ValueCacheRef entryRef;
498  if (cache.GetEntry(offset,entryRef)){
499  data.push_back(entryRef->value);
500  offset=entryRef->value->GetNextFileOffset();
501  offsetSetup=false;
502  }else{
503  if (!offsetSetup){
504  scanner.SetPos(offset);
505  }
506 
507  ValueType value=std::make_shared<N>();
508 
509  if (!ReadData(*value)) {
510  log.Error() << "Error while reading data #" << i << " starting from offset " << span.startOffset << " of file " << datafilename << "!";
511  return false;
512  }
513 
514  cache.SetEntry(ValueCacheEntry(offset,value));
515  offset=value->GetNextFileOffset();
516  offsetSetup=true;
517  data.push_back(value);
518  }
519  }
520 
521  return true;
522  }
523  catch (const IOException& e) {
524  log.Error() << e.GetDescription();
525  return false;
526  }
527  }
528 
534  template <class N>
535  template<typename IteratorIn>
536  bool DataFile<N>::GetByBlockSpans(IteratorIn begin, IteratorIn end,
537  std::vector<ValueType>& data) const
538  {
539  uint32_t overallCount=0;
540 
541  for (IteratorIn spanIter=begin; spanIter!=end; ++spanIter) {
542  overallCount+=spanIter->count;
543  }
544 
545  data.reserve(data.size()+overallCount);
546 
547  try {
548  std::scoped_lock<std::mutex> lock(accessMutex);
549  for (IteratorIn spanIter=begin; spanIter!=end; ++spanIter) {
550  if (spanIter->count==0) {
551  continue;
552  }
553 
554  bool offsetSetup=false;
555  FileOffset offset=spanIter->startOffset;
556 
557  for (uint32_t i=1; i<=spanIter->count; i++) {
558  ValueCacheRef entryRef;
559  if (cache.GetEntry(offset,entryRef)){
560  data.push_back(entryRef->value);
561  offset=entryRef->value->GetNextFileOffset();
562  offsetSetup=false;
563  }else{
564  if (!offsetSetup){
565  scanner.SetPos(offset);
566  }
567 
568  ValueType value=std::make_shared<N>();
569 
570  if (!ReadData(*value)) {
571  log.Error() << "Error while reading data #" << i << " starting from offset " << spanIter->startOffset <<
572  " of file " << datafilename << "!";
573  return false;
574  }
575 
576  cache.SetEntry(ValueCacheEntry(offset,value));
577  offset=value->GetNextFileOffset();
578  offsetSetup=true;
579  data.push_back(value);
580  }
581  }
582  }
583  }
584  catch (const IOException& e) {
585  log.Error() << e.GetDescription();
586  return false;
587  }
588 
589  return true;
590  }
591 
599  template <class I, class N>
600  class IndexedDataFile : public DataFile<N>
601  {
602  public:
603  using ValueType = std::shared_ptr<N>;
604 
605  private:
606  using DataIndex = NumericIndex<I>;
607 
608  private:
609  DataIndex index;
610 
611  public:
612  IndexedDataFile(const std::string& datafile,
613  const std::string& indexfile,
614  size_t indexCacheSize,
615  size_t dataCacheSize);
616 
617  bool Open(const TypeConfigRef& typeConfig,
618  const std::string& path,
619  bool memoryMappedIndex,
620  bool memoryMappedData);
621  bool Close() override;
622 
623  bool IsOpen() const override;
624 
625  bool GetOffset(I id,
626  FileOffset& offset) const;
627 
628  bool Get(I id,
629  ValueType& entry) const;
630 
631  template<typename IteratorIn>
632  bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size,
633  std::vector<FileOffset>& offsets) const;
634 
635  bool Get(const std::vector<I>& ids,
636  std::vector<ValueType>& data) const;
637  bool Get(const std::list<I>& ids,
638  std::vector<ValueType>& data) const;
639  bool Get(const std::set<I>& ids,
640  std::vector<ValueType>& data) const;
641 
642  bool Get(const std::set<I>& ids,
643  std::unordered_map<I,ValueType>& data) const;
644 
645  };
646 
647  template <class I, class N>
648  IndexedDataFile<I,N>::IndexedDataFile(const std::string& datafile,
649  const std::string& indexfile,
650  size_t indexCacheSize,
651  size_t dataCacheSize)
652  : DataFile<N>(datafile,dataCacheSize),
653  index(indexfile,indexCacheSize)
654  {
655  // no code
656  }
657 
658  template <class I, class N>
660  const std::string& path,
661  bool memoryMappedIndex,
662  bool memoryMappedData)
663  {
664  if (!DataFile<N>::Open(typeConfig,
665  path,
666  memoryMappedData)) {
667  return false;
668  }
669 
670  return index.Open(path,
671  memoryMappedIndex);
672  }
673 
674  template <class I, class N>
676  {
677  bool result=true;
678 
679  if (!DataFile<N>::Close()) {
680  result=false;
681  }
682 
683  if (!index.Close()) {
684  result=false;
685  }
686 
687  return result;
688  }
689 
690  template <class I, class N>
692  {
693  return DataFile<N>::IsOpen() &&
694  index.IsOpen();
695  }
696 
697  template <class I, class N>
698  template<typename IteratorIn>
699  bool IndexedDataFile<I,N>::GetOffsets(IteratorIn begin, IteratorIn end, size_t size,
700  std::vector<FileOffset>& offsets) const
701  {
702  return index.GetOffsets(begin,
703  end,
704  size,
705  offsets);
706  }
707 
708  template <class I, class N>
710  FileOffset& offset) const
711  {
712  return index.GetOffset(id,offset);
713  }
714 
715  template <class I, class N>
716  bool IndexedDataFile<I,N>::Get(const std::vector<I>& ids,
717  std::vector<ValueType>& data) const
718  {
719  std::vector<FileOffset> offsets;
720 
721  if (!index.GetOffsets(ids.begin(),
722  ids.end(),
723  ids.size(),
724  offsets)) {
725  return false;
726  }
727 
728  return DataFile<N>::GetByOffset(offsets.begin(),
729  offsets.end(),
730  offsets.size(),
731  data);
732  }
733 
734  template <class I, class N>
735  bool IndexedDataFile<I,N>::Get(const std::list<I>& ids,
736  std::vector<ValueType>& data) const
737  {
738  std::vector<FileOffset> offsets;
739 
740  if (!index.GetOffsets(ids.begin(),
741  ids.end(),
742  ids.size(),
743  offsets)) {
744  return false;
745  }
746 
747  return DataFile<N>::GetByOffset(offsets.begin(),
748  offsets.end(),
749  offsets.size(),
750  data);
751  }
752 
753  template <class I, class N>
754  bool IndexedDataFile<I,N>::Get(const std::set<I>& ids,
755  std::vector<ValueType>& data) const
756  {
757  std::vector<FileOffset> offsets;
758 
759  if (!index.GetOffsets(ids.begin(),
760  ids.end(),
761  ids.size(),
762  offsets)) {
763  return false;
764  }
765 
766  return DataFile<N>::GetByOffset(offsets.begin(),
767  offsets.end(),
768  offsets.size(),
769  data);
770  }
771 
772  template <class I, class N>
773  bool IndexedDataFile<I,N>::Get(const std::set<I>& ids,
774  std::unordered_map<I,ValueType>& data) const
775  {
776  std::vector<FileOffset> offsets;
777  std::vector<ValueType> d;
778 
779  if (!index.GetOffsets(ids.begin(),
780  ids.end(),
781  ids.size(),
782  offsets)) {
783  return false;
784  }
785 
786  if (!DataFile<N>::GetByOffset(offsets.begin(),
787  offsets.end(),
788  offsets.size(),
789  d)) {
790  return false;
791  }
792 
793  for (const auto& value : d) {
794  data[value->GetId()]=value;
795  }
796 
797  return true;
798  }
799 
800  template <class I, class N>
802  ValueType& entry) const
803  {
804  FileOffset offset;
805 
806  if (!index.GetOffset(id,offset)) {
807  return false;
808  }
809 
810  return DataFile<N>::GetByOffset(offset,entry);
811  }
812 }
813 
814 #endif
uint32_t count
Number of entries to read.
Definition: DataFile.h:47
OSMSCOUT_API Log log
DataFile & operator=(const DataFile &)=delete
virtual bool Close()
Definition: DataFile.h:252
void FlushCache()
Definition: DataFile.h:272
Definition: Exception.h:72
Definition: DataFile.h:600
Log & Error(bool state)
Definition: Logger.h:469
std::shared_ptr< PTRoute > ValueType
Definition: DataFile.h:76
FileOffset startOffset
Offset for the first data entry referenced in the file. Data will be read starting from this position...
Definition: DataFile.h:46
bool Open(const TypeConfigRef &typeConfig, const std::string &path, bool memoryMappedData)
Definition: DataFile.h:213
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
bool GetOffset(I id, FileOffset &offset) const
Definition: DataFile.h:709
bool Open(const TypeConfigRef &typeConfig, const std::string &path, bool memoryMappedIndex, bool memoryMappedData)
Definition: DataFile.h:659
Definition: Area.h:38
bool operator!=(const DataBlockSpan &other) const
Definition: DataFile.h:59
TypeConfigRef typeConfig
Definition: DataFile.h:93
typename Cache< FileOffset, ValueType >::CacheEntry ValueCacheEntry
Definition: DataFile.h:79
typename Cache< FileOffset, ValueType >::CacheRef ValueCacheRef
Definition: DataFile.h:80
bool GetByBlockSpans(IteratorIn begin, IteratorIn end, std::vector< ValueType > &data) const
Definition: DataFile.h:536
std::string GetDescription() const override
Definition: DataFile.h:44
Definition: DataFile.h:73
uint64_t FileOffset
Definition: OSMScoutTypes.h:47
Log & Warn(bool state)
Definition: Logger.h:462
DataFile(const std::string &datafile, size_t cacheSize)
Definition: DataFile.h:150
typename OrderList::iterator CacheRef
Definition: Cache.h:98
bool operator<(const DataBlockSpan &other) const
Definition: DataFile.h:49
std::string GetFilename() const
Definition: DataFile.h:120
bool GetByOffset(FileOffset offset, ValueType &entry) const
Definition: DataFile.h:450
virtual ~DataFile()
Definition: DataFile.h:157
std::shared_ptr< TypeConfig > TypeConfigRef
Definition: TypeConfig.h:1227
virtual bool IsOpen() const
Definition: DataFile.h:241
bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size, std::vector< FileOffset > &offsets) const
Definition: DataFile.h:699
bool operator==(const DataBlockSpan &other) const
Definition: DataFile.h:54
bool GetByBlockSpan(const DataBlockSpan &span, std::vector< ValueType > &data) const
Definition: DataFile.h:481
Definition: Cache.h:64