libosmscout 1.1.1
Loading...
Searching...
No Matches
DataFile.h
Go to the documentation of this file.
1#ifndef OSMSCOUT_DATAFILE_H
2#define OSMSCOUT_DATAFILE_H
3
4/*
5 This source is part of the libosmscout library
6 Copyright (C) 2010 Tim Teulings
7
8 This library is free software; you can redistribute it and/or
9 modify it under the terms of the GNU Lesser General Public
10 License as published by the Free Software Foundation; either
11 version 2.1 of the License, or (at your option) any later version.
12
13 This library is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 Lesser General Public License for more details.
17
18 You should have received a copy of the GNU Lesser General Public
19 License along with this library; if not, write to the Free Software
20 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21*/
22
23#include <memory>
24#include <mutex>
25#include <set>
26#include <unordered_map>
27#include <vector>
28
29#include <osmscout/TypeConfig.h>
30
33
34#include <osmscout/util/Cache.h>
35#include <osmscout/log/Logger.h>
36
37//#include <map>
38namespace osmscout {
39
46 {
48 uint32_t count;
49
50 bool operator<(const DataBlockSpan& other) const
51 {
52 return startOffset<other.startOffset;
53 }
54
55 bool operator==(const DataBlockSpan& other) const
56 {
57 return startOffset==other.startOffset && count==other.count;
58 }
59
60 bool operator!=(const DataBlockSpan& other) const
61 {
62 return startOffset!=other.startOffset || count!=other.count;
63 }
64 };
65
73 template <class N>
75 {
76 public:
77 using ValueType = std::shared_ptr<N>;
79
82
83 private:
84 std::string datafile;
85 std::string datafilename;
86
87 mutable ValueCache cache;
88
89 mutable FileScanner scanner;
90
91 mutable std::mutex accessMutex;
92
93 protected:
95
96 private:
97 bool ReadData(N& data) const;
98 bool ReadData(FileOffset offset,
99 N& data) const;
100
101 public:
102 DataFile(const std::string& datafile,
103 size_t cacheSize);
104
105 // disable copy and move
106 DataFile(const DataFile&) = delete;
107 DataFile(DataFile&&) = delete;
108 DataFile& operator=(const DataFile&) = delete;
110
111 virtual ~DataFile();
112
114 const std::string& path,
115 bool memoryMappedData);
116 virtual bool IsOpen() const;
117 virtual bool Close();
118
120
121 std::string GetFilename() const
122 {
123 return datafilename;
124 }
125
127 ValueType& entry) const;
128
130 std::vector<ValueType>& data) const;
131
132 template<typename IteratorIn>
133 bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
134 std::vector<ValueType>& data) const;
135
136 template<typename IteratorIn>
137 bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
138 const GeoBox& boundingBox,
139 std::vector<ValueType>& data) const;
140
141 template<typename IteratorIn>
142 bool GetByOffset(IteratorIn begin, IteratorIn end, size_t size,
143 std::unordered_map<FileOffset,ValueType>& dataMap) const;
144
145 template<typename IteratorIn>
146 bool GetByBlockSpans(IteratorIn begin, IteratorIn end,
147 std::vector<ValueType>& data) const;
148 };
149
150 template <class N>
151 DataFile<N>::DataFile(const std::string& datafile, size_t cacheSize)
152 : datafile(datafile),cache(cacheSize)
153 {
154 // no code
155 }
156
157 template <class N>
159 {
160 if (IsOpen()) {
161 Close();
162 }
163 }
164
170 template <class N>
171 bool DataFile<N>::ReadData(FileOffset offset,
172 N& data) const
173 {
174 try {
175 scanner.SetPos(offset);
176
177 data.Read(*typeConfig,
178 scanner);
179 }
180 catch (const IOException& e) {
181 log.Error() << e.GetDescription();
182 return false;
183 }
184
185 return true;
186 }
187
193 template <class N>
194 bool DataFile<N>::ReadData(N& data) const
195 {
196 try {
197 data.Read(*typeConfig,
198 scanner);
199 }
200 catch (const IOException& e) {
201 log.Error() << e.GetDescription();
202 return false;
203 }
204
205 return true;
206 }
207
213 template <class N>
215 const std::string& path,
216 bool memoryMappedData)
217 {
218 this->typeConfig=typeConfig;
219
220 datafilename=AppendFileToDir(path,datafile);
221
222 try {
223 scanner.Open(datafilename,
224 FileScanner::LowMemRandom,
225 memoryMappedData);
226 }
227 catch (const IOException& e) {
228 log.Error() << e.GetDescription();
229 scanner.CloseFailsafe();
230 return false;
231 }
232
233 return true;
234 }
235
241 template <class N>
243 {
244 return scanner.IsOpen();
245 }
246
252 template <class N>
254 {
255 typeConfig=nullptr;
256 cache.Flush();
257
258 try {
259 if (scanner.IsOpen()) {
260 scanner.Close();
261 }
262 }
263 catch (const IOException& e) {
264 log.Error() << e.GetDescription();
265 scanner.CloseFailsafe();
266 return false;
267 }
268
269 return true;
270 }
271
272 template <class N>
274 {
275 std::scoped_lock<std::mutex> lock(accessMutex);
276 cache.Flush();
277 }
278
302 template <class N>
303 template <typename IteratorIn>
304 bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
305 size_t size,
306 std::vector<ValueType>& data) const
307 {
308 if (size==0) {
309 return true;
310 }
311
312 data.reserve(data.size()+size);
313 std::scoped_lock<std::mutex> lock(accessMutex);
314
315 if (cache.GetMaxSize()>0 &&
316 size>cache.GetMaxSize()){
317 log.Warn() << "Cache size (" << cache.GetMaxSize() << ") for file " << datafile << " is smaller than current request (" << size << ")";
318 }
319
320 for (IteratorIn offsetIter=begin; offsetIter!=end; ++offsetIter) {
321 ValueCacheRef entryRef;
322
323 if (cache.GetEntry(*offsetIter,entryRef)) {
324 data.push_back(entryRef->value);
325 }
326 else {
327 ValueType value=std::make_shared<N>();
328
329 if (!ReadData(*offsetIter,
330 *value)) {
331 log.Error() << "Error while reading data from offset " << *offsetIter << " of file " << datafilename << "!";
332 return false;
333 }
334
335 cache.SetEntry(ValueCacheEntry(*offsetIter,value));
336 data.push_back(value);
337 }
338 }
339
340 return true;
341 }
342
348 template <class N>
349 template<typename IteratorIn>
350 bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
351 size_t size,
352 const GeoBox& boundingBox,
353 std::vector<ValueType>& data) const
354 {
355 if (size==0) {
356 return true;
357 }
358
359 data.reserve(data.size()+size);
360 std::scoped_lock<std::mutex> lock(accessMutex);
361
362 if (cache.GetMaxSize()>0 &&
363 size>cache.GetMaxSize()){
364 log.Warn() << "Cache size (" << cache.GetMaxSize() << ") for file " << datafile << " is smaller than current request (" << size << ")";
365 }
366
367 //std::map<std::string,size_t> hitRateTypes;
368 //std::map<std::string,size_t> missRateTypes;
369 size_t inBoxCount=0;
370 for (IteratorIn offsetIter=begin; offsetIter!=end; ++offsetIter) {
371 ValueType value=std::make_shared<N>();
372
373 ValueCacheRef entryRef;
374 if (cache.GetEntry(*offsetIter,entryRef)){
375 value=entryRef->value;
376 }else{
377 if (!ReadData(*offsetIter,
378 *value)) {
379 log.Error() << "Error while reading data from offset " << *offsetIter << " of file " << datafilename << "!";
380 return false;
381 }
382
383 cache.SetEntry(ValueCacheEntry(*offsetIter,value));
384 }
385
386 if (!value->Intersects(boundingBox)) {
387 //missRateTypes[value->GetType()->GetName()]++;
388 continue;
389 }
390 /*else {
391 hitRateTypes[value->GetType()->GetName()]++;
392 }*/
393
394 inBoxCount++;
395
396 data.push_back(value);
397 }
398
399 size_t hitRate=inBoxCount*100/size;
400 if (size>100 && hitRate<50) {
401 log.Warn() << "Bounding box hit rate for file " << datafile << " is only " << hitRate << "% (" << inBoxCount << "/" << size << ")";
402 /*
403 for (const auto& missRateType: missRateTypes) {
404 log.Warn() << "- " << missRateType.first << " " << missRateType.second;
405 }
406 for (const auto& hitRateType: hitRateTypes) {
407 log.Warn() << "+ " << hitRateType.first << " " << hitRateType.second;
408 }*/
409 }
410
411 return true;
412 }
413
419 template <class N>
420 template<typename IteratorIn>
421 bool DataFile<N>::GetByOffset(IteratorIn begin, IteratorIn end,
422 size_t size,
423 std::unordered_map<FileOffset,ValueType>& dataMap) const
424 {
425 if (size==0) {
426 return true;
427 }
428
429 std::vector<ValueType> data;
430
431 if (!GetByOffset(begin,
432 end,
433 size,
434 data)) {
435 return false;
436 }
437
438 for (const auto& entry : data) {
439 dataMap.emplace(entry->GetFileOffset(),entry);
440 }
441
442 return true;
443 }
444
450 template <class N>
452 ValueType& entry) const
453 {
454 std::scoped_lock<std::mutex> lock(accessMutex);
455
456 ValueCacheRef entryRef;
457 if (cache.GetEntry(offset,entryRef)){
458 entry=entryRef->value;
459 }else{
460 ValueType value=std::make_shared<N>();
461
462 if (!ReadData(offset,
463 *value)) {
464 log.Error() << "Error while reading data from offset " << offset << " of file " << datafilename << "!";
465 // TODO: Remove broken entry from cache
466 return false;
467 }
468
469 cache.SetEntry(ValueCacheEntry(offset,value));
470 entry=value;
471 }
472
473 return true;
474 }
475
481 template <class N>
483 std::vector<ValueType>& data) const
484 {
485 if (span.count==0) {
486 return true;
487 }
488
489 std::scoped_lock<std::mutex> lock(accessMutex);
490
491 try {
492 bool offsetSetup=false;
493 FileOffset offset=span.startOffset;
494
495 data.reserve(data.size()+span.count);
496
497 for (uint32_t i=1; i<=span.count; i++) {
498 ValueCacheRef entryRef;
499 if (cache.GetEntry(offset,entryRef)){
500 data.push_back(entryRef->value);
501 offset=entryRef->value->GetNextFileOffset();
502 offsetSetup=false;
503 }else{
504 if (!offsetSetup){
505 scanner.SetPos(offset);
506 }
507
508 ValueType value=std::make_shared<N>();
509
510 if (!ReadData(*value)) {
511 log.Error() << "Error while reading data #" << i << " starting from offset " << span.startOffset << " of file " << datafilename << "!";
512 return false;
513 }
514
515 cache.SetEntry(ValueCacheEntry(offset,value));
516 offset=value->GetNextFileOffset();
517 offsetSetup=true;
518 data.push_back(value);
519 }
520 }
521
522 return true;
523 }
524 catch (const IOException& e) {
525 log.Error() << e.GetDescription();
526 return false;
527 }
528 }
529
535 template <class N>
536 template<typename IteratorIn>
537 bool DataFile<N>::GetByBlockSpans(IteratorIn begin, IteratorIn end,
538 std::vector<ValueType>& data) const
539 {
540 uint32_t overallCount=0;
541
542 for (IteratorIn spanIter=begin; spanIter!=end; ++spanIter) {
543 overallCount+=spanIter->count;
544 }
545
546 data.reserve(data.size()+overallCount);
547
548 try {
549 std::scoped_lock<std::mutex> lock(accessMutex);
550 for (IteratorIn spanIter=begin; spanIter!=end; ++spanIter) {
551 if (spanIter->count==0) {
552 continue;
553 }
554
555 bool offsetSetup=false;
556 FileOffset offset=spanIter->startOffset;
557
558 for (uint32_t i=1; i<=spanIter->count; i++) {
559 ValueCacheRef entryRef;
560 if (cache.GetEntry(offset,entryRef)){
561 data.push_back(entryRef->value);
562 offset=entryRef->value->GetNextFileOffset();
563 offsetSetup=false;
564 }else{
565 if (!offsetSetup){
566 scanner.SetPos(offset);
567 }
568
569 ValueType value=std::make_shared<N>();
570
571 if (!ReadData(*value)) {
572 log.Error() << "Error while reading data #" << i << " starting from offset " << spanIter->startOffset <<
573 " of file " << datafilename << "!";
574 return false;
575 }
576
577 cache.SetEntry(ValueCacheEntry(offset,value));
578 offset=value->GetNextFileOffset();
579 offsetSetup=true;
580 data.push_back(value);
581 }
582 }
583 }
584 }
585 catch (const IOException& e) {
586 log.Error() << e.GetDescription();
587 return false;
588 }
589
590 return true;
591 }
592
600 template <class I, class N>
601 class IndexedDataFile : public DataFile<N>
602 {
603 public:
604 using ValueType = std::shared_ptr<N>;
605
606 private:
607 using DataIndex = NumericIndex<I>;
608
609 private:
610 DataIndex index;
611
612 public:
613 IndexedDataFile(const std::string& datafile,
614 const std::string& indexfile,
615 size_t indexCacheSize,
616 size_t dataCacheSize);
617
619 const std::string& path,
620 bool memoryMappedIndex,
621 bool memoryMappedData);
622 bool Close() override;
623
624 bool IsOpen() const override;
625
626 bool GetOffset(I id,
627 FileOffset& offset) const;
628
629 bool Get(I id,
630 ValueType& entry) const;
631
632 template<typename IteratorIn>
633 bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size,
634 std::vector<FileOffset>& offsets) const;
635
636 bool Get(const std::vector<I>& ids,
637 std::vector<ValueType>& data) const;
638 bool Get(const std::list<I>& ids,
639 std::vector<ValueType>& data) const;
640 bool Get(const std::set<I>& ids,
641 std::vector<ValueType>& data) const;
642
643 bool Get(const std::set<I>& ids,
644 std::unordered_map<I,ValueType>& data) const;
645
646 };
647
648 template <class I, class N>
649 IndexedDataFile<I,N>::IndexedDataFile(const std::string& datafile,
650 const std::string& indexfile,
651 size_t indexCacheSize,
652 size_t dataCacheSize)
653 : DataFile<N>(datafile,dataCacheSize),
654 index(indexfile,indexCacheSize)
655 {
656 // no code
657 }
658
659 template <class I, class N>
661 const std::string& path,
662 bool memoryMappedIndex,
663 bool memoryMappedData)
664 {
666 path,
667 memoryMappedData)) {
668 return false;
669 }
670
671 return index.Open(path,
672 memoryMappedIndex);
673 }
674
675 template <class I, class N>
677 {
678 bool result=true;
679
680 if (!DataFile<N>::Close()) {
681 result=false;
682 }
683
684 if (!index.Close()) {
685 result=false;
686 }
687
688 return result;
689 }
690
691 template <class I, class N>
693 {
694 return DataFile<N>::IsOpen() &&
695 index.IsOpen();
696 }
697
698 template <class I, class N>
699 template<typename IteratorIn>
700 bool IndexedDataFile<I,N>::GetOffsets(IteratorIn begin, IteratorIn end, size_t size,
701 std::vector<FileOffset>& offsets) const
702 {
703 return index.GetOffsets(begin,
704 end,
705 size,
706 offsets);
707 }
708
709 template <class I, class N>
711 FileOffset& offset) const
712 {
713 return index.GetOffset(id,offset);
714 }
715
716 template <class I, class N>
717 bool IndexedDataFile<I,N>::Get(const std::vector<I>& ids,
718 std::vector<ValueType>& data) const
719 {
720 std::vector<FileOffset> offsets;
721
722 if (!index.GetOffsets(ids.begin(),
723 ids.end(),
724 ids.size(),
725 offsets)) {
726 return false;
727 }
728
729 return DataFile<N>::GetByOffset(offsets.begin(),
730 offsets.end(),
731 offsets.size(),
732 data);
733 }
734
735 template <class I, class N>
736 bool IndexedDataFile<I,N>::Get(const std::list<I>& ids,
737 std::vector<ValueType>& data) const
738 {
739 std::vector<FileOffset> offsets;
740
741 if (!index.GetOffsets(ids.begin(),
742 ids.end(),
743 ids.size(),
744 offsets)) {
745 return false;
746 }
747
748 return DataFile<N>::GetByOffset(offsets.begin(),
749 offsets.end(),
750 offsets.size(),
751 data);
752 }
753
754 template <class I, class N>
755 bool IndexedDataFile<I,N>::Get(const std::set<I>& ids,
756 std::vector<ValueType>& data) const
757 {
758 std::vector<FileOffset> offsets;
759
760 if (!index.GetOffsets(ids.begin(),
761 ids.end(),
762 ids.size(),
763 offsets)) {
764 return false;
765 }
766
767 return DataFile<N>::GetByOffset(offsets.begin(),
768 offsets.end(),
769 offsets.size(),
770 data);
771 }
772
773 template <class I, class N>
774 bool IndexedDataFile<I,N>::Get(const std::set<I>& ids,
775 std::unordered_map<I,ValueType>& data) const
776 {
777 std::vector<FileOffset> offsets;
778 std::vector<ValueType> d;
779
780 if (!index.GetOffsets(ids.begin(),
781 ids.end(),
782 ids.size(),
783 offsets)) {
784 return false;
785 }
786
787 if (!DataFile<N>::GetByOffset(offsets.begin(),
788 offsets.end(),
789 offsets.size(),
790 d)) {
791 return false;
792 }
793
794 for (const auto& value : d) {
795 data[value->GetId()]=value;
796 }
797
798 return true;
799 }
800
801 template <class I, class N>
803 ValueType& entry) const
804 {
805 FileOffset offset;
806
807 if (!index.GetOffset(id,offset)) {
808 return false;
809 }
810
811 return DataFile<N>::GetByOffset(offset,entry);
812 }
813}
814
815#endif
Definition Cache.h:59
typename OrderList::iterator CacheRef
Definition Cache.h:98
DataFile(DataFile &&)=delete
std::shared_ptr< N > ValueType
Definition DataFile.h:77
bool GetByBlockSpans(IteratorIn begin, IteratorIn end, std::vector< ValueType > &data) const
Definition DataFile.h:537
bool GetByOffset(FileOffset offset, ValueType &entry) const
Definition DataFile.h:451
DataFile(const DataFile &)=delete
void FlushCache()
Definition DataFile.h:273
bool Open(const TypeConfigRef &typeConfig, const std::string &path, bool memoryMappedData)
Definition DataFile.h:214
DataFile(const std::string &datafile, size_t cacheSize)
Definition DataFile.h:151
DataFile & operator=(DataFile &&)=delete
typename Cache< FileOffset, ValueType >::CacheRef ValueCacheRef
Definition DataFile.h:81
DataFile & operator=(const DataFile &)=delete
virtual bool Close()
Definition DataFile.h:253
typename Cache< FileOffset, ValueType >::CacheEntry ValueCacheEntry
Definition DataFile.h:80
virtual bool IsOpen() const
Definition DataFile.h:242
bool GetByBlockSpan(const DataBlockSpan &span, std::vector< ValueType > &data) const
Definition DataFile.h:482
std::string GetFilename() const
Definition DataFile.h:121
virtual ~DataFile()
Definition DataFile.h:158
TypeConfigRef typeConfig
Definition DataFile.h:94
Cache< FileOffset, std::shared_ptr< N > > ValueCache
Definition DataFile.h:78
Definition Exception.h:73
std::string GetDescription() const override
std::shared_ptr< N > ValueType
Definition DataFile.h:604
bool Open(const TypeConfigRef &typeConfig, const std::string &path, bool memoryMappedIndex, bool memoryMappedData)
Definition DataFile.h:660
bool Close() override
Definition DataFile.h:676
bool IsOpen() const override
Definition DataFile.h:692
IndexedDataFile(const std::string &datafile, const std::string &indexfile, size_t indexCacheSize, size_t dataCacheSize)
Definition DataFile.h:649
bool GetOffsets(IteratorIn begin, IteratorIn end, size_t size, std::vector< FileOffset > &offsets) const
Definition DataFile.h:700
bool GetOffset(I id, FileOffset &offset) const
Definition DataFile.h:710
bool Get(I id, ValueType &entry) const
Definition DataFile.h:802
Log & Error(bool state)
Definition Logger.h:414
Definition NumericIndex.h:43
OSMSCOUT_API std::string AppendFileToDir(const std::string &dir, const std::string &file)
OSMSCOUT_API Log log
Definition LoggerImpl.h:95
uint64_t FileOffset
Definition OSMScoutTypes.h:46
std::shared_ptr< TypeConfig > TypeConfigRef
Definition TypeConfig.h:1396
Definition Area.h:39
Definition Cache.h:65
Definition DataFile.h:46
uint32_t count
Number of entries to read.
Definition DataFile.h:48
bool operator<(const DataBlockSpan &other) const
Definition DataFile.h:50
bool operator==(const DataBlockSpan &other) const
Definition DataFile.h:55
FileOffset startOffset
Offset for the first data entry referenced in the file. Data will be read starting from this position...
Definition DataFile.h:47
bool operator!=(const DataBlockSpan &other) const
Definition DataFile.h:60