90 const ImportParameter& parameter,
98 std::vector<N> startingIds;
99 std::vector<FileOffset> pageStarts;
101 std::vector<uint32_t> indexPageCounts;
107 uint32_t pageSize=(uint32_t)parameter.GetNumericIndexPageSize();
113 progress.
SetAction(std::string(
"Generating '")+indexfile+
"'");
121 FileScanner::Sequential,
true);
123 dataCount=scanner.ReadUInt32();
125 writer.WriteNumber(pageSize);
126 writer.WriteNumber(dataCount);
128 levelsOffset=writer.GetPos();
129 writer.Write((uint32_t)0);
131 lastLevelPageStartOffset=writer.GetPos();
134 indexPageCountsOffset=writer.GetPos();
137 writer.FlushCurrentBlockWithZeros(pageSize);
139 progress.
Info(std::string(
"Writing level ")+std::to_string(1)+
" ("+std::to_string(dataCount)+
" entries)");
143 uint32_t currentPageSize=0;
145 for (uint32_t d=0; d<dataCount; d++) {
151 readPos=scanner.GetPos();
153 ReadData(*typeConfig,
158 if (data.GetId()<=lastId) {
159 progress.
Error(
"Current id "+std::to_string(data.GetId())+
" <= last id "+std::to_string(lastId));
161 assert(data.GetId()>lastId);
162 assert(readPos>lastPos);
165 if (currentPageSize>0) {
166 std::array<char,10> b1;
167 std::array<char,10> b2;
168 N b1val=data.GetId()-lastId;
180 if (currentPageSize+b1size+b2size>pageSize) {
182 writer.FlushCurrentBlockWithZeros(pageSize);
187 writer.Write(b1.data(),b1size);
188 writer.Write(b2.data(),b2size);
190 currentPageSize+=b1size+b2size;
194 if (currentPageSize==0) {
197 startingIds.push_back(data.GetId());
198 pageStarts.push_back(writePos);
200 writer.WriteNumber(data.GetId());
201 writer.WriteNumber(readPos);
203 writePos=writer.GetPos();
204 currentPageSize=writePos%pageSize;
211 writer.FlushCurrentBlockWithZeros(pageSize);
212 indexPageCounts.push_back((uint32_t)pageStarts.size());
214 while (pageStarts.size()>1) {
215 std::vector<N> si(startingIds);
216 std::vector<FileOffset> po(pageStarts);
221 progress.
Info(std::string(
"Writing level ")+std::to_string(indexPageCounts.size()+1)+
" ("+std::to_string(si.size())+
" entries)");
223 size_t currentPageSize=0;
225 for (
size_t i=0; i<si.size(); i++) {
226 if (currentPageSize>0) {
227 std::array<char,10> b1;
228 std::array<char,10> b2;
229 N b1val=si[i]-si[i-1];
240 if (currentPageSize+b1size+b2size>pageSize) {
242 writer.FlushCurrentBlockWithZeros(pageSize);
247 writer.Write(b1.data(),b1size);
248 writer.Write(b2.data(),b2size);
250 currentPageSize+=b1size+b2size;
254 if (currentPageSize==0) {
257 writePos=writer.GetPos();
259 startingIds.push_back(si[i]);
260 pageStarts.push_back(writePos);
262 writer.WriteNumber(si[i]);
263 writer.WriteNumber(po[i]);
265 writePos=writer.GetPos();
266 currentPageSize=writePos%pageSize;
270 writer.FlushCurrentBlockWithZeros(pageSize);
271 indexPageCounts.push_back((uint32_t)pageStarts.size());
276 assert(pageStarts.size()==1);
280 indexPageCountsPos=writer.GetPos();
282 writer.SetPos(levelsOffset);
283 writer.Write((uint32_t)indexPageCounts.size());
285 writer.SetPos(lastLevelPageStartOffset);
286 writer.WriteFileOffset(pageStarts[0]);
288 writer.SetPos(indexPageCountsOffset);
289 writer.WriteFileOffset(indexPageCountsPos);
291 writer.SetPos(indexPageCountsPos);
294 progress.
Info(std::string(
"Index for ")+std::to_string(dataCount)+
" data elements will be stored in "+std::to_string(indexPageCounts.size())+
" levels");
295 for (
size_t level=0; level<indexPageCounts.size(); level++) {
296 size_t levelIndex=indexPageCounts.size()-level-1;
298 progress.
Info(std::string(
"Page count for level ")+std::to_string(level)+
" is "+std::to_string(indexPageCounts[levelIndex]));
299 writer.WriteNumber(indexPageCounts[levelIndex]);
308 scanner.CloseFailsafe();
309 writer.CloseFailsafe();