libzypp 17.28.8
MediaBlockList.cc
Go to the documentation of this file.
1/*---------------------------------------------------------------------\
2| ____ _ __ __ ___ |
3| |__ / \ / / . \ . \ |
4| / / \ V /| _/ _/ |
5| / /__ | | | | | | |
6| /_____||_| |_| |_| |
7| |
8\---------------------------------------------------------------------*/
13#include <sys/types.h>
14#include <stdio.h>
15#include <stdlib.h>
16#include <string.h>
17
18#include <vector>
19#include <iostream>
20#include <fstream>
21
23#include <zypp/base/Logger.h>
24#include <zypp/base/String.h>
25
26using namespace zypp::base;
27
28namespace zypp {
29 namespace media {
30
32{
33 filesize = size;
34 haveblocks = false;
35 chksumlen = 0;
36 chksumpad = 0;
37 rsumlen = 0;
38 rsumpad = 0;
39}
40
41size_t
42MediaBlockList::addBlock(off_t off, size_t size)
43{
44 haveblocks = true;
45 blocks.push_back(MediaBlock( off, size ));
46 return blocks.size() - 1;
47}
48
49void
50MediaBlockList::setFileChecksum(std::string ctype, int cl, unsigned char *c)
51{
52 if (!cl)
53 return;
54 fsumtype = ctype;
55 fsum.resize(cl);
56 memcpy(&fsum[0], c, cl);
57}
58
60{
61 return fsumtype;
62}
63
65{
66 return fsum;
67}
68
69bool
71{
72 return digest.create(fsumtype);
73}
74
75bool
77{
78 if (!haveFileChecksum())
79 return true;
80 std::vector<unsigned char>dig = digest.digestVector();
81 if (dig.empty() || dig.size() < fsum.size())
82 return false;
83 return memcmp(&dig[0], &fsum[0], fsum.size()) ? false : true;
84}
85
86void
87MediaBlockList::setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad)
88{
89 if (!csl)
90 return;
91 if (!chksumlen)
92 {
93 if (blkno)
94 return;
95 chksumlen = csl;
96 chksumtype = cstype;
97 chksumpad = cspad;
98 }
99 if (csl != chksumlen || cstype != chksumtype || cspad != chksumpad || blkno != chksums.size() / chksumlen)
100 return;
101 chksums.resize(chksums.size() + csl);
102 memcpy(&chksums[csl * blkno], cs, csl);
103}
104
105void
106MediaBlockList::setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad)
107{
108 if (!rsl)
109 return;
110 if (!rsumlen)
111 {
112 if (blkno)
113 return;
114 rsumlen = rsl;
115 rsumpad = rspad;
116 }
117 if (rsl != rsumlen || rspad != rsumpad || blkno != rsums.size())
118 return;
119 rsums.push_back(rs);
120}
121
122bool
124{
125 return digest.create(chksumtype);
126}
127
128bool
129MediaBlockList::verifyDigest(size_t blkno, Digest &digest) const
130{
131 if (!haveChecksum(blkno))
132 return true;
133 size_t size = blocks[blkno].size;
134 if (!size)
135 return true;
136 if (chksumpad > size)
137 {
138 char pad[chksumpad - size];
139 memset(pad, 0, chksumpad - size);
140 digest.update(pad, chksumpad - size);
141 }
142 std::vector<unsigned char>dig = digest.digestVector();
143 if (dig.empty() || dig.size() < size_t(chksumlen))
144 return false;
145 return memcmp(&dig[0], &chksums[chksumlen * blkno], chksumlen) ? false : true;
146}
147
148unsigned int
149MediaBlockList::updateRsum(unsigned int rs, const char* bytes, size_t len) const
150{
151 if (!len)
152 return rs;
153 unsigned short s, m;
154 s = (rs >> 16) & 65535;
155 m = rs & 65535;
156 for (; len > 0 ; len--)
157 {
158 unsigned short c = (unsigned char)*bytes++;
159 s += c;
160 m += s;
161 }
162 return (s & 65535) << 16 | (m & 65535);
163}
164
165bool
166MediaBlockList::verifyRsum(size_t blkno, unsigned int rs) const
167{
168 if (!haveRsum(blkno))
169 return true;
170 size_t size = blocks[blkno].size;
171 if (!size)
172 return true;
173 if (rsumpad > size)
174 {
175 unsigned short s, m;
176 s = (rs >> 16) & 65535;
177 m = rs & 65535;
178 m += s * (rsumpad - size);
179 rs = (s & 65535) << 16 | (m & 65535);
180 }
181 switch(rsumlen)
182 {
183 case 3:
184 rs &= 0xffffff;
185 case 2:
186 rs &= 0xffff;
187 case 1:
188 rs &= 0xff;
189 default:
190 break;
191 }
192 return rs == rsums[blkno];
193}
194
195bool
196MediaBlockList::checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
197{
198 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
199 return false;
200 unsigned int rs = updateRsum(0, (const char *)buf, blocks[blkno].size);
201 return verifyRsum(blkno, rs);
202}
203
204bool
205MediaBlockList::checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
206{
207 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
208 return false;
209 Digest dig;
210 if (!createDigest(dig))
211 return false;
212 dig.update((const char *)buf, blocks[blkno].size);
213 return verifyDigest(blkno, dig);
214}
215
217{
218 if ( !haveChecksum(blkno) )
219 return {};
220
221 UByteArray buf ( chksumlen, '\0' );
222 memcpy( buf.data(), chksums.data()+(chksumlen * blkno), chksumlen );
223 return buf;
224}
225
227{
228 return chksumtype;
229}
230
231// specialized version of checkChecksum that can deal with a "rotated" buffer
232bool
233MediaBlockList::checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
234{
235 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
236 return false;
237 if (start == bufl)
238 start = 0;
239 Digest dig;
240 if (!createDigest(dig))
241 return false;
242 size_t size = blocks[blkno].size;
243 size_t len = bufl - start > size ? size : bufl - start;
244 dig.update((const char *)buf + start, len);
245 if (size > len)
246 dig.update((const char *)buf, size - len);
247 return verifyDigest(blkno, dig);
248}
249
250// write block to the file. can also deal with "rotated" buffers
251void
252MediaBlockList::writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, std::vector<bool> &found) const
253{
254 if (blkno >= blocks.size() || bufl < blocks[blkno].size)
255 return;
256 off_t off = blocks[blkno].off;
257 size_t size = blocks[blkno].size;
258 if (fseeko(fp, off, SEEK_SET))
259 return;
260 if (start == bufl)
261 start = 0;
262 size_t len = bufl - start > size ? size : bufl - start;
263 if (fwrite(buf + start, len, 1, fp) != 1)
264 return;
265 if (size > len && fwrite(buf, size - len, 1, fp) != 1)
266 return;
267 found[blkno] = true;
268 found[blocks.size()] = true;
269}
270
/**
 * Fill \a bp with the next \a blksize bytes of input.
 *
 * The first \a pushback bytes are taken from \a pushbackp (moved to the
 * start of \a bp unless they are already there); the rest is read from
 * \a fp. If the file ends early the remainder of the buffer is zero filled.
 *
 * \param fp         input stream to read from
 * \param bp         destination buffer of at least \a blksize bytes
 * \param blksize    number of bytes to provide
 * \param pushback   number of previously fetched bytes to reuse
 * \param pushbackp  location of those bytes; only read if \a pushback != 0
 * \return number of valid (non-padding) bytes now in \a bp, i.e. the
 *         pushback bytes plus the bytes actually read
 */
static size_t
fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
{
  size_t missing = blksize;

  if (pushback)
    {
      // memmove, not memcpy: source and destination may overlap.
      if (pushbackp != bp)
        memmove(bp, pushbackp, pushback);
      bp += pushback;
      missing -= pushback;
    }
  if (missing)
    {
      // One bulk read instead of a getc() call per byte; like the getc loop
      // it stops at the requested count, end of file or a read error.
      const size_t got = fread(bp, 1, missing, fp);
      bp += got;
      missing -= got;
    }
  if (missing)
    memset(bp, 0, missing);
  return blksize - missing;
}
296
297
298void
299MediaBlockList::reuseBlocks(FILE *wfp, std::string filename)
300{
301 FILE *fp;
302
303 if (!chksumlen || (fp = fopen(filename.c_str(), "r")) == 0)
304 return;
305 size_t nblks = blocks.size();
306 std::vector<bool> found;
307 found.resize(nblks + 1);
308 if (rsumlen && !rsums.empty())
309 {
310 size_t blksize = blocks[0].size;
311 if (nblks == 1 && rsumpad && rsumpad > blksize)
312 blksize = rsumpad;
313 // create hash of checksums
314 unsigned int hm = rsums.size() * 2;
315 while (hm & (hm - 1))
316 hm &= hm - 1;
317 hm = hm * 2 - 1;
318 if (hm < 16383)
319 hm = 16383;
320 unsigned int *ht = new unsigned int[hm + 1];
321 memset(ht, 0, (hm + 1) * sizeof(unsigned int));
322 for (unsigned int i = 0; i < rsums.size(); i++)
323 {
324 if (blocks[i].size != blksize && (i != nblks - 1 || rsumpad != blksize))
325 continue;
326 unsigned int r = rsums[i];
327 unsigned int h = r & hm;
328 unsigned int hh = 7;
329 while (ht[h])
330 h = (h + hh++) & hm;
331 ht[h] = i + 1;
332 }
333
334 unsigned char *buf = new unsigned char[blksize];
335 unsigned char *buf2 = new unsigned char[blksize];
336 size_t pushback = 0;
337 unsigned char *pushbackp = 0;
338 int bshift = 0;
339 if ((blksize & (blksize - 1)) == 0)
340 for (bshift = 0; size_t(1 << bshift) != blksize; bshift++)
341 ;
342 unsigned short a, b;
343 a = b = 0;
344 memset(buf, 0, blksize);
345 bool eof = 0;
346 bool init = 1;
347 int sql = nblks > 1 && chksumlen < 16 ? 2 : 1;
348 while (!eof)
349 {
350 for (size_t i = 0; i < blksize; i++)
351 {
352 int c;
353 if (eof)
354 c = 0;
355 else
356 {
357 if (pushback)
358 {
359 c = *pushbackp++;
360 pushback--;
361 }
362 else
363 c = getc(fp);
364 if (c == EOF)
365 {
366 eof = true;
367 c = 0;
368 if (!i || sql == 2)
369 break;
370 }
371 }
372 int oc = buf[i];
373 buf[i] = c;
374 a += c - oc;
375 if (bshift)
376 b += a - (oc << bshift);
377 else
378 b += a - oc * blksize;
379 if (init)
380 {
381 if (size_t(i) != blksize - 1)
382 continue;
383 init = 0;
384 }
385 unsigned int r;
386 if (rsumlen == 1)
387 r = ((unsigned int)b & 255);
388 else if (rsumlen == 2)
389 r = ((unsigned int)b & 65535);
390 else if (rsumlen == 3)
391 r = ((unsigned int)a & 255) << 16 | ((unsigned int)b & 65535);
392 else
393 r = ((unsigned int)a & 65535) << 16 | ((unsigned int)b & 65535);
394 unsigned int h = r & hm;
395 unsigned int hh = 7;
396 for (; ht[h]; h = (h + hh++) & hm)
397 {
398 size_t blkno = ht[h] - 1;
399 if (rsums[blkno] != r)
400 continue;
401 if (found[blkno])
402 continue;
403 if (sql == 2)
404 {
405 if (eof || blkno + 1 >= nblks)
406 continue;
407 pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
408 pushbackp = buf2;
409 if (!pushback)
410 continue;
411 if (!checkRsum(blkno + 1, buf2, blksize))
412 continue;
413 }
414 if (!checkChecksumRotated(blkno, buf, blksize, i + 1))
415 continue;
416 if (sql == 2 && !checkChecksum(blkno + 1, buf2, blksize))
417 continue;
418 writeBlock(blkno, wfp, buf, blksize, i + 1, found);
419 if (sql == 2)
420 {
421 writeBlock(blkno + 1, wfp, buf2, blksize, 0, found);
422 pushback = 0;
423 blkno++;
424 }
425 while (!eof)
426 {
427 blkno++;
428 pushback = fetchnext(fp, buf2, blksize, pushback, pushbackp);
429 pushbackp = buf2;
430 if (!pushback)
431 break;
432 if (!checkRsum(blkno, buf2, blksize))
433 break;
434 if (!checkChecksum(blkno, buf2, blksize))
435 break;
436 writeBlock(blkno, wfp, buf2, blksize, 0, found);
437 pushback = 0;
438 }
439 init = false;
440 memset(buf, 0, blksize);
441 a = b = 0;
442 i = size_t(-1); // start with 0 on next iteration
443 break;
444 }
445 }
446 }
447 delete[] buf2;
448 delete[] buf;
449 delete[] ht;
450 }
451 else if (chksumlen >= 16)
452 {
453 // dummy variant, just check the checksums
454 size_t bufl = 4096;
455 off_t off = 0;
456 unsigned char *buf = new unsigned char[bufl];
457 for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
458 {
459 if (off > blocks[blkno].off)
460 continue;
461 size_t blksize = blocks[blkno].size;
462 if (blksize > bufl)
463 {
464 delete[] buf;
465 bufl = blksize;
466 buf = new unsigned char[bufl];
467 }
468 size_t skip = blocks[blkno].off - off;
469 while (skip)
470 {
471 size_t l = skip > bufl ? bufl : skip;
472 if (fread(buf, l, 1, fp) != 1)
473 break;
474 skip -= l;
475 off += l;
476 }
477 if (fread(buf, blksize, 1, fp) != 1)
478 break;
479 if (checkChecksum(blkno, buf, blksize))
480 writeBlock(blkno, wfp, buf, blksize, 0, found);
481 off += blksize;
482 }
483 }
484 if (!found[nblks])
485 return;
486 // now throw out all of the blocks we found
487 std::vector<MediaBlock> nblocks;
488 std::vector<unsigned char> nchksums;
489 std::vector<unsigned int> nrsums;
490
491 for (size_t blkno = 0; blkno < blocks.size(); ++blkno)
492 {
493 if (!found[blkno])
494 {
495 // still need it
496 nblocks.push_back(blocks[blkno]);
497 if (chksumlen && (blkno + 1) * chksumlen <= chksums.size())
498 {
499 nchksums.resize(nblocks.size() * chksumlen);
500 memcpy(&nchksums[(nblocks.size() - 1) * chksumlen], &chksums[blkno * chksumlen], chksumlen);
501 }
502 if (rsumlen && (blkno + 1) <= rsums.size())
503 nrsums.push_back(rsums[blkno]);
504 }
505 }
506 blocks = nblocks;
507 chksums = nchksums;
508 rsums = nrsums;
509}
510
511std::string
513{
514 std::string s;
515 size_t i, j;
516
517 if (filesize != off_t(-1))
518 {
519 long long size = filesize;
520 s = zypp::str::form("[ BlockList, file size %lld\n", size);
521 }
522 else
523 s = "[ BlockList, filesize unknown\n";
524 if (!haveblocks)
525 s += " No block information\n";
526 if (chksumpad)
527 s += zypp::str::form(" Checksum pad %zd\n", chksumpad);
528 if (rsumpad)
529 s += zypp::str::form(" Rsum pad %zd\n", rsumpad);
530 for (i = 0; i < blocks.size(); ++i)
531 {
532 long long off=blocks[i].off;
533 long long size=blocks[i].size;
534 s += zypp::str::form(" (%8lld, %8lld)", off, size);
535 if (chksumlen && chksums.size() >= (i + 1) * chksumlen)
536 {
537 s += " " + chksumtype + ":";
538 for (j = 0; j < size_t(chksumlen); j++)
539 s += zypp::str::form("%02hhx", chksums[i * chksumlen + j]);
540 }
541 if (rsumlen && rsums.size() > i)
542 {
543 s += " RSUM:";
544 s += zypp::str::form("%0*x", 2 * rsumlen, rsums[i]);
545 }
546 s += "\n";
547 }
548 s += "]";
549 return s;
550}
551
552 } // namespace media
553} // namespace zypp
554
Compute Message Digests (MD5, SHA1 etc)
Definition: Digest.h:36
UByteArray digestVector()
get vector of unsigned char representation of the digest
Definition: Digest.cc:230
bool update(const char *bytes, size_t len)
feed data into digest computation algorithm
Definition: Digest.cc:248
bool create(const std::string &name)
initialize creation of a new message digest
Definition: Digest.cc:143
std::vector< unsigned int > rsums
void setRsum(size_t blkno, int rsl, unsigned int rs, size_t rspad=0)
set / verify the (weak) rolling checksum over a single block
bool haveChecksum(size_t blkno) const
void setFileChecksum(std::string ctype, int cl, unsigned char *c)
set / verify the checksum over the whole file
bool verifyRsum(size_t blkno, unsigned int rs) const
void reuseBlocks(FILE *wfp, std::string filename)
scan a file for blocks from our blocklist.
const UByteArray & getFileChecksum()
void writeBlock(size_t blkno, FILE *fp, const unsigned char *buf, size_t bufl, size_t start, std::vector< bool > &found) const
bool createDigest(Digest &digest) const
std::string asString() const
return block list as string
UByteArray getChecksum(size_t blkno) const
bool checkChecksumRotated(size_t blkno, const unsigned char *buf, size_t bufl, size_t start) const
std::vector< unsigned char > chksums
size_t addBlock(off_t off, size_t size)
add a block with offset off and size size to the block list.
unsigned int updateRsum(unsigned int rs, const char *bytes, size_t len) const
void setChecksum(size_t blkno, std::string cstype, int csl, unsigned char *cs, size_t cspad=0)
set / verify the (strong) checksum over a single block
bool verifyDigest(size_t blkno, Digest &digest) const
bool checkRsum(size_t blkno, const unsigned char *buf, size_t bufl) const
std::vector< MediaBlock > blocks
std::string getChecksumType() const
bool createFileDigest(Digest &digest) const
bool verifyFileDigest(Digest &digest) const
std::string fileChecksumType() const
bool haveRsum(size_t blkno) const
bool checkChecksum(size_t blkno, const unsigned char *buf, size_t bufl) const
MediaBlockList(off_t filesize=off_t(-1))
static size_t fetchnext(FILE *fp, unsigned char *bp, size_t blksize, size_t pushback, unsigned char *pushbackp)
std::string form(const char *format,...) __attribute__((format(printf
Printf style construction of std::string.
Definition: String.cc:36
Easy-to use interface to the ZYPP dependency resolver.
Definition: CodePitfalls.doc:2
a single block from the blocklist, consisting of an offset and a size