comparison contrib/python-zstandard/zstd/compress/zstd_opt.c @ 42857:675775c33ab6
zstandard: vendor python-zstandard 0.11
The upstream source distribution from PyPI was extracted. Unwanted
files were removed.
The clang-format ignore list was updated to reflect the new source
of files.
The project contains a vendored copy of zstandard 1.3.8. The old
version was 1.3.6. This should result in some minor performance wins.
test-check-py3-compat.t was updated to reflect now-passing tests on
Python 3.8.
Some HTTP tests were updated to reflect new zstd compression output.
# no-check-commit because 3rd party code has different style guidelines
Differential Revision: https://phab.mercurial-scm.org/D6199
author    Gregory Szorc <gregory.szorc@gmail.com>
date      Thu, 04 Apr 2019 17:34:43 -0700 (2019-04-05)
parents   73fef626dae3
children  69de49c4e39c
42856:668eff08387f (parent, left) | 42857:675775c33ab6 (this changeset, right)
14 | 14 |
15 | 15 |
16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ | 16 #define ZSTD_LITFREQ_ADD 2 /* scaling factor for litFreq, so that frequencies adapt faster to new stats */ |
17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ | 17 #define ZSTD_FREQ_DIV 4 /* log factor when using previous stats to init next stats */ |
18 #define ZSTD_MAX_PRICE (1<<30) | 18 #define ZSTD_MAX_PRICE (1<<30) |
19 | |
20 #define ZSTD_PREDEF_THRESHOLD 1024 /* if srcSize < ZSTD_PREDEF_THRESHOLD, symbols' cost is assumed static, directly determined by pre-defined distributions */ | |
19 | 21 |
20 | 22 |
21 /*-************************************* | 23 /*-************************************* |
22 * Price functions for optimal parser | 24 * Price functions for optimal parser |
23 ***************************************/ | 25 ***************************************/ |
50 U32 const weight = BWeight + FWeight; | 52 U32 const weight = BWeight + FWeight; |
51 assert(hb + BITCOST_ACCURACY < 31); | 53 assert(hb + BITCOST_ACCURACY < 31); |
52 return weight; | 54 return weight; |
53 } | 55 } |
54 | 56 |
55 /* debugging function, @return price in bytes */ | 57 #if (DEBUGLEVEL>=2) |
58 /* debugging function, | |
59 * @return price in bytes as fractional value | |
60 * for debug messages only */ | |
56 MEM_STATIC double ZSTD_fCost(U32 price) | 61 MEM_STATIC double ZSTD_fCost(U32 price) |
57 { | 62 { |
58 return (double)price / (BITCOST_MULTIPLIER*8); | 63 return (double)price / (BITCOST_MULTIPLIER*8); |
59 } | 64 } |
65 #endif | |
60 | 66 |
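(Aside, not part of the diff: the prices handled here are fixed-point "fractional bits". A minimal standalone sketch of the conversion ZSTD_fCost() performs, assuming the BITCOST_ACCURACY / BITCOST_MULTIPLIER definitions that appear earlier in zstd_opt.c:)

    #include <stdio.h>

    /* Assumed to mirror the definitions earlier in zstd_opt.c:
     * a price is a fixed-point count of "fractional bits". */
    #define BITCOST_ACCURACY   8
    #define BITCOST_MULTIPLIER (1 << BITCOST_ACCURACY)

    /* same conversion as ZSTD_fCost(): fractional bits -> bytes */
    static double fCost(unsigned price)
    {
        return (double)price / (BITCOST_MULTIPLIER * 8);
    }

    int main(void)
    {
        unsigned const price = 20 * BITCOST_MULTIPLIER;  /* a 20-bit price */
        printf("%.2f bytes\n", fCost(price));            /* prints 2.50 */
        return 0;
    }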
61 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) | 67 static void ZSTD_setBasePrices(optState_t* optPtr, int optLevel) |
62 { | 68 { |
63 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); | 69 optPtr->litSumBasePrice = WEIGHT(optPtr->litSum, optLevel); |
64 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); | 70 optPtr->litLengthSumBasePrice = WEIGHT(optPtr->litLengthSum, optLevel); |
65 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); | 71 optPtr->matchLengthSumBasePrice = WEIGHT(optPtr->matchLengthSum, optLevel); |
66 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); | 72 optPtr->offCodeSumBasePrice = WEIGHT(optPtr->offCodeSum, optLevel); |
67 } | 73 } |
68 | 74 |
69 | 75 |
70 static U32 ZSTD_downscaleStat(U32* table, U32 lastEltIndex, int malus) | 76 /* ZSTD_downscaleStat() : |
77 * reduce all elements in table by a factor 2^(ZSTD_FREQ_DIV+malus) | |
78 * return the resulting sum of elements */ | |
79 static U32 ZSTD_downscaleStat(unsigned* table, U32 lastEltIndex, int malus) | |
71 { | 80 { |
72 U32 s, sum=0; | 81 U32 s, sum=0; |
82 DEBUGLOG(5, "ZSTD_downscaleStat (nbElts=%u)", (unsigned)lastEltIndex+1); | |
73 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); | 83 assert(ZSTD_FREQ_DIV+malus > 0 && ZSTD_FREQ_DIV+malus < 31); |
74 for (s=0; s<=lastEltIndex; s++) { | 84 for (s=0; s<lastEltIndex+1; s++) { |
75 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); | 85 table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV+malus)); |
76 sum += table[s]; | 86 sum += table[s]; |
77 } | 87 } |
78 return sum; | 88 return sum; |
79 } | 89 } |
80 | 90 |
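(Aside, not part of the diff: ZSTD_downscaleStat() above shrinks the accumulated counts so the next block can adapt while keeping their relative order; ZSTD_upscaleStat() further below is its inverse. A minimal sketch with hypothetical symbol counts:)

    #include <stdio.h>

    #define ZSTD_FREQ_DIV 4   /* same value as in the hunk above */

    /* mirrors ZSTD_downscaleStat(): divide each count by 2^(ZSTD_FREQ_DIV+malus),
     * keep every entry >= 1, and return the new total */
    static unsigned downscale(unsigned* table, unsigned lastEltIndex, int malus)
    {
        unsigned s, sum = 0;
        for (s = 0; s < lastEltIndex + 1; s++) {
            table[s] = 1 + (table[s] >> (ZSTD_FREQ_DIV + malus));
            sum += table[s];
        }
        return sum;
    }

    int main(void)
    {
        unsigned freq[4] = { 1600, 160, 16, 0 };   /* hypothetical symbol counts */
        unsigned const sum = downscale(freq, 3, 0);
        /* prints "101 11 2 1 (sum=115)": relative order kept, influence reduced */
        printf("%u %u %u %u (sum=%u)\n", freq[0], freq[1], freq[2], freq[3], sum);
        return 0;
    }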
81 static void ZSTD_rescaleFreqs(optState_t* const optPtr, | 91 /* ZSTD_rescaleFreqs() : |
82 const BYTE* const src, size_t const srcSize, | 92 * if first block (detected by optPtr->litLengthSum == 0) : init statistics |
83 int optLevel) | 93 * take hints from dictionary if there is one |
84 { | 94 * or init from zero, using src for literals stats, or flat 1 for match symbols |
95 * otherwise downscale existing stats, to be used as seed for next block. | |
96 */ | |
97 static void | |
98 ZSTD_rescaleFreqs(optState_t* const optPtr, | |
99 const BYTE* const src, size_t const srcSize, | |
100 int const optLevel) | |
101 { | |
102 DEBUGLOG(5, "ZSTD_rescaleFreqs (srcSize=%u)", (unsigned)srcSize); | |
85 optPtr->priceType = zop_dynamic; | 103 optPtr->priceType = zop_dynamic; |
86 | 104 |
87 if (optPtr->litLengthSum == 0) { /* first block : init */ | 105 if (optPtr->litLengthSum == 0) { /* first block : init */ |
88 if (srcSize <= 1024) /* heuristic */ | 106 if (srcSize <= ZSTD_PREDEF_THRESHOLD) { /* heuristic */ |
107 DEBUGLOG(5, "(srcSize <= ZSTD_PREDEF_THRESHOLD) => zop_predef"); | |
89 optPtr->priceType = zop_predef; | 108 optPtr->priceType = zop_predef; |
109 } | |
90 | 110 |
91 assert(optPtr->symbolCosts != NULL); | 111 assert(optPtr->symbolCosts != NULL); |
92 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { /* huffman table presumed generated by dictionary */ | 112 if (optPtr->symbolCosts->huf.repeatMode == HUF_repeat_valid) { |
113 /* huffman table presumed generated by dictionary */ | |
93 optPtr->priceType = zop_dynamic; | 114 optPtr->priceType = zop_dynamic; |
94 | 115 |
95 assert(optPtr->litFreq != NULL); | 116 assert(optPtr->litFreq != NULL); |
96 optPtr->litSum = 0; | 117 optPtr->litSum = 0; |
97 { unsigned lit; | 118 { unsigned lit; |
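(Aside, not part of the diff: the new comment on ZSTD_rescaleFreqs() above describes a small decision tree. The following standalone sketch restates it with simplified, hypothetical types; the real code inspects optState_t and HUF repeat modes:)

    #include <stddef.h>
    #include <stdio.h>

    /* Simplified stand-ins for the real zstd state. */
    typedef enum { PRICE_PREDEF = 0, PRICE_DYNAMIC = 1 } price_mode_e;
    #define PREDEF_THRESHOLD 1024   /* mirrors ZSTD_PREDEF_THRESHOLD */

    static price_mode_e choose_price_mode(unsigned litLengthSum, /* 0 => first block */
                                          int dictStatsValid,    /* usable tables from a dictionary */
                                          size_t srcSize)
    {
        if (litLengthSum != 0)
            return PRICE_DYNAMIC;     /* later block: downscale existing stats */
        if (dictStatsValid)
            return PRICE_DYNAMIC;     /* first block: seed stats from the dictionary */
        if (srcSize <= PREDEF_THRESHOLD)
            return PRICE_PREDEF;      /* too small to learn: use predefined distributions */
        return PRICE_DYNAMIC;         /* first block: init from src literals / flat match stats */
    }

    int main(void)
    {
        printf("%d\n", choose_price_mode(0, 0, 500));     /* 0: PRICE_PREDEF */
        printf("%d\n", choose_price_mode(0, 0, 100000));  /* 1: PRICE_DYNAMIC */
        printf("%d\n", choose_price_mode(0, 1, 500));     /* 1: dictionary hints win */
        return 0;
    }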
206 { | 227 { |
207 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); | 228 if (optPtr->priceType == zop_predef) return WEIGHT(litLength, optLevel); |
208 | 229 |
209 /* dynamic statistics */ | 230 /* dynamic statistics */ |
210 { U32 const llCode = ZSTD_LLcode(litLength); | 231 { U32 const llCode = ZSTD_LLcode(litLength); |
211 return (LL_bits[llCode] * BITCOST_MULTIPLIER) + (optPtr->litLengthSumBasePrice - WEIGHT(optPtr->litLengthFreq[llCode], optLevel)); | 232 return (LL_bits[llCode] * BITCOST_MULTIPLIER) |
233 + optPtr->litLengthSumBasePrice | |
234 - WEIGHT(optPtr->litLengthFreq[llCode], optLevel); | |
212 } | 235 } |
213 } | 236 } |
214 | 237 |
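(Aside, not part of the diff: the reformatted expression above prices a literal length as its raw extra bits plus a statistical cost, roughly log2(litLengthSum / litLengthFreq[llCode]) in fixed point, since litLengthSumBasePrice = WEIGHT(litLengthSum) per ZSTD_setBasePrices() earlier in the file. A worked example with hypothetical counts, approximating WEIGHT() by its simpler bit-count form:)

    #include <stdio.h>

    #define BITCOST_MULTIPLIER 256   /* assumed fixed-point scale, as elsewhere in zstd_opt.c */

    /* portable stand-in for ZSTD_highbit32() */
    static unsigned highbit32(unsigned v) { unsigned n = 0; while (v >>= 1) n++; return n; }

    /* simplified WEIGHT(): cost of a symbol seen `stat` times, bit-count form */
    static unsigned bitWeight(unsigned stat) { return highbit32(stat + 1) * BITCOST_MULTIPLIER; }

    int main(void)
    {
        /* hypothetical numbers: the llCode carries 2 extra raw bits, the code was
         * seen 32 times while litLengthSum is 1024 */
        unsigned const extraBits = 2;
        unsigned const price = extraBits * BITCOST_MULTIPLIER
                             + bitWeight(1024)   /* litLengthSumBasePrice */
                             - bitWeight(32);    /* WEIGHT(litLengthFreq[llCode]) */
        printf("price = %u (~%.1f bits)\n", price, (double)price / BITCOST_MULTIPLIER);
        /* prints "price = 1792 (~7.0 bits)": 2 raw bits + ~5 bits of statistical cost */
        return 0;
    }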
215 /* ZSTD_litLengthContribution() : | 238 /* ZSTD_litLengthContribution() : |
216 * @return ( cost(litlength) - cost(0) ) | 239 * @return ( cost(litlength) - cost(0) ) |
251 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. | 274 * Must be combined with ZSTD_fullLiteralsCost() to get the full cost of a sequence. |
252 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ | 275 * optLevel: when <2, favors small offset for decompression speed (improved cache efficiency) */ |
253 FORCE_INLINE_TEMPLATE U32 | 276 FORCE_INLINE_TEMPLATE U32 |
254 ZSTD_getMatchPrice(U32 const offset, | 277 ZSTD_getMatchPrice(U32 const offset, |
255 U32 const matchLength, | 278 U32 const matchLength, |
256 const optState_t* const optPtr, | 279 const optState_t* const optPtr, |
257 int const optLevel) | 280 int const optLevel) |
258 { | 281 { |
259 U32 price; | 282 U32 price; |
260 U32 const offCode = ZSTD_highbit32(offset+1); | 283 U32 const offCode = ZSTD_highbit32(offset+1); |
261 U32 const mlBase = matchLength - MINMATCH; | 284 U32 const mlBase = matchLength - MINMATCH; |
383 const U32 btLow = btMask >= current ? 0 : current - btMask; | 406 const U32 btLow = btMask >= current ? 0 : current - btMask; |
384 U32* smallerPtr = bt + 2*(current&btMask); | 407 U32* smallerPtr = bt + 2*(current&btMask); |
385 U32* largerPtr = smallerPtr + 1; | 408 U32* largerPtr = smallerPtr + 1; |
386 U32 dummy32; /* to be nullified at the end */ | 409 U32 dummy32; /* to be nullified at the end */ |
387 U32 const windowLow = ms->window.lowLimit; | 410 U32 const windowLow = ms->window.lowLimit; |
388 U32 const matchLow = windowLow ? windowLow : 1; | |
389 U32 matchEndIdx = current+8+1; | 411 U32 matchEndIdx = current+8+1; |
390 size_t bestLength = 8; | 412 size_t bestLength = 8; |
391 U32 nbCompares = 1U << cParams->searchLog; | 413 U32 nbCompares = 1U << cParams->searchLog; |
392 #ifdef ZSTD_C_PREDICT | 414 #ifdef ZSTD_C_PREDICT |
393 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); | 415 U32 predictedSmall = *(bt + 2*((current-1)&btMask) + 0); |
399 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); | 421 DEBUGLOG(8, "ZSTD_insertBt1 (%u)", current); |
400 | 422 |
401 assert(ip <= iend-8); /* required for h calculation */ | 423 assert(ip <= iend-8); /* required for h calculation */ |
402 hashTable[h] = current; /* Update Hash Table */ | 424 hashTable[h] = current; /* Update Hash Table */ |
403 | 425 |
404 while (nbCompares-- && (matchIndex >= matchLow)) { | 426 assert(windowLow > 0); |
427 while (nbCompares-- && (matchIndex >= windowLow)) { | |
405 U32* const nextPtr = bt + 2*(matchIndex & btMask); | 428 U32* const nextPtr = bt + 2*(matchIndex & btMask); |
406 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ | 429 size_t matchLength = MIN(commonLengthSmaller, commonLengthLarger); /* guaranteed minimum nb of common bytes */ |
407 assert(matchIndex < current); | 430 assert(matchIndex < current); |
408 | 431 |
409 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ | 432 #ifdef ZSTD_C_PREDICT /* note : can create issues when hlog small <= 11 */ |
477 const U32 mls, const ZSTD_dictMode_e dictMode) | 500 const U32 mls, const ZSTD_dictMode_e dictMode) |
478 { | 501 { |
479 const BYTE* const base = ms->window.base; | 502 const BYTE* const base = ms->window.base; |
480 U32 const target = (U32)(ip - base); | 503 U32 const target = (U32)(ip - base); |
481 U32 idx = ms->nextToUpdate; | 504 U32 idx = ms->nextToUpdate; |
482 DEBUGLOG(5, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", | 505 DEBUGLOG(6, "ZSTD_updateTree_internal, from %u to %u (dictMode:%u)", |
483 idx, target, dictMode); | 506 idx, target, dictMode); |
484 | 507 |
485 while(idx < target) | 508 while(idx < target) |
486 idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); | 509 idx += ZSTD_insertBt1(ms, base+idx, iend, mls, dictMode == ZSTD_extDict); |
487 ms->nextToUpdate = target; | 510 ms->nextToUpdate = target; |
488 } | 511 } |
489 | 512 |
490 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { | 513 void ZSTD_updateTree(ZSTD_matchState_t* ms, const BYTE* ip, const BYTE* iend) { |
491 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.searchLength, ZSTD_noDict); | 514 ZSTD_updateTree_internal(ms, ip, iend, ms->cParams.minMatch, ZSTD_noDict); |
492 } | 515 } |
493 | 516 |
494 FORCE_INLINE_TEMPLATE | 517 FORCE_INLINE_TEMPLATE |
495 U32 ZSTD_insertBtAndGetAllMatches ( | 518 U32 ZSTD_insertBtAndGetAllMatches ( |
496 ZSTD_matchState_t* ms, | 519 ZSTD_matchState_t* ms, |
497 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, | 520 const BYTE* const ip, const BYTE* const iLimit, const ZSTD_dictMode_e dictMode, |
498 U32 rep[ZSTD_REP_NUM], U32 const ll0, | 521 U32 rep[ZSTD_REP_NUM], |
499 ZSTD_match_t* matches, const U32 lengthToBeat, U32 const mls /* template */) | 522 U32 const ll0, /* tells if associated literal length is 0 or not. This value must be 0 or 1 */ |
523 ZSTD_match_t* matches, | |
524 const U32 lengthToBeat, | |
525 U32 const mls /* template */) | |
500 { | 526 { |
501 const ZSTD_compressionParameters* const cParams = &ms->cParams; | 527 const ZSTD_compressionParameters* const cParams = &ms->cParams; |
502 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); | 528 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); |
503 const BYTE* const base = ms->window.base; | 529 const BYTE* const base = ms->window.base; |
504 U32 const current = (U32)(ip-base); | 530 U32 const current = (U32)(ip-base); |
540 | 566 |
541 size_t bestLength = lengthToBeat-1; | 567 size_t bestLength = lengthToBeat-1; |
542 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); | 568 DEBUGLOG(8, "ZSTD_insertBtAndGetAllMatches: current=%u", current); |
543 | 569 |
544 /* check repCode */ | 570 /* check repCode */ |
571 assert(ll0 <= 1); /* necessarily 1 or 0 */ | |
545 { U32 const lastR = ZSTD_REP_NUM + ll0; | 572 { U32 const lastR = ZSTD_REP_NUM + ll0; |
546 U32 repCode; | 573 U32 repCode; |
547 for (repCode = ll0; repCode < lastR; repCode++) { | 574 for (repCode = ll0; repCode < lastR; repCode++) { |
548 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; | 575 U32 const repOffset = (repCode==ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode]; |
549 U32 const repIndex = current - repOffset; | 576 U32 const repIndex = current - repOffset; |
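(Aside, not part of the diff: the repCode loop above enumerates repeat-offset candidates. When the preceding literal length is 0 (ll0 == 1), rep[0] is skipped and a virtual candidate rep[0]-1 is tried instead. A standalone sketch with a hypothetical offset history:)

    #include <stdio.h>

    #define ZSTD_REP_NUM 3   /* zstd keeps 3 repeat offsets */

    /* mirrors the loop above: with ll0 == 1, rep[0] is skipped and the virtual
     * candidate rep[0]-1 is tried as the last repCode */
    static void list_rep_candidates(const unsigned rep[ZSTD_REP_NUM], unsigned ll0)
    {
        unsigned const lastR = ZSTD_REP_NUM + ll0;
        unsigned repCode;
        for (repCode = ll0; repCode < lastR; repCode++) {
            unsigned const repOffset = (repCode == ZSTD_REP_NUM) ? (rep[0] - 1) : rep[repCode];
            printf("repCode %u -> offset %u\n", repCode, repOffset);
        }
    }

    int main(void)
    {
        unsigned const rep[ZSTD_REP_NUM] = { 8, 4, 100 };  /* hypothetical offset history */
        list_rep_candidates(rep, 0);   /* offsets 8, 4, 100 */
        list_rep_candidates(rep, 1);   /* offsets 4, 100, 7 */
        return 0;
    }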
722 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, | 749 const BYTE* ip, const BYTE* const iHighLimit, const ZSTD_dictMode_e dictMode, |
723 U32 rep[ZSTD_REP_NUM], U32 const ll0, | 750 U32 rep[ZSTD_REP_NUM], U32 const ll0, |
724 ZSTD_match_t* matches, U32 const lengthToBeat) | 751 ZSTD_match_t* matches, U32 const lengthToBeat) |
725 { | 752 { |
726 const ZSTD_compressionParameters* const cParams = &ms->cParams; | 753 const ZSTD_compressionParameters* const cParams = &ms->cParams; |
727 U32 const matchLengthSearch = cParams->searchLength; | 754 U32 const matchLengthSearch = cParams->minMatch; |
728 DEBUGLOG(8, "ZSTD_BtGetAllMatches"); | 755 DEBUGLOG(8, "ZSTD_BtGetAllMatches"); |
729 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ | 756 if (ip < ms->window.base + ms->nextToUpdate) return 0; /* skipped area */ |
730 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); | 757 ZSTD_updateTree_internal(ms, ip, iHighLimit, matchLengthSearch, dictMode); |
731 switch(matchLengthSearch) | 758 switch(matchLengthSearch) |
732 { | 759 { |
772 static U32 ZSTD_totalLen(ZSTD_optimal_t sol) | 799 static U32 ZSTD_totalLen(ZSTD_optimal_t sol) |
773 { | 800 { |
774 return sol.litlen + sol.mlen; | 801 return sol.litlen + sol.mlen; |
775 } | 802 } |
776 | 803 |
804 #if 0 /* debug */ | |
805 | |
806 static void | |
807 listStats(const U32* table, int lastEltID) | |
808 { | |
809 int const nbElts = lastEltID + 1; | |
810 int enb; | |
811 for (enb=0; enb < nbElts; enb++) { | |
812 (void)table; | |
813 //RAWLOG(2, "%3i:%3i, ", enb, table[enb]); | |
814 RAWLOG(2, "%4i,", table[enb]); | |
815 } | |
816 RAWLOG(2, " \n"); | |
817 } | |
818 | |
819 #endif | |
820 | |
777 FORCE_INLINE_TEMPLATE size_t | 821 FORCE_INLINE_TEMPLATE size_t |
778 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, | 822 ZSTD_compressBlock_opt_generic(ZSTD_matchState_t* ms, |
779 seqStore_t* seqStore, | 823 seqStore_t* seqStore, |
780 U32 rep[ZSTD_REP_NUM], | 824 U32 rep[ZSTD_REP_NUM], |
781 const void* src, size_t srcSize, | 825 const void* src, size_t srcSize, |
782 const int optLevel, const ZSTD_dictMode_e dictMode) | 826 const int optLevel, |
827 const ZSTD_dictMode_e dictMode) | |
783 { | 828 { |
784 optState_t* const optStatePtr = &ms->opt; | 829 optState_t* const optStatePtr = &ms->opt; |
785 const BYTE* const istart = (const BYTE*)src; | 830 const BYTE* const istart = (const BYTE*)src; |
786 const BYTE* ip = istart; | 831 const BYTE* ip = istart; |
787 const BYTE* anchor = istart; | 832 const BYTE* anchor = istart; |
790 const BYTE* const base = ms->window.base; | 835 const BYTE* const base = ms->window.base; |
791 const BYTE* const prefixStart = base + ms->window.dictLimit; | 836 const BYTE* const prefixStart = base + ms->window.dictLimit; |
792 const ZSTD_compressionParameters* const cParams = &ms->cParams; | 837 const ZSTD_compressionParameters* const cParams = &ms->cParams; |
793 | 838 |
794 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); | 839 U32 const sufficient_len = MIN(cParams->targetLength, ZSTD_OPT_NUM -1); |
795 U32 const minMatch = (cParams->searchLength == 3) ? 3 : 4; | 840 U32 const minMatch = (cParams->minMatch == 3) ? 3 : 4; |
796 | 841 |
797 ZSTD_optimal_t* const opt = optStatePtr->priceTable; | 842 ZSTD_optimal_t* const opt = optStatePtr->priceTable; |
798 ZSTD_match_t* const matches = optStatePtr->matchTable; | 843 ZSTD_match_t* const matches = optStatePtr->matchTable; |
799 ZSTD_optimal_t lastSequence; | 844 ZSTD_optimal_t lastSequence; |
800 | 845 |
801 /* init */ | 846 /* init */ |
802 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic"); | 847 DEBUGLOG(5, "ZSTD_compressBlock_opt_generic: current=%u, prefix=%u, nextToUpdate=%u", |
848 (U32)(ip - base), ms->window.dictLimit, ms->nextToUpdate); | |
803 assert(optLevel <= 2); | 849 assert(optLevel <= 2); |
804 ms->nextToUpdate3 = ms->nextToUpdate; | 850 ms->nextToUpdate3 = ms->nextToUpdate; |
805 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); | 851 ZSTD_rescaleFreqs(optStatePtr, (const BYTE*)src, srcSize, optLevel); |
806 ip += (ip==prefixStart); | 852 ip += (ip==prefixStart); |
807 | 853 |
997 U32 const llen = opt[storePos].litlen; | 1043 U32 const llen = opt[storePos].litlen; |
998 U32 const mlen = opt[storePos].mlen; | 1044 U32 const mlen = opt[storePos].mlen; |
999 U32 const offCode = opt[storePos].off; | 1045 U32 const offCode = opt[storePos].off; |
1000 U32 const advance = llen + mlen; | 1046 U32 const advance = llen + mlen; |
1001 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", | 1047 DEBUGLOG(6, "considering seq starting at %zi, llen=%u, mlen=%u", |
1002 anchor - istart, llen, mlen); | 1048 anchor - istart, (unsigned)llen, (unsigned)mlen); |
1003 | 1049 |
1004 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ | 1050 if (mlen==0) { /* only literals => must be last "sequence", actually starting a new stream of sequences */ |
1005 assert(storePos == storeEnd); /* must be last sequence */ | 1051 assert(storePos == storeEnd); /* must be last sequence */ |
1006 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ | 1052 ip = anchor + llen; /* last "sequence" is a bunch of literals => don't progress anchor */ |
1007 continue; /* will finish */ | 1053 continue; /* will finish */ |
1045 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); | 1091 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 0 /*optLevel*/, ZSTD_noDict); |
1046 } | 1092 } |
1047 | 1093 |
1048 | 1094 |
1049 /* used in 2-pass strategy */ | 1095 /* used in 2-pass strategy */ |
1050 static U32 ZSTD_upscaleStat(U32* table, U32 lastEltIndex, int bonus) | 1096 static U32 ZSTD_upscaleStat(unsigned* table, U32 lastEltIndex, int bonus) |
1051 { | 1097 { |
1052 U32 s, sum=0; | 1098 U32 s, sum=0; |
1053 assert(ZSTD_FREQ_DIV+bonus > 0); | 1099 assert(ZSTD_FREQ_DIV+bonus >= 0); |
1054 for (s=0; s<=lastEltIndex; s++) { | 1100 for (s=0; s<lastEltIndex+1; s++) { |
1055 table[s] <<= ZSTD_FREQ_DIV+bonus; | 1101 table[s] <<= ZSTD_FREQ_DIV+bonus; |
1056 table[s]--; | 1102 table[s]--; |
1057 sum += table[s]; | 1103 sum += table[s]; |
1058 } | 1104 } |
1059 return sum; | 1105 return sum; |
1061 | 1107 |
1062 /* used in 2-pass strategy */ | 1108 /* used in 2-pass strategy */ |
1063 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) | 1109 MEM_STATIC void ZSTD_upscaleStats(optState_t* optPtr) |
1064 { | 1110 { |
1065 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); | 1111 optPtr->litSum = ZSTD_upscaleStat(optPtr->litFreq, MaxLit, 0); |
1066 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 1); | 1112 optPtr->litLengthSum = ZSTD_upscaleStat(optPtr->litLengthFreq, MaxLL, 0); |
1067 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 1); | 1113 optPtr->matchLengthSum = ZSTD_upscaleStat(optPtr->matchLengthFreq, MaxML, 0); |
1068 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 1); | 1114 optPtr->offCodeSum = ZSTD_upscaleStat(optPtr->offCodeFreq, MaxOff, 0); |
1115 } | |
1116 | |
1117 /* ZSTD_initStats_ultra(): | |
1118 * make a first compression pass, just to seed stats with more accurate starting values. | |
1119 * only works on first block, with no dictionary and no ldm. | |
1120 * this function cannot error, hence its contract must be respected. | |
1121 */ | |
1122 static void | |
1123 ZSTD_initStats_ultra(ZSTD_matchState_t* ms, | |
1124 seqStore_t* seqStore, | |
1125 U32 rep[ZSTD_REP_NUM], | |
1126 const void* src, size_t srcSize) | |
1127 { | |
1128 U32 tmpRep[ZSTD_REP_NUM]; /* updated rep codes will sink here */ | |
1129 memcpy(tmpRep, rep, sizeof(tmpRep)); | |
1130 | |
1131 DEBUGLOG(4, "ZSTD_initStats_ultra (srcSize=%zu)", srcSize); | |
1132 assert(ms->opt.litLengthSum == 0); /* first block */ | |
1133 assert(seqStore->sequences == seqStore->sequencesStart); /* no ldm */ | |
1134 assert(ms->window.dictLimit == ms->window.lowLimit); /* no dictionary */ | |
1135 assert(ms->window.dictLimit - ms->nextToUpdate <= 1); /* no prefix (note: intentional overflow, defined as 2-complement) */ | |
1136 | |
1137 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ | |
1138 | |
1139 /* invalidate first scan from history */ | |
1140 ZSTD_resetSeqStore(seqStore); | |
1141 ms->window.base -= srcSize; | |
1142 ms->window.dictLimit += (U32)srcSize; | |
1143 ms->window.lowLimit = ms->window.dictLimit; | |
1144 ms->nextToUpdate = ms->window.dictLimit; | |
1145 ms->nextToUpdate3 = ms->window.dictLimit; | |
1146 | |
1147 /* re-inforce weight of collected statistics */ | |
1148 ZSTD_upscaleStats(&ms->opt); | |
1069 } | 1149 } |
1070 | 1150 |
1071 size_t ZSTD_compressBlock_btultra( | 1151 size_t ZSTD_compressBlock_btultra( |
1072 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | 1152 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
1073 const void* src, size_t srcSize) | 1153 const void* src, size_t srcSize) |
1074 { | 1154 { |
1075 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); | 1155 DEBUGLOG(5, "ZSTD_compressBlock_btultra (srcSize=%zu)", srcSize); |
1076 #if 0 | 1156 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); |
1077 /* 2-pass strategy (disabled) | 1157 } |
1158 | |
1159 size_t ZSTD_compressBlock_btultra2( | |
1160 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | |
1161 const void* src, size_t srcSize) | |
1162 { | |
1163 U32 const current = (U32)((const BYTE*)src - ms->window.base); | |
1164 DEBUGLOG(5, "ZSTD_compressBlock_btultra2 (srcSize=%zu)", srcSize); | |
1165 | |
1166 /* 2-pass strategy: | |
1078 * this strategy makes a first pass over first block to collect statistics | 1167 * this strategy makes a first pass over first block to collect statistics |
1079 * and seed next round's statistics with it. | 1168 * and seed next round's statistics with it. |
1169 * After 1st pass, function forgets everything, and starts a new block. | |
1170 * Consequently, this can only work if no data has been previously loaded in tables, | |
1171 * aka, no dictionary, no prefix, no ldm preprocessing. | |
1080 * The compression ratio gain is generally small (~0.5% on first block), | 1172 * The compression ratio gain is generally small (~0.5% on first block), |
1081 * the cost is 2x cpu time on first block. */ | 1173 * the cost is 2x cpu time on first block. */ |
1082 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); | 1174 assert(srcSize <= ZSTD_BLOCKSIZE_MAX); |
1083 if ( (ms->opt.litLengthSum==0) /* first block */ | 1175 if ( (ms->opt.litLengthSum==0) /* first block */ |
1084 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ | 1176 && (seqStore->sequences == seqStore->sequencesStart) /* no ldm */ |
1085 && (ms->window.dictLimit == ms->window.lowLimit) ) { /* no dictionary */ | 1177 && (ms->window.dictLimit == ms->window.lowLimit) /* no dictionary */ |
1086 U32 tmpRep[ZSTD_REP_NUM]; | 1178 && (current == ms->window.dictLimit) /* start of frame, nothing already loaded nor skipped */ |
1087 DEBUGLOG(5, "ZSTD_compressBlock_btultra: first block: collecting statistics"); | 1179 && (srcSize > ZSTD_PREDEF_THRESHOLD) |
1088 assert(ms->nextToUpdate >= ms->window.dictLimit | 1180 ) { |
1089 && ms->nextToUpdate <= ms->window.dictLimit + 1); | 1181 ZSTD_initStats_ultra(ms, seqStore, rep, src, srcSize); |
1090 memcpy(tmpRep, rep, sizeof(tmpRep)); | 1182 } |
1091 ZSTD_compressBlock_opt_generic(ms, seqStore, tmpRep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); /* generate stats into ms->opt*/ | 1183 |
1092 ZSTD_resetSeqStore(seqStore); | |
1093 /* invalidate first scan from history */ | |
1094 ms->window.base -= srcSize; | |
1095 ms->window.dictLimit += (U32)srcSize; | |
1096 ms->window.lowLimit = ms->window.dictLimit; | |
1097 ms->nextToUpdate = ms->window.dictLimit; | |
1098 ms->nextToUpdate3 = ms->window.dictLimit; | |
1099 /* re-inforce weight of collected statistics */ | |
1100 ZSTD_upscaleStats(&ms->opt); | |
1101 } | |
1102 #endif | |
1103 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); | 1184 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_noDict); |
1104 } | 1185 } |
1105 | 1186 |
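(Aside, not part of the diff: the conditions guarding ZSTD_initStats_ultra() above restrict the 2-pass strategy to a fresh first block that is large enough to be worth the extra CPU time. A simplified sketch of that gating, with hypothetical field names standing in for the real match-state and seqStore checks:)

    #include <stddef.h>
    #include <stdio.h>

    #define PREDEF_THRESHOLD 1024   /* mirrors ZSTD_PREDEF_THRESHOLD */

    /* Hypothetical flags standing in for the real match-state / seqStore checks. */
    typedef struct {
        unsigned litLengthSum;      /* 0 => no statistics collected yet (first block) */
        int      haveLdmSequences;  /* non-zero => ldm preprocessing already emitted sequences */
        int      haveDictionary;    /* non-zero => dictionary content loaded */
        int      atStartOfFrame;    /* non-zero => nothing loaded nor skipped yet */
    } block_state_t;

    static int should_run_first_pass(const block_state_t* st, size_t srcSize)
    {
        return st->litLengthSum == 0
            && !st->haveLdmSequences
            && !st->haveDictionary
            && st->atStartOfFrame
            && srcSize > PREDEF_THRESHOLD;
    }

    int main(void)
    {
        block_state_t const fresh = { 0, 0, 0, 1 };  /* hypothetical first block of a frame */
        printf("%d\n", should_run_first_pass(&fresh, 200));        /* 0: predef costs suffice */
        printf("%d\n", should_run_first_pass(&fresh, 128 * 1024)); /* 1: worth the extra pass */
        return 0;
    }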
1106 size_t ZSTD_compressBlock_btopt_dictMatchState( | 1187 size_t ZSTD_compressBlock_btopt_dictMatchState( |
1107 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | 1188 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
1128 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], | 1209 ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], |
1129 const void* src, size_t srcSize) | 1210 const void* src, size_t srcSize) |
1130 { | 1211 { |
1131 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); | 1212 return ZSTD_compressBlock_opt_generic(ms, seqStore, rep, src, srcSize, 2 /*optLevel*/, ZSTD_extDict); |
1132 } | 1213 } |
1214 | |
1215 /* note : no btultra2 variant for extDict nor dictMatchState, | |
1216 * because btultra2 is not meant to work with dictionaries | |
1217 * and is only specific for the first block (no prefix) */ |