| lz_encoder_mf.c | lz_encoder_mf.c | |||
|---|---|---|---|---|
| skipping to change at line 16 | skipping to change at line 16 | |||
| // Authors: Igor Pavlov | // Authors: Igor Pavlov | |||
| // Lasse Collin | // Lasse Collin | |||
| // | // | |||
| // This file has been put into the public domain. | // This file has been put into the public domain. | |||
| // You can do whatever you want with this file. | // You can do whatever you want with this file. | |||
| // | // | |||
| /////////////////////////////////////////////////////////////////////////// //// | /////////////////////////////////////////////////////////////////////////// //// | |||
| #include "lz_encoder.h" | #include "lz_encoder.h" | |||
| #include "lz_encoder_hash.h" | #include "lz_encoder_hash.h" | |||
| #include "memcmplen.h" | ||||
| /// \brief Find matches starting from the current byte | /// \brief Find matches starting from the current byte | |||
| /// | /// | |||
| /// \return The length of the longest match found | /// \return The length of the longest match found | |||
| extern uint32_t | extern uint32_t | |||
| lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) | lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) | |||
| { | { | |||
| // Call the match finder. It returns the number of length-distance | // Call the match finder. It returns the number of length-distance | |||
| // pairs found. | // pairs found. | |||
| // FIXME: Minimum count is zero, what _exactly_ is the maximum? | // FIXME: Minimum count is zero, what _exactly_ is the maximum? | |||
| skipping to change at line 67 | skipping to change at line 68 | |||
| limit = mf->match_len_max; | limit = mf->match_len_max; | |||
| // Pointer to the byte we just ran through | // Pointer to the byte we just ran through | |||
| // the match finder. | // the match finder. | |||
| const uint8_t *p1 = mf_ptr(mf) - 1; | const uint8_t *p1 = mf_ptr(mf) - 1; | |||
| // Pointer to the beginning of the match. We need -1 | // Pointer to the beginning of the match. We need -1 | |||
| // here because the match distances are zero based. | // here because the match distances are zero based. | |||
| const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ; | const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ; | |||
| while (len_best < limit | len_best = lzma_memcmplen(p1, p2, len_best, limit); | |||
| && p1[len_best] == p2[len_best]) | ||||
| ++len_best; | ||||
| } | } | |||
| } | } | |||
| *count_ptr = count; | *count_ptr = count; | |||
| // Finally update the read position to indicate that match finder wa s | // Finally update the read position to indicate that match finder wa s | |||
| // run for this dictionary offset. | // run for this dictionary offset. | |||
| ++mf->read_ahead; | ++mf->read_ahead; | |||
| return len_best; | return len_best; | |||
| skipping to change at line 115 | skipping to change at line 114 | |||
| normalize(lzma_mf *mf) | normalize(lzma_mf *mf) | |||
| { | { | |||
| assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); | assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); | |||
| // In future we may not want to touch the lowest bits, because there | // In future we may not want to touch the lowest bits, because there | |||
| // may be match finders that use larger resolution than one byte. | // may be match finders that use larger resolution than one byte. | |||
| const uint32_t subvalue | const uint32_t subvalue | |||
| = (MUST_NORMALIZE_POS - mf->cyclic_size); | = (MUST_NORMALIZE_POS - mf->cyclic_size); | |||
| // & (~(UINT32_C(1) << 10) - 1); | // & (~(UINT32_C(1) << 10) - 1); | |||
| const uint32_t count = mf->hash_size_sum + mf->sons_count; | for (uint32_t i = 0; i < mf->hash_count; ++i) { | |||
| uint32_t *hash = mf->hash; | ||||
| for (uint32_t i = 0; i < count; ++i) { | ||||
| // If the distance is greater than the dictionary size, | // If the distance is greater than the dictionary size, | |||
| // we can simply mark the hash element as empty. | // we can simply mark the hash element as empty. | |||
| if (mf->hash[i] <= subvalue) | ||||
| mf->hash[i] = EMPTY_HASH_VALUE; | ||||
| else | ||||
| mf->hash[i] -= subvalue; | ||||
| } | ||||
| for (uint32_t i = 0; i < mf->sons_count; ++i) { | ||||
| // Do the same for mf->son. | ||||
| // | // | |||
| // NOTE: Only the first mf->hash_size_sum elements are | // NOTE: There may be uninitialized elements in mf->son. | |||
| // initialized for sure. There may be uninitialized elements | // Valgrind may complain that the "if" below depends on | |||
| // in mf->son. Since we go through both mf->hash and | // an uninitialized value. In this case it is safe to ignore | |||
| // mf->son here in normalization, Valgrind may complain | // the warning. See also the comments in lz_encoder_init() | |||
| // that the "if" below depends on uninitialized value. In | // in lz_encoder.c. | |||
| // this case it is safe to ignore the warning. See also the | if (mf->son[i] <= subvalue) | |||
| // comments in lz_encoder_init() in lz_encoder.c. | mf->son[i] = EMPTY_HASH_VALUE; | |||
| if (hash[i] <= subvalue) | ||||
| hash[i] = EMPTY_HASH_VALUE; | ||||
| else | else | |||
| hash[i] -= subvalue; | mf->son[i] -= subvalue; | |||
| } | } | |||
| // Update offset to match the new locations. | // Update offset to match the new locations. | |||
| mf->offset -= subvalue; | mf->offset -= subvalue; | |||
| return; | return; | |||
| } | } | |||
| /// Mark the current byte as processed from point of view of the match find er. | /// Mark the current byte as processed from point of view of the match find er. | |||
| static void | static void | |||
| skipping to change at line 261 | skipping to change at line 263 | |||
| while (true) { | while (true) { | |||
| const uint32_t delta = pos - cur_match; | const uint32_t delta = pos - cur_match; | |||
| if (depth-- == 0 || delta >= cyclic_size) | if (depth-- == 0 || delta >= cyclic_size) | |||
| return matches; | return matches; | |||
| const uint8_t *const pb = cur - delta; | const uint8_t *const pb = cur - delta; | |||
| cur_match = son[cyclic_pos - delta | cur_match = son[cyclic_pos - delta | |||
| + (delta > cyclic_pos ? cyclic_size : 0)]; | + (delta > cyclic_pos ? cyclic_size : 0)]; | |||
| if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { | if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { | |||
| uint32_t len = 0; | uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit) | |||
| while (++len != len_limit) | ; | |||
| if (pb[len] != cur[len]) | ||||
| break; | ||||
| if (len_best < len) { | if (len_best < len) { | |||
| len_best = len; | len_best = len; | |||
| matches->len = len; | matches->len = len; | |||
| matches->dist = delta - 1; | matches->dist = delta - 1; | |||
| ++matches; | ++matches; | |||
| if (len == len_limit) | if (len == len_limit) | |||
| return matches; | return matches; | |||
| } | } | |||
| skipping to change at line 307 | skipping to change at line 306 | |||
| const uint32_t delta2 = pos - mf->hash[hash_2_value]; | const uint32_t delta2 = pos - mf->hash[hash_2_value]; | |||
| const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | |||
| mf->hash[hash_2_value] = pos; | mf->hash[hash_2_value] = pos; | |||
| mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | |||
| uint32_t len_best = 2; | uint32_t len_best = 2; | |||
| if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | |||
| for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen(cur - delta2, cur, | |||
| if (*(cur + len_best - delta2) != cur[len_best]) | len_best, len_limit); | |||
| break; | ||||
| matches[0].len = len_best; | matches[0].len = len_best; | |||
| matches[0].dist = delta2 - 1; | matches[0].dist = delta2 - 1; | |||
| matches_count = 1; | matches_count = 1; | |||
| if (len_best == len_limit) { | if (len_best == len_limit) { | |||
| hc_skip(); | hc_skip(); | |||
| return 1; // matches_count | return 1; // matches_count | |||
| } | } | |||
| } | } | |||
| skipping to change at line 384 | skipping to change at line 382 | |||
| } | } | |||
| if (delta2 != delta3 && delta3 < mf->cyclic_size | if (delta2 != delta3 && delta3 < mf->cyclic_size | |||
| && *(cur - delta3) == *cur) { | && *(cur - delta3) == *cur) { | |||
| len_best = 3; | len_best = 3; | |||
| matches[matches_count++].dist = delta3 - 1; | matches[matches_count++].dist = delta3 - 1; | |||
| delta2 = delta3; | delta2 = delta3; | |||
| } | } | |||
| if (matches_count != 0) { | if (matches_count != 0) { | |||
| for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen(cur - delta2, cur, | |||
| if (*(cur + len_best - delta2) != cur[len_best]) | len_best, len_limit); | |||
| break; | ||||
| matches[matches_count - 1].len = len_best; | matches[matches_count - 1].len = len_best; | |||
| if (len_best == len_limit) { | if (len_best == len_limit) { | |||
| hc_skip(); | hc_skip(); | |||
| return matches_count; | return matches_count; | |||
| } | } | |||
| } | } | |||
| if (len_best < 3) | if (len_best < 3) | |||
| skipping to change at line 469 | skipping to change at line 466 | |||
| } | } | |||
| uint32_t *const pair = son + ((cyclic_pos - delta | uint32_t *const pair = son + ((cyclic_pos - delta | |||
| + (delta > cyclic_pos ? cyclic_size : 0)) | + (delta > cyclic_pos ? cyclic_size : 0)) | |||
| << 1); | << 1); | |||
| const uint8_t *const pb = cur - delta; | const uint8_t *const pb = cur - delta; | |||
| uint32_t len = my_min(len0, len1); | uint32_t len = my_min(len0, len1); | |||
| if (pb[len] == cur[len]) { | if (pb[len] == cur[len]) { | |||
| while (++len != len_limit) | len = lzma_memcmplen(pb, cur, len + 1, len_limit); | |||
| if (pb[len] != cur[len]) | ||||
| break; | ||||
| if (len_best < len) { | if (len_best < len) { | |||
| len_best = len; | len_best = len; | |||
| matches->len = len; | matches->len = len; | |||
| matches->dist = delta - 1; | matches->dist = delta - 1; | |||
| ++matches; | ++matches; | |||
| if (len == len_limit) { | if (len == len_limit) { | |||
| *ptr1 = pair[0]; | *ptr1 = pair[0]; | |||
| *ptr0 = pair[1]; | *ptr0 = pair[1]; | |||
| skipping to change at line 533 | skipping to change at line 528 | |||
| return; | return; | |||
| } | } | |||
| uint32_t *pair = son + ((cyclic_pos - delta | uint32_t *pair = son + ((cyclic_pos - delta | |||
| + (delta > cyclic_pos ? cyclic_size : 0)) | + (delta > cyclic_pos ? cyclic_size : 0)) | |||
| << 1); | << 1); | |||
| const uint8_t *pb = cur - delta; | const uint8_t *pb = cur - delta; | |||
| uint32_t len = my_min(len0, len1); | uint32_t len = my_min(len0, len1); | |||
| if (pb[len] == cur[len]) { | if (pb[len] == cur[len]) { | |||
| while (++len != len_limit) | len = lzma_memcmplen(pb, cur, len + 1, len_limit); | |||
| if (pb[len] != cur[len]) | ||||
| break; | ||||
| if (len == len_limit) { | if (len == len_limit) { | |||
| *ptr1 = pair[0]; | *ptr1 = pair[0]; | |||
| *ptr0 = pair[1]; | *ptr0 = pair[1]; | |||
| return; | return; | |||
| } | } | |||
| } | } | |||
| if (pb[len] < cur[len]) { | if (pb[len] < cur[len]) { | |||
| *ptr1 = cur_match; | *ptr1 = cur_match; | |||
| skipping to change at line 619 | skipping to change at line 612 | |||
| const uint32_t delta2 = pos - mf->hash[hash_2_value]; | const uint32_t delta2 = pos - mf->hash[hash_2_value]; | |||
| const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | |||
| mf->hash[hash_2_value] = pos; | mf->hash[hash_2_value] = pos; | |||
| mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | |||
| uint32_t len_best = 2; | uint32_t len_best = 2; | |||
| if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | |||
| for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen( | |||
| if (*(cur + len_best - delta2) != cur[len_best]) | cur, cur - delta2, len_best, len_limit); | |||
| break; | ||||
| matches[0].len = len_best; | matches[0].len = len_best; | |||
| matches[0].dist = delta2 - 1; | matches[0].dist = delta2 - 1; | |||
| matches_count = 1; | matches_count = 1; | |||
| if (len_best == len_limit) { | if (len_best == len_limit) { | |||
| bt_skip(); | bt_skip(); | |||
| return 1; // matches_count | return 1; // matches_count | |||
| } | } | |||
| } | } | |||
| skipping to change at line 690 | skipping to change at line 682 | |||
| } | } | |||
| if (delta2 != delta3 && delta3 < mf->cyclic_size | if (delta2 != delta3 && delta3 < mf->cyclic_size | |||
| && *(cur - delta3) == *cur) { | && *(cur - delta3) == *cur) { | |||
| len_best = 3; | len_best = 3; | |||
| matches[matches_count++].dist = delta3 - 1; | matches[matches_count++].dist = delta3 - 1; | |||
| delta2 = delta3; | delta2 = delta3; | |||
| } | } | |||
| if (matches_count != 0) { | if (matches_count != 0) { | |||
| for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen( | |||
| if (*(cur + len_best - delta2) != cur[len_best]) | cur, cur - delta2, len_best, len_limit); | |||
| break; | ||||
| matches[matches_count - 1].len = len_best; | matches[matches_count - 1].len = len_best; | |||
| if (len_best == len_limit) { | if (len_best == len_limit) { | |||
| bt_skip(); | bt_skip(); | |||
| return matches_count; | return matches_count; | |||
| } | } | |||
| } | } | |||
| if (len_best < 3) | if (len_best < 3) | |||
| End of changes. 13 change blocks. | ||||
| 39 lines changed or deleted | 31 lines changed or added | |||
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ | ||||