lz_encoder_mf.c | lz_encoder_mf.c | |||
---|---|---|---|---|
skipping to change at line 16 | skipping to change at line 16 | |||
// Authors: Igor Pavlov | // Authors: Igor Pavlov | |||
// Lasse Collin | // Lasse Collin | |||
// | // | |||
// This file has been put into the public domain. | // This file has been put into the public domain. | |||
// You can do whatever you want with this file. | // You can do whatever you want with this file. | |||
// | // | |||
/////////////////////////////////////////////////////////////////////////// //// | /////////////////////////////////////////////////////////////////////////// //// | |||
#include "lz_encoder.h" | #include "lz_encoder.h" | |||
#include "lz_encoder_hash.h" | #include "lz_encoder_hash.h" | |||
#include "memcmplen.h" | ||||
/// \brief Find matches starting from the current byte | /// \brief Find matches starting from the current byte | |||
/// | /// | |||
/// \return The length of the longest match found | /// \return The length of the longest match found | |||
extern uint32_t | extern uint32_t | |||
lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) | lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) | |||
{ | { | |||
// Call the match finder. It returns the number of length-distance | // Call the match finder. It returns the number of length-distance | |||
// pairs found. | // pairs found. | |||
// FIXME: Minimum count is zero, what _exactly_ is the maximum? | // FIXME: Minimum count is zero, what _exactly_ is the maximum? | |||
skipping to change at line 67 | skipping to change at line 68 | |||
limit = mf->match_len_max; | limit = mf->match_len_max; | |||
// Pointer to the byte we just ran through | // Pointer to the byte we just ran through | |||
// the match finder. | // the match finder. | |||
const uint8_t *p1 = mf_ptr(mf) - 1; | const uint8_t *p1 = mf_ptr(mf) - 1; | |||
// Pointer to the beginning of the match. We need -1 | // Pointer to the beginning of the match. We need -1 | |||
// here because the match distances are zero based. | // here because the match distances are zero based. | |||
const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ; | const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ; | |||
while (len_best < limit | len_best = lzma_memcmplen(p1, p2, len_best, limit); | |||
&& p1[len_best] == p2[len_best]) | ||||
++len_best; | ||||
} | } | |||
} | } | |||
*count_ptr = count; | *count_ptr = count; | |||
// Finally update the read position to indicate that match finder wa s | // Finally update the read position to indicate that match finder wa s | |||
// run for this dictionary offset. | // run for this dictionary offset. | |||
++mf->read_ahead; | ++mf->read_ahead; | |||
return len_best; | return len_best; | |||
skipping to change at line 115 | skipping to change at line 114 | |||
normalize(lzma_mf *mf) | normalize(lzma_mf *mf) | |||
{ | { | |||
assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); | assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); | |||
// In future we may not want to touch the lowest bits, because there | // In future we may not want to touch the lowest bits, because there | |||
// may be match finders that use larger resolution than one byte. | // may be match finders that use larger resolution than one byte. | |||
const uint32_t subvalue | const uint32_t subvalue | |||
= (MUST_NORMALIZE_POS - mf->cyclic_size); | = (MUST_NORMALIZE_POS - mf->cyclic_size); | |||
// & (~(UINT32_C(1) << 10) - 1); | // & (~(UINT32_C(1) << 10) - 1); | |||
const uint32_t count = mf->hash_size_sum + mf->sons_count; | for (uint32_t i = 0; i < mf->hash_count; ++i) { | |||
uint32_t *hash = mf->hash; | ||||
for (uint32_t i = 0; i < count; ++i) { | ||||
// If the distance is greater than the dictionary size, | // If the distance is greater than the dictionary size, | |||
// we can simply mark the hash element as empty. | // we can simply mark the hash element as empty. | |||
if (mf->hash[i] <= subvalue) | ||||
mf->hash[i] = EMPTY_HASH_VALUE; | ||||
else | ||||
mf->hash[i] -= subvalue; | ||||
} | ||||
for (uint32_t i = 0; i < mf->sons_count; ++i) { | ||||
// Do the same for mf->son. | ||||
// | // | |||
// NOTE: Only the first mf->hash_size_sum elements are | // NOTE: There may be uninitialized elements in mf->son. | |||
// initialized for sure. There may be uninitialized elements | // Valgrind may complain that the "if" below depends on | |||
// in mf->son. Since we go through both mf->hash and | // an uninitialized value. In this case it is safe to ignore | |||
// mf->son here in normalization, Valgrind may complain | // the warning. See also the comments in lz_encoder_init() | |||
// that the "if" below depends on uninitialized value. In | // in lz_encoder.c. | |||
// this case it is safe to ignore the warning. See also the | if (mf->son[i] <= subvalue) | |||
// comments in lz_encoder_init() in lz_encoder.c. | mf->son[i] = EMPTY_HASH_VALUE; | |||
if (hash[i] <= subvalue) | ||||
hash[i] = EMPTY_HASH_VALUE; | ||||
else | else | |||
hash[i] -= subvalue; | mf->son[i] -= subvalue; | |||
} | } | |||
// Update offset to match the new locations. | // Update offset to match the new locations. | |||
mf->offset -= subvalue; | mf->offset -= subvalue; | |||
return; | return; | |||
} | } | |||
/// Mark the current byte as processed from point of view of the match find er. | /// Mark the current byte as processed from point of view of the match find er. | |||
static void | static void | |||
skipping to change at line 261 | skipping to change at line 263 | |||
while (true) { | while (true) { | |||
const uint32_t delta = pos - cur_match; | const uint32_t delta = pos - cur_match; | |||
if (depth-- == 0 || delta >= cyclic_size) | if (depth-- == 0 || delta >= cyclic_size) | |||
return matches; | return matches; | |||
const uint8_t *const pb = cur - delta; | const uint8_t *const pb = cur - delta; | |||
cur_match = son[cyclic_pos - delta | cur_match = son[cyclic_pos - delta | |||
+ (delta > cyclic_pos ? cyclic_size : 0)]; | + (delta > cyclic_pos ? cyclic_size : 0)]; | |||
if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { | if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { | |||
uint32_t len = 0; | uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit) | |||
while (++len != len_limit) | ; | |||
if (pb[len] != cur[len]) | ||||
break; | ||||
if (len_best < len) { | if (len_best < len) { | |||
len_best = len; | len_best = len; | |||
matches->len = len; | matches->len = len; | |||
matches->dist = delta - 1; | matches->dist = delta - 1; | |||
++matches; | ++matches; | |||
if (len == len_limit) | if (len == len_limit) | |||
return matches; | return matches; | |||
} | } | |||
skipping to change at line 307 | skipping to change at line 306 | |||
const uint32_t delta2 = pos - mf->hash[hash_2_value]; | const uint32_t delta2 = pos - mf->hash[hash_2_value]; | |||
const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | |||
mf->hash[hash_2_value] = pos; | mf->hash[hash_2_value] = pos; | |||
mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | |||
uint32_t len_best = 2; | uint32_t len_best = 2; | |||
if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | |||
for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen(cur - delta2, cur, | |||
if (*(cur + len_best - delta2) != cur[len_best]) | len_best, len_limit); | |||
break; | ||||
matches[0].len = len_best; | matches[0].len = len_best; | |||
matches[0].dist = delta2 - 1; | matches[0].dist = delta2 - 1; | |||
matches_count = 1; | matches_count = 1; | |||
if (len_best == len_limit) { | if (len_best == len_limit) { | |||
hc_skip(); | hc_skip(); | |||
return 1; // matches_count | return 1; // matches_count | |||
} | } | |||
} | } | |||
skipping to change at line 384 | skipping to change at line 382 | |||
} | } | |||
if (delta2 != delta3 && delta3 < mf->cyclic_size | if (delta2 != delta3 && delta3 < mf->cyclic_size | |||
&& *(cur - delta3) == *cur) { | && *(cur - delta3) == *cur) { | |||
len_best = 3; | len_best = 3; | |||
matches[matches_count++].dist = delta3 - 1; | matches[matches_count++].dist = delta3 - 1; | |||
delta2 = delta3; | delta2 = delta3; | |||
} | } | |||
if (matches_count != 0) { | if (matches_count != 0) { | |||
for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen(cur - delta2, cur, | |||
if (*(cur + len_best - delta2) != cur[len_best]) | len_best, len_limit); | |||
break; | ||||
matches[matches_count - 1].len = len_best; | matches[matches_count - 1].len = len_best; | |||
if (len_best == len_limit) { | if (len_best == len_limit) { | |||
hc_skip(); | hc_skip(); | |||
return matches_count; | return matches_count; | |||
} | } | |||
} | } | |||
if (len_best < 3) | if (len_best < 3) | |||
skipping to change at line 469 | skipping to change at line 466 | |||
} | } | |||
uint32_t *const pair = son + ((cyclic_pos - delta | uint32_t *const pair = son + ((cyclic_pos - delta | |||
+ (delta > cyclic_pos ? cyclic_size : 0)) | + (delta > cyclic_pos ? cyclic_size : 0)) | |||
<< 1); | << 1); | |||
const uint8_t *const pb = cur - delta; | const uint8_t *const pb = cur - delta; | |||
uint32_t len = my_min(len0, len1); | uint32_t len = my_min(len0, len1); | |||
if (pb[len] == cur[len]) { | if (pb[len] == cur[len]) { | |||
while (++len != len_limit) | len = lzma_memcmplen(pb, cur, len + 1, len_limit); | |||
if (pb[len] != cur[len]) | ||||
break; | ||||
if (len_best < len) { | if (len_best < len) { | |||
len_best = len; | len_best = len; | |||
matches->len = len; | matches->len = len; | |||
matches->dist = delta - 1; | matches->dist = delta - 1; | |||
++matches; | ++matches; | |||
if (len == len_limit) { | if (len == len_limit) { | |||
*ptr1 = pair[0]; | *ptr1 = pair[0]; | |||
*ptr0 = pair[1]; | *ptr0 = pair[1]; | |||
skipping to change at line 533 | skipping to change at line 528 | |||
return; | return; | |||
} | } | |||
uint32_t *pair = son + ((cyclic_pos - delta | uint32_t *pair = son + ((cyclic_pos - delta | |||
+ (delta > cyclic_pos ? cyclic_size : 0)) | + (delta > cyclic_pos ? cyclic_size : 0)) | |||
<< 1); | << 1); | |||
const uint8_t *pb = cur - delta; | const uint8_t *pb = cur - delta; | |||
uint32_t len = my_min(len0, len1); | uint32_t len = my_min(len0, len1); | |||
if (pb[len] == cur[len]) { | if (pb[len] == cur[len]) { | |||
while (++len != len_limit) | len = lzma_memcmplen(pb, cur, len + 1, len_limit); | |||
if (pb[len] != cur[len]) | ||||
break; | ||||
if (len == len_limit) { | if (len == len_limit) { | |||
*ptr1 = pair[0]; | *ptr1 = pair[0]; | |||
*ptr0 = pair[1]; | *ptr0 = pair[1]; | |||
return; | return; | |||
} | } | |||
} | } | |||
if (pb[len] < cur[len]) { | if (pb[len] < cur[len]) { | |||
*ptr1 = cur_match; | *ptr1 = cur_match; | |||
skipping to change at line 619 | skipping to change at line 612 | |||
const uint32_t delta2 = pos - mf->hash[hash_2_value]; | const uint32_t delta2 = pos - mf->hash[hash_2_value]; | |||
const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; | |||
mf->hash[hash_2_value] = pos; | mf->hash[hash_2_value] = pos; | |||
mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; | |||
uint32_t len_best = 2; | uint32_t len_best = 2; | |||
if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { | |||
for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen( | |||
if (*(cur + len_best - delta2) != cur[len_best]) | cur, cur - delta2, len_best, len_limit); | |||
break; | ||||
matches[0].len = len_best; | matches[0].len = len_best; | |||
matches[0].dist = delta2 - 1; | matches[0].dist = delta2 - 1; | |||
matches_count = 1; | matches_count = 1; | |||
if (len_best == len_limit) { | if (len_best == len_limit) { | |||
bt_skip(); | bt_skip(); | |||
return 1; // matches_count | return 1; // matches_count | |||
} | } | |||
} | } | |||
skipping to change at line 690 | skipping to change at line 682 | |||
} | } | |||
if (delta2 != delta3 && delta3 < mf->cyclic_size | if (delta2 != delta3 && delta3 < mf->cyclic_size | |||
&& *(cur - delta3) == *cur) { | && *(cur - delta3) == *cur) { | |||
len_best = 3; | len_best = 3; | |||
matches[matches_count++].dist = delta3 - 1; | matches[matches_count++].dist = delta3 - 1; | |||
delta2 = delta3; | delta2 = delta3; | |||
} | } | |||
if (matches_count != 0) { | if (matches_count != 0) { | |||
for ( ; len_best != len_limit; ++len_best) | len_best = lzma_memcmplen( | |||
if (*(cur + len_best - delta2) != cur[len_best]) | cur, cur - delta2, len_best, len_limit); | |||
break; | ||||
matches[matches_count - 1].len = len_best; | matches[matches_count - 1].len = len_best; | |||
if (len_best == len_limit) { | if (len_best == len_limit) { | |||
bt_skip(); | bt_skip(); | |||
return matches_count; | return matches_count; | |||
} | } | |||
} | } | |||
if (len_best < 3) | if (len_best < 3) | |||
End of changes. 13 change blocks. | ||||
39 lines changed or deleted | 31 lines changed or added | |||
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/ |