lz_encoder_mf.c   lz_encoder_mf.c 
skipping to change at line 16 skipping to change at line 16
// Authors: Igor Pavlov // Authors: Igor Pavlov
// Lasse Collin // Lasse Collin
// //
// This file has been put into the public domain. // This file has been put into the public domain.
// You can do whatever you want with this file. // You can do whatever you want with this file.
// //
/////////////////////////////////////////////////////////////////////////// //// /////////////////////////////////////////////////////////////////////////// ////
#include "lz_encoder.h" #include "lz_encoder.h"
#include "lz_encoder_hash.h" #include "lz_encoder_hash.h"
#include "memcmplen.h"
/// \brief Find matches starting from the current byte /// \brief Find matches starting from the current byte
/// ///
/// \return The length of the longest match found /// \return The length of the longest match found
extern uint32_t extern uint32_t
lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches) lzma_mf_find(lzma_mf *mf, uint32_t *count_ptr, lzma_match *matches)
{ {
// Call the match finder. It returns the number of length-distance // Call the match finder. It returns the number of length-distance
// pairs found. // pairs found.
// FIXME: Minimum count is zero, what _exactly_ is the maximum? // FIXME: Minimum count is zero, what _exactly_ is the maximum?
skipping to change at line 67 skipping to change at line 68
limit = mf->match_len_max; limit = mf->match_len_max;
// Pointer to the byte we just ran through // Pointer to the byte we just ran through
// the match finder. // the match finder.
const uint8_t *p1 = mf_ptr(mf) - 1; const uint8_t *p1 = mf_ptr(mf) - 1;
// Pointer to the beginning of the match. We need -1 // Pointer to the beginning of the match. We need -1
// here because the match distances are zero based. // here because the match distances are zero based.
const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ; const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ;
while (len_best < limit len_best = lzma_memcmplen(p1, p2, len_best, limit);
&& p1[len_best] == p2[len_best])
++len_best;
} }
} }
*count_ptr = count; *count_ptr = count;
// Finally update the read position to indicate that match finder wa s // Finally update the read position to indicate that match finder wa s
// run for this dictionary offset. // run for this dictionary offset.
++mf->read_ahead; ++mf->read_ahead;
return len_best; return len_best;
skipping to change at line 115 skipping to change at line 114
normalize(lzma_mf *mf) normalize(lzma_mf *mf)
{ {
assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS); assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);
// In future we may not want to touch the lowest bits, because there // In future we may not want to touch the lowest bits, because there
// may be match finders that use larger resolution than one byte. // may be match finders that use larger resolution than one byte.
const uint32_t subvalue const uint32_t subvalue
= (MUST_NORMALIZE_POS - mf->cyclic_size); = (MUST_NORMALIZE_POS - mf->cyclic_size);
// & (~(UINT32_C(1) << 10) - 1); // & (~(UINT32_C(1) << 10) - 1);
const uint32_t count = mf->hash_size_sum + mf->sons_count; for (uint32_t i = 0; i < mf->hash_count; ++i) {
uint32_t *hash = mf->hash;
for (uint32_t i = 0; i < count; ++i) {
// If the distance is greater than the dictionary size, // If the distance is greater than the dictionary size,
// we can simply mark the hash element as empty. // we can simply mark the hash element as empty.
if (mf->hash[i] <= subvalue)
mf->hash[i] = EMPTY_HASH_VALUE;
else
mf->hash[i] -= subvalue;
}
for (uint32_t i = 0; i < mf->sons_count; ++i) {
// Do the same for mf->son.
// //
// NOTE: Only the first mf->hash_size_sum elements are // NOTE: There may be uninitialized elements in mf->son.
// initialized for sure. There may be uninitialized elements // Valgrind may complain that the "if" below depends on
// in mf->son. Since we go through both mf->hash and // an uninitialized value. In this case it is safe to ignore
// mf->son here in normalization, Valgrind may complain // the warning. See also the comments in lz_encoder_init()
// that the "if" below depends on uninitialized value. In // in lz_encoder.c.
// this case it is safe to ignore the warning. See also the if (mf->son[i] <= subvalue)
// comments in lz_encoder_init() in lz_encoder.c. mf->son[i] = EMPTY_HASH_VALUE;
if (hash[i] <= subvalue)
hash[i] = EMPTY_HASH_VALUE;
else else
hash[i] -= subvalue; mf->son[i] -= subvalue;
} }
// Update offset to match the new locations. // Update offset to match the new locations.
mf->offset -= subvalue; mf->offset -= subvalue;
return; return;
} }
/// Mark the current byte as processed from point of view of the match find er. /// Mark the current byte as processed from point of view of the match find er.
static void static void
skipping to change at line 261 skipping to change at line 263
while (true) { while (true) {
const uint32_t delta = pos - cur_match; const uint32_t delta = pos - cur_match;
if (depth-- == 0 || delta >= cyclic_size) if (depth-- == 0 || delta >= cyclic_size)
return matches; return matches;
const uint8_t *const pb = cur - delta; const uint8_t *const pb = cur - delta;
cur_match = son[cyclic_pos - delta cur_match = son[cyclic_pos - delta
+ (delta > cyclic_pos ? cyclic_size : 0)]; + (delta > cyclic_pos ? cyclic_size : 0)];
if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) { if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {
uint32_t len = 0; uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit)
while (++len != len_limit) ;
if (pb[len] != cur[len])
break;
if (len_best < len) { if (len_best < len) {
len_best = len; len_best = len;
matches->len = len; matches->len = len;
matches->dist = delta - 1; matches->dist = delta - 1;
++matches; ++matches;
if (len == len_limit) if (len == len_limit)
return matches; return matches;
} }
skipping to change at line 307 skipping to change at line 306
const uint32_t delta2 = pos - mf->hash[hash_2_value]; const uint32_t delta2 = pos - mf->hash[hash_2_value];
const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
mf->hash[hash_2_value] = pos; mf->hash[hash_2_value] = pos;
mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
uint32_t len_best = 2; uint32_t len_best = 2;
if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
for ( ; len_best != len_limit; ++len_best) len_best = lzma_memcmplen(cur - delta2, cur,
if (*(cur + len_best - delta2) != cur[len_best]) len_best, len_limit);
break;
matches[0].len = len_best; matches[0].len = len_best;
matches[0].dist = delta2 - 1; matches[0].dist = delta2 - 1;
matches_count = 1; matches_count = 1;
if (len_best == len_limit) { if (len_best == len_limit) {
hc_skip(); hc_skip();
return 1; // matches_count return 1; // matches_count
} }
} }
skipping to change at line 384 skipping to change at line 382
} }
if (delta2 != delta3 && delta3 < mf->cyclic_size if (delta2 != delta3 && delta3 < mf->cyclic_size
&& *(cur - delta3) == *cur) { && *(cur - delta3) == *cur) {
len_best = 3; len_best = 3;
matches[matches_count++].dist = delta3 - 1; matches[matches_count++].dist = delta3 - 1;
delta2 = delta3; delta2 = delta3;
} }
if (matches_count != 0) { if (matches_count != 0) {
for ( ; len_best != len_limit; ++len_best) len_best = lzma_memcmplen(cur - delta2, cur,
if (*(cur + len_best - delta2) != cur[len_best]) len_best, len_limit);
break;
matches[matches_count - 1].len = len_best; matches[matches_count - 1].len = len_best;
if (len_best == len_limit) { if (len_best == len_limit) {
hc_skip(); hc_skip();
return matches_count; return matches_count;
} }
} }
if (len_best < 3) if (len_best < 3)
skipping to change at line 469 skipping to change at line 466
} }
uint32_t *const pair = son + ((cyclic_pos - delta uint32_t *const pair = son + ((cyclic_pos - delta
+ (delta > cyclic_pos ? cyclic_size : 0)) + (delta > cyclic_pos ? cyclic_size : 0))
<< 1); << 1);
const uint8_t *const pb = cur - delta; const uint8_t *const pb = cur - delta;
uint32_t len = my_min(len0, len1); uint32_t len = my_min(len0, len1);
if (pb[len] == cur[len]) { if (pb[len] == cur[len]) {
while (++len != len_limit) len = lzma_memcmplen(pb, cur, len + 1, len_limit);
if (pb[len] != cur[len])
break;
if (len_best < len) { if (len_best < len) {
len_best = len; len_best = len;
matches->len = len; matches->len = len;
matches->dist = delta - 1; matches->dist = delta - 1;
++matches; ++matches;
if (len == len_limit) { if (len == len_limit) {
*ptr1 = pair[0]; *ptr1 = pair[0];
*ptr0 = pair[1]; *ptr0 = pair[1];
skipping to change at line 533 skipping to change at line 528
return; return;
} }
uint32_t *pair = son + ((cyclic_pos - delta uint32_t *pair = son + ((cyclic_pos - delta
+ (delta > cyclic_pos ? cyclic_size : 0)) + (delta > cyclic_pos ? cyclic_size : 0))
<< 1); << 1);
const uint8_t *pb = cur - delta; const uint8_t *pb = cur - delta;
uint32_t len = my_min(len0, len1); uint32_t len = my_min(len0, len1);
if (pb[len] == cur[len]) { if (pb[len] == cur[len]) {
while (++len != len_limit) len = lzma_memcmplen(pb, cur, len + 1, len_limit);
if (pb[len] != cur[len])
break;
if (len == len_limit) { if (len == len_limit) {
*ptr1 = pair[0]; *ptr1 = pair[0];
*ptr0 = pair[1]; *ptr0 = pair[1];
return; return;
} }
} }
if (pb[len] < cur[len]) { if (pb[len] < cur[len]) {
*ptr1 = cur_match; *ptr1 = cur_match;
skipping to change at line 619 skipping to change at line 612
const uint32_t delta2 = pos - mf->hash[hash_2_value]; const uint32_t delta2 = pos - mf->hash[hash_2_value];
const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value]; const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];
mf->hash[hash_2_value] = pos; mf->hash[hash_2_value] = pos;
mf->hash[FIX_3_HASH_SIZE + hash_value] = pos; mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;
uint32_t len_best = 2; uint32_t len_best = 2;
if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) { if (delta2 < mf->cyclic_size && *(cur - delta2) == *cur) {
for ( ; len_best != len_limit; ++len_best) len_best = lzma_memcmplen(
if (*(cur + len_best - delta2) != cur[len_best]) cur, cur - delta2, len_best, len_limit);
break;
matches[0].len = len_best; matches[0].len = len_best;
matches[0].dist = delta2 - 1; matches[0].dist = delta2 - 1;
matches_count = 1; matches_count = 1;
if (len_best == len_limit) { if (len_best == len_limit) {
bt_skip(); bt_skip();
return 1; // matches_count return 1; // matches_count
} }
} }
skipping to change at line 690 skipping to change at line 682
} }
if (delta2 != delta3 && delta3 < mf->cyclic_size if (delta2 != delta3 && delta3 < mf->cyclic_size
&& *(cur - delta3) == *cur) { && *(cur - delta3) == *cur) {
len_best = 3; len_best = 3;
matches[matches_count++].dist = delta3 - 1; matches[matches_count++].dist = delta3 - 1;
delta2 = delta3; delta2 = delta3;
} }
if (matches_count != 0) { if (matches_count != 0) {
for ( ; len_best != len_limit; ++len_best) len_best = lzma_memcmplen(
if (*(cur + len_best - delta2) != cur[len_best]) cur, cur - delta2, len_best, len_limit);
break;
matches[matches_count - 1].len = len_best; matches[matches_count - 1].len = len_best;
if (len_best == len_limit) { if (len_best == len_limit) {
bt_skip(); bt_skip();
return matches_count; return matches_count;
} }
} }
if (len_best < 3) if (len_best < 3)
 End of changes. 13 change blocks. 
39 lines changed or deleted 31 lines changed or added

This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/