Diff: lz_encoder_mf.c - lz_encoder

	lz_encoder_mf.c	lz_encoder_mf.c

	skipping to change at line 16	skipping to change at line 16
	// Authors: Igor Pavlov	// Authors: Igor Pavlov
	// Lasse Collin	// Lasse Collin
	//	//
	// This file has been put into the public domain.	// This file has been put into the public domain.
	// You can do whatever you want with this file.	// You can do whatever you want with this file.
	//	//
	/////////////////////////////////////////////////////////////////////////// ////	/////////////////////////////////////////////////////////////////////////// ////

	#include "lz_encoder.h"	#include "lz_encoder.h"
	#include "lz_encoder_hash.h"	#include "lz_encoder_hash.h"

		#include "memcmplen.h"

	/// \brief Find matches starting from the current byte	/// \brief Find matches starting from the current byte
	///	///
	/// \return The length of the longest match found	/// \return The length of the longest match found
	extern uint32_t	extern uint32_t
	lzma_mf_find(lzma_mf mf, uint32_t count_ptr, lzma_match *matches)	lzma_mf_find(lzma_mf mf, uint32_t count_ptr, lzma_match *matches)
	{	{
	// Call the match finder. It returns the number of length-distance	// Call the match finder. It returns the number of length-distance
	// pairs found.	// pairs found.
	// FIXME: Minimum count is zero, what _exactly_ is the maximum?	// FIXME: Minimum count is zero, what _exactly_ is the maximum?

	skipping to change at line 67	skipping to change at line 68
	limit = mf->match_len_max;	limit = mf->match_len_max;

	// Pointer to the byte we just ran through	// Pointer to the byte we just ran through
	// the match finder.	// the match finder.
	const uint8_t *p1 = mf_ptr(mf) - 1;	const uint8_t *p1 = mf_ptr(mf) - 1;

	// Pointer to the beginning of the match. We need -1	// Pointer to the beginning of the match. We need -1
	// here because the match distances are zero based.	// here because the match distances are zero based.
	const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ;	const uint8_t *p2 = p1 - matches[count - 1].dist - 1 ;


	while (len_best < limit	len_best = lzma_memcmplen(p1, p2, len_best, limit);
	&& p1[len_best] == p2[len_best])
	++len_best;
	}	}
	}	}

	*count_ptr = count;	*count_ptr = count;

	// Finally update the read position to indicate that match finder wa s	// Finally update the read position to indicate that match finder wa s
	// run for this dictionary offset.	// run for this dictionary offset.
	++mf->read_ahead;	++mf->read_ahead;

	return len_best;	return len_best;

	skipping to change at line 115	skipping to change at line 114
	normalize(lzma_mf *mf)	normalize(lzma_mf *mf)
	{	{
	assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);	assert(mf->read_pos + mf->offset == MUST_NORMALIZE_POS);

	// In future we may not want to touch the lowest bits, because there	// In future we may not want to touch the lowest bits, because there
	// may be match finders that use larger resolution than one byte.	// may be match finders that use larger resolution than one byte.
	const uint32_t subvalue	const uint32_t subvalue
	= (MUST_NORMALIZE_POS - mf->cyclic_size);	= (MUST_NORMALIZE_POS - mf->cyclic_size);
	// & (~(UINT32_C(1) << 10) - 1);	// & (~(UINT32_C(1) << 10) - 1);


	const uint32_t count = mf->hash_size_sum + mf->sons_count;	for (uint32_t i = 0; i < mf->hash_count; ++i) {
	uint32_t *hash = mf->hash;

	for (uint32_t i = 0; i < count; ++i) {
	// If the distance is greater than the dictionary size,	// If the distance is greater than the dictionary size,
	// we can simply mark the hash element as empty.	// we can simply mark the hash element as empty.

		if (mf->hash[i] <= subvalue)
		mf->hash[i] = EMPTY_HASH_VALUE;
		else
		mf->hash[i] -= subvalue;
		}

		for (uint32_t i = 0; i < mf->sons_count; ++i) {
		// Do the same for mf->son.
	//	//

	// NOTE: Only the first mf->hash_size_sum elements are	// NOTE: There may be uninitialized elements in mf->son.
	// initialized for sure. There may be uninitialized elements	// Valgrind may complain that the "if" below depends on
	// in mf->son. Since we go through both mf->hash and	// an uninitialized value. In this case it is safe to ignore
	// mf->son here in normalization, Valgrind may complain	// the warning. See also the comments in lz_encoder_init()
	// that the "if" below depends on uninitialized value. In	// in lz_encoder.c.
	// this case it is safe to ignore the warning. See also the	if (mf->son[i] <= subvalue)
	// comments in lz_encoder_init() in lz_encoder.c.	mf->son[i] = EMPTY_HASH_VALUE;
	if (hash[i] <= subvalue)
	hash[i] = EMPTY_HASH_VALUE;
	else	else

	hash[i] -= subvalue;	mf->son[i] -= subvalue;
	}	}

	// Update offset to match the new locations.	// Update offset to match the new locations.
	mf->offset -= subvalue;	mf->offset -= subvalue;

	return;	return;
	}	}

	/// Mark the current byte as processed from point of view of the match find er.	/// Mark the current byte as processed from point of view of the match find er.
	static void	static void

	skipping to change at line 261	skipping to change at line 263
	while (true) {	while (true) {
	const uint32_t delta = pos - cur_match;	const uint32_t delta = pos - cur_match;
	if (depth-- == 0 \|\| delta >= cyclic_size)	if (depth-- == 0 \|\| delta >= cyclic_size)
	return matches;	return matches;

	const uint8_t *const pb = cur - delta;	const uint8_t *const pb = cur - delta;
	cur_match = son[cyclic_pos - delta	cur_match = son[cyclic_pos - delta
	+ (delta > cyclic_pos ? cyclic_size : 0)];	+ (delta > cyclic_pos ? cyclic_size : 0)];

	if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {	if (pb[len_best] == cur[len_best] && pb[0] == cur[0]) {

	uint32_t len = 0;	uint32_t len = lzma_memcmplen(pb, cur, 1, len_limit)
	while (++len != len_limit)	;
	if (pb[len] != cur[len])
	break;

	if (len_best < len) {	if (len_best < len) {
	len_best = len;	len_best = len;
	matches->len = len;	matches->len = len;
	matches->dist = delta - 1;	matches->dist = delta - 1;
	++matches;	++matches;

	if (len == len_limit)	if (len == len_limit)
	return matches;	return matches;
	}	}

	skipping to change at line 307	skipping to change at line 306

	const uint32_t delta2 = pos - mf->hash[hash_2_value];	const uint32_t delta2 = pos - mf->hash[hash_2_value];
	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];

	mf->hash[hash_2_value] = pos;	mf->hash[hash_2_value] = pos;
	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;

	uint32_t len_best = 2;	uint32_t len_best = 2;

	if (delta2 < mf->cyclic_size && (cur - delta2) == cur) {	if (delta2 < mf->cyclic_size && (cur - delta2) == cur) {

	for ( ; len_best != len_limit; ++len_best)	len_best = lzma_memcmplen(cur - delta2, cur,
	if (*(cur + len_best - delta2) != cur[len_best])	len_best, len_limit);
	break;

	matches[0].len = len_best;	matches[0].len = len_best;
	matches[0].dist = delta2 - 1;	matches[0].dist = delta2 - 1;
	matches_count = 1;	matches_count = 1;

	if (len_best == len_limit) {	if (len_best == len_limit) {
	hc_skip();	hc_skip();
	return 1; // matches_count	return 1; // matches_count
	}	}
	}	}

	skipping to change at line 384	skipping to change at line 382
	}	}

	if (delta2 != delta3 && delta3 < mf->cyclic_size	if (delta2 != delta3 && delta3 < mf->cyclic_size
	&& (cur - delta3) == cur) {	&& (cur - delta3) == cur) {
	len_best = 3;	len_best = 3;
	matches[matches_count++].dist = delta3 - 1;	matches[matches_count++].dist = delta3 - 1;
	delta2 = delta3;	delta2 = delta3;
	}	}

	if (matches_count != 0) {	if (matches_count != 0) {

	for ( ; len_best != len_limit; ++len_best)	len_best = lzma_memcmplen(cur - delta2, cur,
	if (*(cur + len_best - delta2) != cur[len_best])	len_best, len_limit);
	break;

	matches[matches_count - 1].len = len_best;	matches[matches_count - 1].len = len_best;

	if (len_best == len_limit) {	if (len_best == len_limit) {
	hc_skip();	hc_skip();
	return matches_count;	return matches_count;
	}	}
	}	}

	if (len_best < 3)	if (len_best < 3)

	skipping to change at line 469	skipping to change at line 466
	}	}

	uint32_t *const pair = son + ((cyclic_pos - delta	uint32_t *const pair = son + ((cyclic_pos - delta
	+ (delta > cyclic_pos ? cyclic_size : 0))	+ (delta > cyclic_pos ? cyclic_size : 0))
	<< 1);	<< 1);

	const uint8_t *const pb = cur - delta;	const uint8_t *const pb = cur - delta;
	uint32_t len = my_min(len0, len1);	uint32_t len = my_min(len0, len1);

	if (pb[len] == cur[len]) {	if (pb[len] == cur[len]) {

	while (++len != len_limit)	len = lzma_memcmplen(pb, cur, len + 1, len_limit);
	if (pb[len] != cur[len])
	break;

	if (len_best < len) {	if (len_best < len) {
	len_best = len;	len_best = len;
	matches->len = len;	matches->len = len;
	matches->dist = delta - 1;	matches->dist = delta - 1;
	++matches;	++matches;

	if (len == len_limit) {	if (len == len_limit) {
	*ptr1 = pair[0];	*ptr1 = pair[0];
	*ptr0 = pair[1];	*ptr0 = pair[1];

	skipping to change at line 533	skipping to change at line 528
	return;	return;
	}	}

	uint32_t *pair = son + ((cyclic_pos - delta	uint32_t *pair = son + ((cyclic_pos - delta
	+ (delta > cyclic_pos ? cyclic_size : 0))	+ (delta > cyclic_pos ? cyclic_size : 0))
	<< 1);	<< 1);
	const uint8_t *pb = cur - delta;	const uint8_t *pb = cur - delta;
	uint32_t len = my_min(len0, len1);	uint32_t len = my_min(len0, len1);

	if (pb[len] == cur[len]) {	if (pb[len] == cur[len]) {

	while (++len != len_limit)	len = lzma_memcmplen(pb, cur, len + 1, len_limit);
	if (pb[len] != cur[len])
	break;

	if (len == len_limit) {	if (len == len_limit) {
	*ptr1 = pair[0];	*ptr1 = pair[0];
	*ptr0 = pair[1];	*ptr0 = pair[1];
	return;	return;
	}	}
	}	}

	if (pb[len] < cur[len]) {	if (pb[len] < cur[len]) {
	*ptr1 = cur_match;	*ptr1 = cur_match;

	skipping to change at line 619	skipping to change at line 612

	const uint32_t delta2 = pos - mf->hash[hash_2_value];	const uint32_t delta2 = pos - mf->hash[hash_2_value];
	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];	const uint32_t cur_match = mf->hash[FIX_3_HASH_SIZE + hash_value];

	mf->hash[hash_2_value] = pos;	mf->hash[hash_2_value] = pos;
	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;	mf->hash[FIX_3_HASH_SIZE + hash_value] = pos;

	uint32_t len_best = 2;	uint32_t len_best = 2;

	if (delta2 < mf->cyclic_size && (cur - delta2) == cur) {	if (delta2 < mf->cyclic_size && (cur - delta2) == cur) {

	for ( ; len_best != len_limit; ++len_best)	len_best = lzma_memcmplen(
	if (*(cur + len_best - delta2) != cur[len_best])	cur, cur - delta2, len_best, len_limit);
	break;

	matches[0].len = len_best;	matches[0].len = len_best;
	matches[0].dist = delta2 - 1;	matches[0].dist = delta2 - 1;
	matches_count = 1;	matches_count = 1;

	if (len_best == len_limit) {	if (len_best == len_limit) {
	bt_skip();	bt_skip();
	return 1; // matches_count	return 1; // matches_count
	}	}
	}	}

	skipping to change at line 690	skipping to change at line 682
	}	}

	if (delta2 != delta3 && delta3 < mf->cyclic_size	if (delta2 != delta3 && delta3 < mf->cyclic_size
	&& (cur - delta3) == cur) {	&& (cur - delta3) == cur) {
	len_best = 3;	len_best = 3;
	matches[matches_count++].dist = delta3 - 1;	matches[matches_count++].dist = delta3 - 1;
	delta2 = delta3;	delta2 = delta3;
	}	}

	if (matches_count != 0) {	if (matches_count != 0) {

	for ( ; len_best != len_limit; ++len_best)	len_best = lzma_memcmplen(
	if (*(cur + len_best - delta2) != cur[len_best])	cur, cur - delta2, len_best, len_limit);
	break;

	matches[matches_count - 1].len = len_best;	matches[matches_count - 1].len = len_best;

	if (len_best == len_limit) {	if (len_best == len_limit) {
	bt_skip();	bt_skip();
	return matches_count;	return matches_count;
	}	}
	}	}

	if (len_best < 3)	if (len_best < 3)

End of changes. 13 change blocks.
	39 lines changed or deleted	31 lines changed or added
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/