Diff: lz_encoder.c - lz

	lz_encoder.c	lz_encoder.c

	skipping to change at line 23	skipping to change at line 23

	#include "lz_encoder.h"	#include "lz_encoder.h"
	#include "lz_encoder_hash.h"	#include "lz_encoder_hash.h"

	// See lz_encoder_hash.h. This is a bit hackish but avoids making	// See lz_encoder_hash.h. This is a bit hackish but avoids making
	// endianness a conditional in makefiles.	// endianness a conditional in makefiles.
	#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)	#if defined(WORDS_BIGENDIAN) && !defined(HAVE_SMALL)
	# include "lz_encoder_hash_table.h"	# include "lz_encoder_hash_table.h"
	#endif	#endif


		#include "memcmplen.h"

	struct lzma_coder_s {	struct lzma_coder_s {
	/// LZ-based encoder e.g. LZMA	/// LZ-based encoder e.g. LZMA
	lzma_lz_encoder lz;	lzma_lz_encoder lz;

	/// History buffer and match finder	/// History buffer and match finder
	lzma_mf mf;	lzma_mf mf;

	/// Next coder in the chain	/// Next coder in the chain
	lzma_next_coder next;	lzma_next_coder next;
	};	};

	skipping to change at line 76	skipping to change at line 78

	/// \brief Tries to fill the input window (mf->buffer)	/// \brief Tries to fill the input window (mf->buffer)
	///	///
	/// If we are the last encoder in the chain, our input data is in in[].	/// If we are the last encoder in the chain, our input data is in in[].
	/// Otherwise we call the next filter in the chain to process in[] and	/// Otherwise we call the next filter in the chain to process in[] and
	/// write its output to mf->buffer.	/// write its output to mf->buffer.
	///	///
	/// This function must not be called once it has returned LZMA_STREAM_END.	/// This function must not be called once it has returned LZMA_STREAM_END.
	///	///
	static lzma_ret	static lzma_ret

	fill_window(lzma_coder coder, lzma_allocator allocator, const uint8_t *in	fill_window(lzma_coder coder, const lzma_allocator allocator,
	,	const uint8_t in, size_t in_pos, size_t in_size,
	size_t *in_pos, size_t in_size, lzma_action action)	lzma_action action)
	{	{
	assert(coder->mf.read_pos <= coder->mf.write_pos);	assert(coder->mf.read_pos <= coder->mf.write_pos);

	// Move the sliding window if needed.	// Move the sliding window if needed.
	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after )	if (coder->mf.read_pos >= coder->mf.size - coder->mf.keep_size_after )
	move_window(&coder->mf);	move_window(&coder->mf);

	// Maybe this is ugly, but lzma_mf uses uint32_t for most things	// Maybe this is ugly, but lzma_mf uses uint32_t for most things
	// (which I find cleanest), but we need size_t here when filling	// (which I find cleanest), but we need size_t here when filling
	// the history window.	// the history window.

	skipping to change at line 147	skipping to change at line 150

	// Call the skip function directly instead of using	// Call the skip function directly instead of using
	// mf_skip(), since we don't want to touch mf->read_ahead.	// mf_skip(), since we don't want to touch mf->read_ahead.
	coder->mf.skip(&coder->mf, pending);	coder->mf.skip(&coder->mf, pending);
	}	}

	return ret;	return ret;
	}	}

	static lzma_ret	static lzma_ret

	lz_encode(lzma_coder coder, lzma_allocator allocator,	lz_encode(lzma_coder coder, const lzma_allocator allocator,
	const uint8_t restrict in, size_t restrict in_pos,	const uint8_t restrict in, size_t restrict in_pos,
	size_t in_size,	size_t in_size,
	uint8_t restrict out, size_t restrict out_pos,	uint8_t restrict out, size_t restrict out_pos,
	size_t out_size, lzma_action action)	size_t out_size, lzma_action action)
	{	{
	while (*out_pos < out_size	while (*out_pos < out_size
	&& (*in_pos < in_size \|\| action != LZMA_RUN)) {	&& (*in_pos < in_size \|\| action != LZMA_RUN)) {
	// Read more data to coder->mf.buffer if needed.	// Read more data to coder->mf.buffer if needed.
	if (coder->mf.action == LZMA_RUN && coder->mf.read_pos	if (coder->mf.action == LZMA_RUN && coder->mf.read_pos
	>= coder->mf.read_limit)	>= coder->mf.read_limit)

	skipping to change at line 177	skipping to change at line 180
	// an error occurred.	// an error occurred.
	coder->mf.action = LZMA_RUN;	coder->mf.action = LZMA_RUN;
	return ret;	return ret;
	}	}
	}	}

	return LZMA_OK;	return LZMA_OK;
	}	}

	static bool	static bool

	lz_encoder_prepare(lzma_mf mf, lzma_allocator allocator,	lz_encoder_prepare(lzma_mf mf, const lzma_allocator allocator,
	const lzma_lz_options *lz_options)	const lzma_lz_options *lz_options)
	{	{
	// For now, the dictionary size is limited to 1.5 GiB. This may grow	// For now, the dictionary size is limited to 1.5 GiB. This may grow
	// in the future if needed, but it needs a little more work than jus t	// in the future if needed, but it needs a little more work than jus t
	// changing this check.	// changing this check.
	if (lz_options->dict_size < LZMA_DICT_SIZE_MIN	if (lz_options->dict_size < LZMA_DICT_SIZE_MIN
	\|\| lz_options->dict_size	\|\| lz_options->dict_size
	> (UINT32_C(1) << 30) + (UINT32_C(1) << 29)	> (UINT32_C(1) << 30) + (UINT32_C(1) << 29)
	\|\| lz_options->nice_len > lz_options->match_len_max)	\|\| lz_options->nice_len > lz_options->match_len_max)
	return true;	return true;

	skipping to change at line 323	skipping to change at line 326
	if (hash_bytes > 2)	if (hash_bytes > 2)
	hs += HASH_2_SIZE;	hs += HASH_2_SIZE;
	if (hash_bytes > 3)	if (hash_bytes > 3)
	hs += HASH_3_SIZE;	hs += HASH_3_SIZE;
	/*	/*
	No match finder uses this at the moment.	No match finder uses this at the moment.
	if (mf->hash_bytes > 4)	if (mf->hash_bytes > 4)
	hs += HASH_4_SIZE;	hs += HASH_4_SIZE;
	*/	*/


	// If the above code calculating hs is modified, make sure that	const uint32_t old_hash_count = mf->hash_count;
	// this assertion stays valid (UINT32_MAX / 5 is not strictly the	const uint32_t old_sons_count = mf->sons_count;
	// exact limit). If it doesn't, you need to calculate that	mf->hash_count = hs;
	// hash_size_sum + sons_count cannot overflow.
	assert(hs < UINT32_MAX / 5);

	const uint32_t old_count = mf->hash_size_sum + mf->sons_count;
	mf->hash_size_sum = hs;
	mf->sons_count = mf->cyclic_size;	mf->sons_count = mf->cyclic_size;
	if (is_bt)	if (is_bt)
	mf->sons_count *= 2;	mf->sons_count *= 2;


	const uint32_t new_count = mf->hash_size_sum + mf->sons_count;

	// Deallocate the old hash array if it exists and has different size	// Deallocate the old hash array if it exists and has different size
	// than what is needed now.	// than what is needed now.

	if (old_count != new_count) {	if (old_hash_count != mf->hash_count
		\|\| old_sons_count != mf->sons_count) {
	lzma_free(mf->hash, allocator);	lzma_free(mf->hash, allocator);
	mf->hash = NULL;	mf->hash = NULL;


		lzma_free(mf->son, allocator);
		mf->son = NULL;
	}	}

	// Maximum number of match finder cycles	// Maximum number of match finder cycles
	mf->depth = lz_options->depth;	mf->depth = lz_options->depth;
	if (mf->depth == 0) {	if (mf->depth == 0) {
	if (is_bt)	if (is_bt)
	mf->depth = 16 + mf->nice_len / 2;	mf->depth = 16 + mf->nice_len / 2;
	else	else
	mf->depth = 4 + mf->nice_len / 4;	mf->depth = 4 + mf->nice_len / 4;
	}	}

	return false;	return false;
	}	}

	static bool	static bool

	lz_encoder_init(lzma_mf mf, lzma_allocator allocator,	lz_encoder_init(lzma_mf mf, const lzma_allocator allocator,
	const lzma_lz_options *lz_options)	const lzma_lz_options *lz_options)
	{	{
	// Allocate the history buffer.	// Allocate the history buffer.
	if (mf->buffer == NULL) {	if (mf->buffer == NULL) {

	mf->buffer = lzma_alloc(mf->size, allocator);	// lzma_memcmplen() is used for the dictionary buffer
		// so we need to allocate a few extra bytes to prevent
		// it from reading past the end of the buffer.
		mf->buffer = lzma_alloc(mf->size + LZMA_MEMCMPLEN_EXTRA,
		allocator);
	if (mf->buffer == NULL)	if (mf->buffer == NULL)
	return true;	return true;


		// Keep Valgrind happy with lzma_memcmplen() and initialize
		// the extra bytes whose value may get read but which will
		// effectively get ignored.
		memzero(mf->buffer + mf->size, LZMA_MEMCMPLEN_EXTRA);
	}	}

	// Use cyclic_size as initial mf->offset. This allows	// Use cyclic_size as initial mf->offset. This allows
	// avoiding a few branches in the match finders. The downside is	// avoiding a few branches in the match finders. The downside is
	// that match finder needs to be normalized more often, which may	// that match finder needs to be normalized more often, which may
	// hurt performance with huge dictionaries.	// hurt performance with huge dictionaries.
	mf->offset = mf->cyclic_size;	mf->offset = mf->cyclic_size;
	mf->read_pos = 0;	mf->read_pos = 0;
	mf->read_ahead = 0;	mf->read_ahead = 0;
	mf->read_limit = 0;	mf->read_limit = 0;
	mf->write_pos = 0;	mf->write_pos = 0;
	mf->pending = 0;	mf->pending = 0;


	// Allocate match finder's hash array.
	const size_t alloc_count = mf->hash_size_sum + mf->sons_count;

	#if UINT32_MAX >= SIZE_MAX / 4	#if UINT32_MAX >= SIZE_MAX / 4
	// Check for integer overflow. (Huge dictionaries are not	// Check for integer overflow. (Huge dictionaries are not
	// possible on 32-bit CPU.)	// possible on 32-bit CPU.)

	if (alloc_count > SIZE_MAX / sizeof(uint32_t))	if (mf->hash_count > SIZE_MAX / sizeof(uint32_t)
		\|\| mf->sons_count > SIZE_MAX / sizeof(uint32_t))
	return true;	return true;
	#endif	#endif


		// Allocate and initialize the hash table. Since EMPTY_HASH_VALUE
		// is zero, we can use lzma_alloc_zero() or memzero() for mf->hash.
		//
		// We don't need to initialize mf->son, but not doing that may
		// make Valgrind complain in normalization (see normalize() in
		// lz_encoder_mf.c). Skipping the initialization is very good
		// when big dictionary is used but only small amount of data gets
		// actually compressed: most of the mf->son won't get actually
		// allocated by the kernel, so we avoid wasting RAM and improve
		// initialization speed a lot.
	if (mf->hash == NULL) {	if (mf->hash == NULL) {

	mf->hash = lzma_alloc(alloc_count * sizeof(uint32_t),	mf->hash = lzma_alloc_zero(mf->hash_count * sizeof(uint32_t)
		,
		allocator);
		mf->son = lzma_alloc(mf->sons_count * sizeof(uint32_t),
	allocator);	allocator);

	if (mf->hash == NULL)
	return true;
	}


	mf->son = mf->hash + mf->hash_size_sum;	if (mf->hash == NULL \|\| mf->son == NULL) {
	mf->cyclic_pos = 0;	lzma_free(mf->hash, allocator);
		mf->hash = NULL;

		lzma_free(mf->son, allocator);
		mf->son = NULL;


	// Initialize the hash table. Since EMPTY_HASH_VALUE is zero, we	return true;
	// can use memset().	}
		} else {
	/*	/*

	for (uint32_t i = 0; i < hash_size_sum; ++i)	for (uint32_t i = 0; i < mf->hash_count; ++i)
	mf->hash[i] = EMPTY_HASH_VALUE;	mf->hash[i] = EMPTY_HASH_VALUE;
	*/	*/

	memzero(mf->hash, (size_t)(mf->hash_size_sum) * sizeof(uint32_t));	memzero(mf->hash, mf->hash_count * sizeof(uint32_t));
		}


	// We don't need to initialize mf->son, but not doing that will	mf->cyclic_pos = 0;
	// make Valgrind complain in normalization (see normalize() in
	// lz_encoder_mf.c).
	//
	// Skipping this initialization is very good when big dictionary i
	s
	// used but only small amount of data gets actually compressed: most
	// of the mf->hash won't get actually allocated by the kernel, so
	// we avoid wasting RAM and improve initialization speed a lot.
	//memzero(mf->son, (size_t)(mf->sons_count) * sizeof(uint32_t));

	// Handle preset dictionary.	// Handle preset dictionary.
	if (lz_options->preset_dict != NULL	if (lz_options->preset_dict != NULL
	&& lz_options->preset_dict_size > 0) {	&& lz_options->preset_dict_size > 0) {
	// If the preset dictionary is bigger than the actual	// If the preset dictionary is bigger than the actual
	// dictionary, use only the tail.	// dictionary, use only the tail.
	mf->write_pos = my_min(lz_options->preset_dict_size, mf->siz e);	mf->write_pos = my_min(lz_options->preset_dict_size, mf->siz e);
	memcpy(mf->buffer, lz_options->preset_dict	memcpy(mf->buffer, lz_options->preset_dict
	+ lz_options->preset_dict_size - mf->write_p os,	+ lz_options->preset_dict_size - mf->write_p os,
	mf->write_pos);	mf->write_pos);

	skipping to change at line 441	skipping to change at line 455
	return false;	return false;
	}	}

	extern uint64_t	extern uint64_t
	lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)	lzma_lz_encoder_memusage(const lzma_lz_options *lz_options)
	{	{
	// Old buffers must not exist when calling lz_encoder_prepare().	// Old buffers must not exist when calling lz_encoder_prepare().
	lzma_mf mf = {	lzma_mf mf = {
	.buffer = NULL,	.buffer = NULL,
	.hash = NULL,	.hash = NULL,

	.hash_size_sum = 0,	.son = NULL,
		.hash_count = 0,
	.sons_count = 0,	.sons_count = 0,
	};	};

	// Setup the size information into mf.	// Setup the size information into mf.
	if (lz_encoder_prepare(&mf, NULL, lz_options))	if (lz_encoder_prepare(&mf, NULL, lz_options))
	return UINT64_MAX;	return UINT64_MAX;

	// Calculate the memory usage.	// Calculate the memory usage.

	return (uint64_t)(mf.hash_size_sum + mf.sons_count)	return ((uint64_t)(mf.hash_count) + mf.sons_count) * sizeof(uint32_t
	* sizeof(uint32_t)	)
	+ (uint64_t)(mf.size) + sizeof(lzma_coder);	+ mf.size + sizeof(lzma_coder);
	}	}

	static void	static void

	lz_encoder_end(lzma_coder coder, lzma_allocator allocator)	lz_encoder_end(lzma_coder coder, const lzma_allocator allocator)
	{	{
	lzma_next_end(&coder->next, allocator);	lzma_next_end(&coder->next, allocator);


		lzma_free(coder->mf.son, allocator);
	lzma_free(coder->mf.hash, allocator);	lzma_free(coder->mf.hash, allocator);
	lzma_free(coder->mf.buffer, allocator);	lzma_free(coder->mf.buffer, allocator);

	if (coder->lz.end != NULL)	if (coder->lz.end != NULL)
	coder->lz.end(coder->lz.coder, allocator);	coder->lz.end(coder->lz.coder, allocator);
	else	else
	lzma_free(coder->lz.coder, allocator);	lzma_free(coder->lz.coder, allocator);

	lzma_free(coder, allocator);	lzma_free(coder, allocator);
	return;	return;
	}	}

	static lzma_ret	static lzma_ret

	lz_encoder_update(lzma_coder coder, lzma_allocator allocator,	lz_encoder_update(lzma_coder coder, const lzma_allocator allocator,
	const lzma_filter *filters_null lzma_attribute((__unused__)) ,	const lzma_filter *filters_null lzma_attribute((__unused__)) ,
	const lzma_filter *reversed_filters)	const lzma_filter *reversed_filters)
	{	{
	if (coder->lz.options_update == NULL)	if (coder->lz.options_update == NULL)
	return LZMA_PROG_ERROR;	return LZMA_PROG_ERROR;

	return_if_error(coder->lz.options_update(	return_if_error(coder->lz.options_update(
	coder->lz.coder, reversed_filters));	coder->lz.coder, reversed_filters));

	return lzma_next_filter_update(	return lzma_next_filter_update(
	&coder->next, allocator, reversed_filters + 1);	&coder->next, allocator, reversed_filters + 1);
	}	}

	extern lzma_ret	extern lzma_ret

	lzma_lz_encoder_init(lzma_next_coder next, lzma_allocator allocator,	lzma_lz_encoder_init(lzma_next_coder next, const lzma_allocator allocator ,
	const lzma_filter_info *filters,	const lzma_filter_info *filters,
	lzma_ret (lz_init)(lzma_lz_encoder lz,	lzma_ret (lz_init)(lzma_lz_encoder lz,

	lzma_allocator allocator, const void options,	const lzma_allocator allocator, const void options ,
	lzma_lz_options *lz_options))	lzma_lz_options *lz_options))
	{	{
	#ifdef HAVE_SMALL	#ifdef HAVE_SMALL
	// We need that the CRC32 table has been initialized.	// We need that the CRC32 table has been initialized.
	lzma_crc32_init();	lzma_crc32_init();
	#endif	#endif

	// Allocate and initialize the base data structure.	// Allocate and initialize the base data structure.
	if (next->coder == NULL) {	if (next->coder == NULL) {
	next->coder = lzma_alloc(sizeof(lzma_coder), allocator);	next->coder = lzma_alloc(sizeof(lzma_coder), allocator);

	skipping to change at line 515	skipping to change at line 530
	next->code = &lz_encode;	next->code = &lz_encode;
	next->end = &lz_encoder_end;	next->end = &lz_encoder_end;
	next->update = &lz_encoder_update;	next->update = &lz_encoder_update;

	next->coder->lz.coder = NULL;	next->coder->lz.coder = NULL;
	next->coder->lz.code = NULL;	next->coder->lz.code = NULL;
	next->coder->lz.end = NULL;	next->coder->lz.end = NULL;

	next->coder->mf.buffer = NULL;	next->coder->mf.buffer = NULL;
	next->coder->mf.hash = NULL;	next->coder->mf.hash = NULL;

	next->coder->mf.hash_size_sum = 0;	next->coder->mf.son = NULL;
		next->coder->mf.hash_count = 0;
	next->coder->mf.sons_count = 0;	next->coder->mf.sons_count = 0;

	next->coder->next = LZMA_NEXT_CODER_INIT;	next->coder->next = LZMA_NEXT_CODER_INIT;
	}	}

	// Initialize the LZ-based encoder.	// Initialize the LZ-based encoder.
	lzma_lz_options lz_options;	lzma_lz_options lz_options;
	return_if_error(lz_init(&next->coder->lz, allocator,	return_if_error(lz_init(&next->coder->lz, allocator,
	filters[0].options, &lz_options));	filters[0].options, &lz_options));


End of changes. 29 change blocks.
	52 lines changed or deleted	68 lines changed or added
This html diff was produced by rfcdiff 1.41. The latest version is available from http://tools.ietf.org/tools/rfcdiff/