CnC_Remastered_Collection/REDALERT/LZW.CPP

//
// Copyright 2020 Electronic Arts Inc.
//
// TiberianDawn.DLL and RedAlert.dll and corresponding source code is free
// software: you can redistribute it and/or modify it under the terms of
// the GNU General Public License as published by the Free Software Foundation,
// either version 3 of the License, or (at your option) any later version.

// TiberianDawn.DLL and RedAlert.dll and corresponding source code is distributed
// in the hope that it will be useful, but with permitted additional restrictions
// under Section 7 of the GPL. See the GNU General Public License in LICENSE.TXT
// distributed with this program. You should have received a copy of the
// GNU General Public License along with permitted additional restrictions
// with this program. If not, see https://github.com/electronicarts/CnC_Remastered_Collection

/* $Header: /CounterStrike/LZW.CPP 1     3/03/97 10:25a Joe_bostic $ */
/***********************************************************************************************
 ***              C O N F I D E N T I A L  ---  W E S T W O O D  S T U D I O S               ***
 ***********************************************************************************************
 *                                                                                             *
 *                 Project Name : Command & Conquer                                            *
 *                                                                                             *
 *                    File Name : LZW.CPP                                                      *
 *                                                                                             *
 *                   Programmer : Joe L. Bostic                                                *
 *                                                                                             *
 *                   Start Date : 08/28/96                                                     *
 *                                                                                             *
 *                  Last Update : August 28, 1996 [JLB]                                        *
 *                                                                                             *
 *---------------------------------------------------------------------------------------------*
 * Functions:                                                                                  *
 *   Find_Child_Node -- Find a matching dictionary entry.                                      *
 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include	"xstraw.h"
#include	"xpipe.h"
#include	"buff.h"
#include	"lzw.h"


/*
**	Default constructor. Hands off to Reset() so that a freshly built
**	engine starts with every dictionary slot flagged as unused.
*/
LZWEngine::LZWEngine(void)
{
	this->Reset();
}


/*
**	Reset -- Flag every slot of the dictionary as unused.
**
**	Walks all TABLE_SIZE entries of dict[] in ascending order and calls
**	Make_Unused() on each one.
*/
void LZWEngine::Reset(void)
{
	int slot = 0;
	while (slot < TABLE_SIZE) {
		dict[slot].Make_Unused();
		++slot;
	}
}

/*
**	Compress -- LZW compress the source buffer into the destination buffer.
**
**	The output is a sequence of CodeType values, each written through the
**	pipe as the raw bytes of the variable. Codes below 256 stand for a
**	single literal byte, codes from FIRST_CODE upward stand for dictionary
**	strings, and END_OF_STREAM is written last. An empty source produces a
**	stream consisting of END_OF_STREAM only.
**
**	INPUT:	input		-- Buffer that supplies the raw source bytes.
**
**				output	-- Buffer that receives the code stream.
**
**	OUTPUT:	Returns the sum of the values returned by the pipe's Put()
**				calls (presumably the number of bytes written -- confirm
**				against BufferPipe).
*/
int LZWEngine::Compress(Buffer const & input, Buffer const & output)
{
	BufferStraw instraw(input);
	BufferPipe outpipe(output);

	/*
	**	Every call numbers its codes from FIRST_CODE again, so dictionary
	**	entries left behind by an earlier call on this engine must not be
	**	matched; they would yield codes the decoder has not defined yet.
	*/
	Reset();

	int outcount = 0;
	CodeType next_code = FIRST_CODE;

	/*
	**	Prime the current code with the first source byte. The byte is read
	**	into a char sized variable (rather than into part of the wider code
	**	variable) so the result does not depend on host byte order. If the
	**	source is empty, the current code stays at END_OF_STREAM.
	*/
	CodeType string_code = END_OF_STREAM;
	unsigned char first;
	if (instraw.Get(&first, sizeof(first)) != 0) {
		string_code = first;
	}

	for (;;) {

		/*
		**	Fetch a character from the source data stream. If exhausted,
		**	then break out of the process loop so that the final code
		**	can be written out.
		*/
		unsigned char character;
		if (instraw.Get(&character, sizeof(character)) == 0) break;

		/*
		**	See if there is a match for the current code and current
		**	character. A match indicates that there is already a
		**	dictionary entry that fully represents the character
		**	sequence.
		*/
		int index = Find_Child_Node(string_code, character);

		/*
		**	If a code match was found, then set the current code
		**	value to this code value that represents the concatenation
		**	of the previous code value and the current character. The
		**	slot returned may also be an unused one (no match exists), so
		**	it must be checked before its code value is trusted.
		*/
		if (index != -1 && !dict[index].Is_Unused()) {
			string_code = dict[index].CodeValue;
		} else {

			/*
			**	Since no exact match was found, then create a new code
			**	entry that represents the current code and character
			**	value concatenated. This presumes there is room in the
			**	code table.
			*/
			if (index != -1 && next_code <= MAX_CODE) {
				dict[index] = CodeClass(next_code, string_code, character);
				next_code++;
			}

			/*
			**	Output the code to the compression stream and reset the
			**	current code value to match the current character. This
			**	has the effect of clearing out the current character
			**	sequence scan in preparation for building a new one. It
			**	also ensures that the character will be written out.
			*/
			outcount += outpipe.Put(&string_code, sizeof(string_code));
			string_code = character;
		}
	}

	/*
	**	Flush the code that was still being built, then terminate the
	**	stream. For an empty source the pending code already is
	**	END_OF_STREAM, so it is written only once.
	*/
	outcount += outpipe.Put(&string_code, sizeof(string_code));
	if (string_code != END_OF_STREAM) {
		string_code = END_OF_STREAM;
		outcount += outpipe.Put(&string_code, sizeof(string_code));
	}

	return(outcount);
}


/*
**	Uncompress -- Expand an LZW code stream back into raw bytes.
**
**	Reads CodeType values from the source buffer and rebuilds the
**	dictionary while decoding, mirroring what Compress() does. Decoding
**	stops at END_OF_STREAM, when the source runs dry, or at the first code
**	that could not occur in a well formed stream.
**
**	INPUT:	input		-- Buffer that supplies the code stream.
**
**				output	-- Buffer that receives the decoded bytes.
**
**	OUTPUT:	Returns the sum of the values returned by the pipe's Put()
**				calls (presumably the number of bytes written -- confirm
**				against BufferPipe).
*/
int LZWEngine::Uncompress(Buffer const & input, Buffer const & output)
{
	int outcount = 0;
	BufferStraw instraw(input);
	BufferPipe outpipe(output);

	CodeType old_code;
	if (instraw.Get(&old_code, sizeof(old_code)) == 0) {
		return(outcount);
	}

	/*
	**	The first code of a stream is always a literal byte. Compress()
	**	writes a lone END_OF_STREAM for an empty source, which must decode
	**	to nothing rather than to a stray zero byte. Any other value above
	**	255 (or a negative one, which the unsigned cast turns into a large
	**	value) cannot start a valid stream, so nothing is decoded.
	*/
	if ((unsigned int)old_code > 255) {
		return(outcount);
	}

	unsigned char character = (unsigned char)old_code;
	outcount += outpipe.Put(&character, sizeof(character));

	unsigned int count;
	CodeType new_code;
	CodeType next_code = FIRST_CODE;
	for (;;) {
		if (instraw.Get(&new_code, sizeof(new_code)) == 0) break;

		if (new_code == END_OF_STREAM) break;

		/*
		**	A well formed stream only ever refers to codes that are already
		**	defined, or to the very next code about to be defined. Anything
		**	else would make Decode_String() index outside of the dictionary
		**	or follow entries that were never filled in, so stop decoding.
		*/
		if ((unsigned int)new_code > (unsigned int)next_code || (unsigned int)new_code > (unsigned int)MAX_CODE) break;

		/*
		** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
		** case which generates an undefined code.  It handles it by decoding
		** the last code, and adding a single character to the end of the decode string.
		*/
		if (new_code >= next_code) {
			decode_stack[0] = character;
			count = 1;
			count += Decode_String(&decode_stack[1], old_code);
		} else {
			count = Decode_String(decode_stack, new_code);
		}

		/*
		**	Decode_String() stores the string back to front, so the last
		**	stack element is the first character of the string. Remember it
		**	for the dictionary entry, then emit the stack in reverse.
		*/
		character = decode_stack[count-1];
		while (count > 0) {
			--count;
			outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0]));
		}

		/*
		**	Add the new code sequence to the dictionary (presuming there is still
		**	room).
		*/
		if (next_code <= MAX_CODE) {
			dict[next_code] = CodeClass(next_code, old_code, character);
			next_code++;
		}
		old_code = new_code;
	}

	return(outcount);
}


/*
**	Make_LZW_Hash -- Build the first-try dictionary index for a code and
**	character pair.
**
**	The character (taken as an unsigned byte) is shifted left by BITS-8
**	and exclusive-ORed with the code value.
*/
int LZWEngine::Make_LZW_Hash(CodeType code, char character)
{
	int const shifted = ((int)(unsigned char)character) << (BITS - 8);
	return(shifted ^ (int)code);
}


/*
**	Find_Child_Node -- Find a matching dictionary entry.
**
**	Probes the dictionary for the entry that represents parent_code
**	followed by child_character.
**
**	INPUT:	parent_code			-- The code of the parent string sequence.
**
**				child_character	-- The character appended to that sequence.
**
**	OUTPUT:	Returns the index of the matching entry. If there is no match,
**				returns the index of the unused slot where the probe ended. If
**				the whole probe sequence was walked without finding either,
**				returns -1.
*/
int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character)
{
	/*
	**	Fetch the first try index for the code and character.
	*/
	int hash_index = Make_LZW_Hash(parent_code, child_character);

	/*
	**	Base the hash-miss-try-again-displacement value on the current
	**	index. [Shouldn't the value be some large prime number???].
	**	NOTE(review): a fixed non-zero step visits every slot as long as
	**	TABLE_SIZE is prime (5021 in the legacy defines at the bottom of
	**	this file) -- confirm the value in lzw.h.
	*/
	int offset = 1;
	if (hash_index != 0) {
		offset = TABLE_SIZE - hash_index;
	}

	/*
	**	Keep offsetting through the dictionary until an exact match or an
	**	unused slot is found.
	*/
	int initial = hash_index;
	for (;;) {

		/*
		**	Stop searching if an unused index is found since this means that
		**	a match doesn't exist in the table at all. This is checked before
		**	the match test so the parent/character fields of an unused slot
		**	(which Make_Unused does not appear to set) are never examined.
		**	The returned index is the same either way.
		*/
		if (dict[hash_index].Is_Unused()) break;

		/*
		**	An in-use slot holding the requested pair ends the search.
		*/
		if (dict[hash_index].Is_Matching(parent_code, child_character)) break;

		/*
		**	Bump the hash index to another value such that sequential bumps
		**	will not result in the same index value until all of the table
		**	has been scanned.
		*/
		hash_index -= offset;
		if (hash_index < 0) {
			hash_index += TABLE_SIZE;
		}

		/*
		**	If the entire table has been scanned and no match or unused
		**	entry was found, then return a special value indicating this
		**	condition.
		*/
		if (initial == hash_index) {
			hash_index = -1;
			break;
		}
	}
	return(hash_index);
}


/*
**	Decode_String -- Expand a code into its characters.
**
**	Follows the parent links of the dictionary starting at 'code', storing
**	one character per link at 'ptr'. The walk ends at a code of 255 or
**	less, which is stored as the final character. The characters are thus
**	stored last-to-first; the caller emits them in reverse.
**
**	INPUT:	ptr	-- Where to store the characters.
**
**				code	-- The code to expand.
**
**	OUTPUT:	Returns the number of characters stored (always at least one).
**
**	WARNINGS:	No limit is applied to the number of characters stored; the
**					caller must supply sufficient room at 'ptr'.
*/
int LZWEngine::Decode_String(char * ptr, CodeType code)
{
	char * const start = ptr;

	for (; code > 255; code = dict[code].ParentCode) {
		*ptr++ = dict[code].CharValue;
	}

	*ptr++ = (char)code;
	return((int)(ptr - start));
}


/*
**	LZW_Uncompress -- Convenience wrapper that expands one buffer into
**	another using a temporary, locally constructed LZWEngine. Returns
**	whatever LZWEngine::Uncompress returns.
*/
int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff)
{
	LZWEngine engine;
	int const written = engine.Uncompress(inbuff, outbuff);
	return(written);
}


/*
**	LZW_Compress -- Convenience wrapper that compresses one buffer into
**	another using a temporary, locally constructed LZWEngine. Returns
**	whatever LZWEngine::Compress returns.
*/
int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff)
{
	LZWEngine engine;
	int const written = engine.Compress(inbuff, outbuff);
	return(written);
}


#ifdef NEVER


/*
 * Constants used throughout the program.  BITS defines how many bits
 * will be in a code, which makes MAX_CODE the largest code value.
 * TABLE_SIZE defines the size of the dictionary table.  END_OF_STREAM
 * is the code written after the last data code, FIRST_CODE is the first
 * code value assigned to a dictionary entry, and UNUSED is the value
 * stored in CodeValue to flag an empty dictionary slot.
 *
 * NOTE: these definitions sit inside "#ifdef NEVER", so they are only
 * compiled if NEVER is defined.  They belong to the older free-function
 * version of the LZW code that follows.
 */
#define BITS                       12
#define MAX_CODE                   ( ( 1 << BITS ) - 1 )
#define TABLE_SIZE                 5021
#define END_OF_STREAM              256
#define FIRST_CODE                 257
#define UNUSED                     -1

/*
 * NOTE(review): CodeType is unsigned here while UNUSED is -1.  Where int
 * is wider than short, a CodeType holding UNUSED promotes to 65535 and so
 * never compares equal to -1.
 */
typedef unsigned short CodeType;

/*
 * This data structure defines the dictionary.  Each entry in the dictionary
 * has a code value.  This is the code emitted by the compressor.  Each
 * code is actually made up of two pieces:  a parent_code, and a
 * character.  Code values of less than 256 are actually plain
 * text codes.
 */
struct CodeClass
{
	// Code that stands for this entry's string (UNUSED marks an empty slot).
	CodeType CodeValue;
	// Code of the string this entry extends.
	CodeType ParentCode;
	// Character appended to the parent string.
	char CharValue;

	// The default constructor leaves all three members uninitialized.
	CodeClass(void) {}
	CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {}

	// Only CodeValue is touched; ParentCode and CharValue keep their old contents.
	void Make_Unused(void) {CodeValue = UNUSED;}
	bool Is_Unused(void) const {return(CodeValue == UNUSED);}
	// Compares parent and character only; does not look at whether the slot is in use.
	bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);}
};
// The dictionary.  The compressor addresses it by hash index, the
// expander addresses it directly by code value.
CodeClass dict[TABLE_SIZE];

// Scratch area that Decode_String fills when the expander decodes a code.
char decode_stack[TABLE_SIZE];

/*
 * Builds the first-try dictionary index for a code/character pair: the
 * character, taken as an unsigned byte, is shifted left by BITS-8 and
 * exclusive-ORed with the code.
 */
inline int Make_LZW_Hash(CodeType code, char character)
{
	return((((int)(unsigned char)character) << ( BITS - 8 ) ) ^ (int)code);
}


/***********************************************************************************************
 * Find_Child_Node -- Find a matching dictionary entry.                                        *
 *                                                                                             *
 *    This hashing routine is responsible for finding the table location                       *
 *    for a string/character combination.  The table index is created                          *
 *    by using an exclusive OR combination of the prefix and character.                        *
 *    This code also has to check for collisions, and handles them by                          *
 *    jumping around in the table.                                                             *
 *                                                                                             *
 * INPUT:   parent_code -- The code of the parent string sequence.                             *
 *                                                                                             *
 *          character   -- The current character.                                              *
 *                                                                                             *
 * OUTPUT:  Returns with the index for the matching dictionary entry. If no matching index     *
 *          could be found, then it returns with the index to an unused dictionary entry. If   *
 *          there are also no unused entries in the dictionary, then -1 is returned.           *
 *                                                                                             *
 * WARNINGS:   none                                                                            *
 *                                                                                             *
 * HISTORY:                                                                                    *
 *   08/28/1996 JLB : Created.                                                                 *
 *=============================================================================================*/
/*
**	Disabled free-function version of the dictionary probe (it sits inside
**	"#ifdef NEVER"). The body is the same as LZWEngine::Find_Child_Node
**	earlier in this file.
*/
static int Find_Child_Node(CodeType parent_code, char child_character)
{
	/*
	**	Fetch the first try index for the code and character.
	*/
	int hash_index = Make_LZW_Hash(parent_code, child_character);

	/*
	**	Base the hash-miss-try-again-displacement value on the current
	**	index. [Shouldn't the value be some large prime number???].
	*/
	int offset = 1;
	if (hash_index != 0) {
		offset = TABLE_SIZE - hash_index;
	}

	/*
	**	Keep offsetting through the dictionary until an exact match is
	**	found for the code and character specified. Note that the match
	**	test runs before the unused test, so an unused slot is compared
	**	as well.
	*/
	int initial = hash_index;
	while (!dict[hash_index].Is_Matching(parent_code, child_character)) {

		/*
		**	Stop searching if an unused index is found since this means that
		**	a match doesn't exist in the table at all.
		*/
		if (dict[hash_index].Is_Unused()) break;

		/*
		**	Bump the hash index to another value such that sequential bumps
		**	will not result in the same index value until all of the table
		**	has been scanned.
		*/
		hash_index -= offset;
		if (hash_index < 0) {
			hash_index += TABLE_SIZE;
		}

		/*
		**	If the entire table has been scanned and no match or unused
		**	entry was found, then return a special value indicating this
		**	condition.
		*/
		if (initial == hash_index) {
			hash_index = -1;
			break;
		}
	}
	return(hash_index);
}


/*
 * This routine decodes a string from the dictionary, and stores it
 * at the location given by ptr (the callers pass a position inside the
 * decode_stack data structure).  It returns a count to the
 * calling program of how many characters were placed in the stack.
 *
 * The characters are stored last-to-first: each step stores the entry's
 * own character and then moves to its parent code, until a code of 255
 * or less is reached, which is stored as the final character.  No limit
 * is applied to the number of characters stored.
 */
static int Decode_String(char * ptr, CodeType code)
{
	int count = 0;
	while (code > 255) {
		*ptr++ = dict[code].CharValue;
		count++;
		code = dict[code].ParentCode;
	}
	*ptr = (char)code;
	count++;
	return(count);
}


/*
 * The compressor is short and simple.  It reads in new symbols one
 * at a time from the input file.  It then checks to see if the
 * combination of the current symbol and the current code are already
 * defined in the dictionary.  If they are not, they are added to the
 * dictionary, and we start over with a new one symbol code.  If they
 * are, the code for the combination of the code and character becomes
 * our new code.
 *
 * Returns the sum of the values returned by the pipe's Put() calls.
 *
 * NOTE: this is the disabled ("#ifdef NEVER") free-function version.
 * Unlike LZWEngine::Compress it starts with string_code set to
 * END_OF_STREAM rather than the first input byte, and it always writes
 * END_OF_STREAM after the final code.
 */

int LZW_Compress(Buffer & inbuff, Buffer & outbuff)
{
	BufferStraw input(inbuff);
	BufferPipe output(outbuff);

	// Start from an empty dictionary.
	for (int i = 0; i < TABLE_SIZE; i++) {
		dict[i].Make_Unused();
//		dict[i].code_value = UNUSED;
	}

	int outcount = 0;
	CodeType string_code = END_OF_STREAM;
	CodeType next_code = FIRST_CODE;
	for (;;) {
		// NOTE(review): plain char -- where char is signed, bytes of 0x80
		// and above become negative before being assigned to string_code.
		char character;

		if (input.Get(&character, sizeof(character)) == 0) break;

		int index = Find_Child_Node(string_code, character);

		if (index == -1) {
			// No match and no free slot: the character itself is written as the code.
			string_code = character;
			outcount += output.Put(&string_code, sizeof(string_code));
		} else {

			// NOTE(review): with CodeType being unsigned short, this
			// comparison against -1 is always true where int is wider
			// than short.
			if (dict[index].CodeValue != -1) {
				string_code = dict[ index ].CodeValue;
			} else {
				if (next_code <= MAX_CODE) {
					dict[index] = CodeClass(next_code++, string_code, character);
				}
				outcount += output.Put(&string_code, sizeof(string_code));
				string_code = character;
			}
		}
	}

	// Flush the pending code, then terminate the stream.
	outcount += output.Put(&string_code, sizeof(string_code));
	string_code = END_OF_STREAM;
	outcount += output.Put(&string_code, sizeof(string_code));

	return(outcount);
}


/*
 * The file expander operates much like the encoder.  It has to
 * read in codes, then convert the codes to a string of characters.
 * The only catch in the whole operation occurs when the encoder
 * encounters a CHAR+STRING+CHAR+STRING+CHAR sequence.  When this
 * occurs, the encoder outputs a code that is not presently defined
 * in the table.  This is handled as an exception.
 *
 * Returns the sum of the values returned by the pipe's Put() calls.
 *
 * NOTE: this is the disabled ("#ifdef NEVER") free-function version.
 * Unlike LZWEngine::Uncompress it has no check for END_OF_STREAM; the
 * loop only ends when the input is exhausted.
 */
int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff)
{
	int outcount = 0;
	BufferStraw input(inbuff);
	BufferPipe output(outbuff);

	// The first code is written out directly as a single character.
	CodeType old_code;
	if (input.Get(&old_code, sizeof(old_code)) == 0) {
		return(outcount);
	}

	char character = (char)old_code;
	outcount += output.Put(&character, sizeof(character));

	unsigned int count;
	CodeType new_code;
	CodeType next_code = FIRST_CODE;
	for (;;) {
		if (input.Get(&new_code, sizeof(new_code)) == 0) break;

		/*
		** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
		** case which generates an undefined code.  It handles it by decoding
		** the last code, and adding a single character to the end of the decode string.
		*/
		if (new_code >= next_code) {
			decode_stack[0] = character;
			count = 1;
			count += Decode_String(&decode_stack[1], old_code);
		} else {
			count = Decode_String(decode_stack, new_code);
		}

		/*
		**	The stack holds the string last-to-first, so its final element is
		**	the string's first character. Remember it for the dictionary
		**	entry, then write the stack out in reverse.
		*/
		character = decode_stack[count-1];
		while (count > 0) {
			--count;
			outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0]));
		}

		/*
		**	Add the new code sequence to the dictionary (presuming there is still
		**	room).
		*/
		if (next_code <= MAX_CODE) {
			dict[next_code] = CodeClass(next_code, old_code, character);
			next_code++;
		}
		old_code = new_code;
	}

	return(outcount);
}

#endif