537 lines
16 KiB
C++
537 lines
16 KiB
C++
//
|
|
// Copyright 2020 Electronic Arts Inc.
|
|
//
|
|
// TiberianDawn.DLL and RedAlert.dll and corresponding source code is free
|
|
// software: you can redistribute it and/or modify it under the terms of
|
|
// the GNU General Public License as published by the Free Software Foundation,
|
|
// either version 3 of the License, or (at your option) any later version.
|
|
|
|
// TiberianDawn.DLL and RedAlert.dll and corresponding source code is distributed
|
|
// in the hope that it will be useful, but with permitted additional restrictions
|
|
// under Section 7 of the GPL. See the GNU General Public License in LICENSE.TXT
|
|
// distributed with this program. You should have received a copy of the
|
|
// GNU General Public License along with permitted additional restrictions
|
|
// with this program. If not, see https://github.com/electronicarts/CnC_Remastered_Collection
|
|
|
|
/* $Header: /CounterStrike/LZW.CPP 1 3/03/97 10:25a Joe_bostic $ */
|
|
/***********************************************************************************************
|
|
*** C O N F I D E N T I A L --- W E S T W O O D S T U D I O S ***
|
|
***********************************************************************************************
|
|
* *
|
|
* Project Name : Command & Conquer *
|
|
* *
|
|
* File Name : LZW.CPP *
|
|
* *
|
|
* Programmer : Joe L. Bostic *
|
|
* *
|
|
* Start Date : 08/28/96 *
|
|
* *
|
|
* Last Update : August 28, 1996 [JLB] *
|
|
* *
|
|
*---------------------------------------------------------------------------------------------*
|
|
* Functions: *
|
|
* Find_Child_Node -- Find a matching dictionary entry. *
|
|
* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - */
|
|
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
#include "xstraw.h"
|
|
#include "xpipe.h"
|
|
#include "buff.h"
|
|
#include "lzw.h"
|
|
|
|
|
|
/*
**	Construct an LZW engine. All of the setup work is delegated to Reset() so that
**	a freshly built engine is in the same state as one that was explicitly reset.
*/
LZWEngine::LZWEngine()
{
	this->Reset();
}
|
|
|
|
|
|
void LZWEngine::Reset(void)
|
|
{
|
|
for (int i = 0; i < TABLE_SIZE; i++) {
|
|
dict[i].Make_Unused();
|
|
}
|
|
}
|
|
|
|
/*
**	Compress the bytes of one buffer into a stream of LZW codes in another.
**
**	INPUT:   input  -- Buffer that supplies the raw bytes to compress.
**
**	         output -- Buffer that receives the codes. Each code is written as the
**	                   in-memory bytes of a CodeType, followed by a terminating
**	                   END_OF_STREAM code.
**
**	OUTPUT:  Returns the running total of the values returned by the output pipe's
**	         Put() calls (presumably the number of bytes written -- confirm against
**	         BufferPipe).
**
**	WARNINGS: An empty input produces a stream that holds only END_OF_STREAM.
*/
int LZWEngine::Compress(Buffer const & input, Buffer const & output)
{
	BufferStraw instraw(input);
	BufferPipe outpipe(output);

	/*
	**	Always start from an empty dictionary. The code counter below restarts at
	**	FIRST_CODE on every call, so entries left behind by an earlier call on this
	**	engine would otherwise be matched and emitted even though the decoder has
	**	never seen them defined.
	*/
	Reset();

	int outcount = 0;
	CodeType next_code = FIRST_CODE;

	/*
	**	Prime the current code with the first source byte. The byte is fetched into
	**	a genuine byte sized variable and then widened. Reading a single byte
	**	directly into the storage of a wider CodeType only yields the right value
	**	on little-endian machines.
	*/
	CodeType string_code = END_OF_STREAM;
	unsigned char first_byte;
	if (instraw.Get(&first_byte, sizeof(first_byte)) != 0) {
		string_code = first_byte;
	}

	for (;;) {

		/*
		**	Fetch a character from the source data stream. If exhausted,
		**	then break out of the process loop so that the final code
		**	can be written out.
		*/
		unsigned char character;
		if (instraw.Get(&character, sizeof(character)) == 0) break;

		/*
		**	Look for a dictionary slot for the current code followed by this
		**	character. The result is a matching slot, a free slot, or -1 when
		**	the table has neither.
		*/
		int index = Find_Child_Node(string_code, character);

		if (index != -1 && dict[index].CodeValue != -1) {

			/*
			**	The sequence is already known. Keep extending it by adopting
			**	the code that stands for "current code + character".
			*/
			string_code = dict[index].CodeValue;

		} else {

			/*
			**	Unknown sequence. Record it in the free slot, provided a slot
			**	was found and the code space is not yet used up.
			*/
			if (index != -1 && next_code <= MAX_CODE) {
				dict[index] = CodeClass(next_code, string_code, character);
				next_code++;
			}

			/*
			**	Emit the code gathered so far and restart the scan with the
			**	current character, which guarantees the character is eventually
			**	written out as well.
			*/
			outcount += outpipe.Put(&string_code, sizeof(string_code));
			string_code = character;
		}
	}

	/*
	**	Flush the pending code. If that code was not itself the end marker (the
	**	empty input case), follow it with the END_OF_STREAM terminator.
	*/
	outcount += outpipe.Put(&string_code, sizeof(string_code));
	if (string_code != END_OF_STREAM) {
		string_code = END_OF_STREAM;
		outcount += outpipe.Put(&string_code, sizeof(string_code));
	}

	return(outcount);
}
|
|
|
|
|
|
/*
**	Expand a stream of LZW codes back into the original bytes.
**
**	INPUT:   input  -- Buffer holding the codes, each stored as the in-memory bytes
**	                   of a CodeType.
**
**	         output -- Buffer that receives the decoded bytes.
**
**	OUTPUT:  Returns the running total of the values returned by the output pipe's
**	         Put() calls (presumably the number of bytes written -- confirm against
**	         BufferPipe).
**
**	WARNINGS: Decoding stops quietly at the first code that cannot be legal at that
**	          point of the stream. The bytes decoded up to then are still counted
**	          in the return value; no separate error indication is given.
*/
int LZWEngine::Uncompress(Buffer const & input, Buffer const & output)
{
	int outcount = 0;
	BufferStraw instraw(input);
	BufferPipe outpipe(output);

	CodeType old_code;
	if (instraw.Get(&old_code, sizeof(old_code)) == 0) {
		return(outcount);
	}

	/*
	**	The first code must be a plain byte value. Compress() writes a lone
	**	END_OF_STREAM (256) for empty input, which must decode to nothing rather
	**	than to a zero byte, and any other value outside of 0..255 can only come
	**	from a damaged stream.
	*/
	int const first_code = old_code;
	if (first_code < 0 || first_code > 255) {
		return(outcount);
	}

	unsigned char character = (unsigned char)old_code;
	outcount += outpipe.Put(&character, sizeof(character));

	unsigned int count;
	CodeType new_code;
	CodeType next_code = FIRST_CODE;
	for (;;) {
		if (instraw.Get(&new_code, sizeof(new_code)) == 0) break;

		if (new_code == END_OF_STREAM) break;

		/*
		**	A legal code is never negative, never beyond the code space, and at
		**	most equal to the next code that is about to be defined. Anything
		**	else would index the dictionary at an entry that was never written
		**	(or outside of the table altogether), so treat it as the end of
		**	usable data.
		*/
		int const incoming = new_code;
		if (incoming < 0 || incoming > MAX_CODE || incoming > (int)next_code) break;

		/*
		**	This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
		**	case which generates an undefined code. It handles it by decoding
		**	the last code, and adding a single character to the end of the decode string.
		**	Decode_String() yields the bytes in reverse order, which is why the
		**	extra character goes into slot zero.
		*/
		if (new_code >= next_code) {
			decode_stack[0] = character;
			count = 1;
			count += Decode_String(&decode_stack[1], old_code);
		} else {
			count = Decode_String(decode_stack, new_code);
		}

		/*
		**	The last stack entry is the first byte of the decoded string. Remember
		**	it for the dictionary update, then write the stack out back to front.
		*/
		character = decode_stack[count-1];
		while (count > 0) {
			--count;
			outcount += outpipe.Put(&decode_stack[count], sizeof(decode_stack[0]));
		}

		/*
		**	Add the new code sequence to the dictionary (presuming there is still
		**	room).
		*/
		if (next_code <= MAX_CODE) {
			dict[next_code] = CodeClass(next_code, old_code, character);
			next_code++;
		}
		old_code = new_code;
	}

	return(outcount);
}
|
|
|
|
|
|
/*
**	Build the first-try dictionary index for a code/character pair. The character
**	is treated as an unsigned byte, shifted up by (BITS - 8) bits, and then
**	exclusive-or'ed with the code.
*/
int LZWEngine::Make_LZW_Hash(CodeType code, char character)
{
	int const byte_value = (unsigned char)character;
	int const spread = byte_value << (BITS - 8);
	return(spread ^ (int)code);
}
|
|
|
|
|
|
/*
**	Locate the dictionary slot for a parent code followed by a character.
**
**	INPUT:   parent_code     -- The code of the parent string sequence.
**
**	         child_character -- The character that follows the parent sequence.
**
**	OUTPUT:  Returns the index of the matching dictionary entry. If there is no
**	         match, the index of the unused slot that ended the search is returned.
**	         If the whole probe cycle holds neither, -1 is returned.
*/
int LZWEngine::Find_Child_Node(CodeType parent_code, char child_character)
{
	/*
	**	Fetch the first try index for the code and character.
	*/
	int hash_index = Make_LZW_Hash(parent_code, child_character);
	int const initial = hash_index;

	/*
	**	Base the hash-miss-try-again-displacement value on the starting index.
	**	Stepping back by (TABLE_SIZE - index) modulo the table size visits every
	**	slot before repeating as long as TABLE_SIZE is prime.
	**	NOTE(review): TABLE_SIZE is declared outside this file -- confirm it is prime.
	*/
	int const offset = (hash_index != 0) ? (TABLE_SIZE - hash_index) : 1;

	for (;;) {

		/*
		**	An unused slot ends the search: the pair is not in the table and
		**	this is where it could be stored. This is tested before the match
		**	test so that the parent/character fields of a slot that has only
		**	been flagged unused are never examined. The caller tells the two
		**	outcomes apart by inspecting the slot's code value.
		*/
		if (dict[hash_index].Is_Unused()) break;

		/*
		**	An occupied slot holding exactly this pair is the match.
		*/
		if (dict[hash_index].Is_Matching(parent_code, child_character)) break;

		/*
		**	Bump the hash index to the next slot of the probe cycle, wrapping
		**	around the start of the table.
		*/
		hash_index -= offset;
		if (hash_index < 0) {
			hash_index += TABLE_SIZE;
		}

		/*
		**	Back at the starting slot: the cycle has been scanned without
		**	finding a match or an unused entry.
		*/
		if (hash_index == initial) {
			hash_index = -1;
			break;
		}
	}
	return(hash_index);
}
|
|
|
|
|
|
int LZWEngine::Decode_String(char * ptr, CodeType code)
|
|
{
|
|
int count = 0;
|
|
while (code > 255) {
|
|
*ptr++ = dict[code].CharValue;
|
|
count++;
|
|
code = dict[code].ParentCode;
|
|
}
|
|
*ptr = (char)code;
|
|
count++;
|
|
return(count);
|
|
}
|
|
|
|
|
|
/*
**	Convenience wrapper that expands 'inbuff' into 'outbuff' by means of a
**	temporary LZW engine. Returns whatever LZWEngine::Uncompress() returns.
*/
int LZW_Uncompress(Buffer const & inbuff, Buffer const & outbuff)
{
	LZWEngine engine;
	int const produced = engine.Uncompress(inbuff, outbuff);
	return(produced);
}
|
|
|
|
|
|
/*
**	Convenience wrapper that compresses 'inbuff' into 'outbuff' by means of a
**	temporary LZW engine. Returns whatever LZWEngine::Compress() returns.
*/
int LZW_Compress(Buffer const & inbuff, Buffer const & outbuff)
{
	LZWEngine engine;
	int const produced = engine.Compress(inbuff, outbuff);
	return(produced);
}
|
|
|
|
|
|
|
|
|
|
|
|
#ifdef NEVER
|
|
|
|
|
|
/*
|
|
* Constants used throughout the program. BITS defines how many bits
|
|
* will be in a code. TABLE_SIZE defines the size of the dictionary
|
|
* table.
|
|
*/
|
|
#define BITS 12
#define MAX_CODE ( ( 1 << BITS ) - 1 )
#define TABLE_SIZE 5021
#define END_OF_STREAM 256
#define FIRST_CODE 257
#define UNUSED (-1)

typedef unsigned short CodeType;

/*
 * This data structure defines the dictionary. Each entry in the dictionary
 * has a code value. This is the code emitted by the compressor. Each
 * code is actually made up of two pieces: a parent_code, and a
 * character. Code values of less than 256 are actually plain
 * text codes.
 *
 * CodeType is unsigned, so the UNUSED marker is always converted to CodeType
 * before it is stored or compared. Comparing the stored value against a plain
 * -1 can never succeed, because the stored value is promoted to a non-negative
 * int first.
 */
struct CodeClass
{
	CodeType CodeValue;		// Code the compressor emits for this entry.
	CodeType ParentCode;	// Code of the string this entry extends.
	char CharValue;			// Character appended to the parent string.

	/*
	 * The default constructor deliberately leaves the members unset. Call
	 * Make_Unused() before the entry is examined.
	 */
	CodeClass(void) {}
	CodeClass(CodeType code, CodeType parent, char c) : CodeValue(code), ParentCode(parent), CharValue(c) {}

	void Make_Unused(void) {CodeValue = (CodeType)UNUSED;}
	bool Is_Unused(void) const {return(CodeValue == (CodeType)UNUSED);}
	bool Is_Matching(CodeType code, char c) const {return(ParentCode == code && CharValue == c);}
};
CodeClass dict[TABLE_SIZE];

char decode_stack[TABLE_SIZE];
|
|
|
|
/*
 * Build the first-try dictionary index for a code/character pair: the
 * character, taken as an unsigned byte and shifted up by (BITS - 8) bits,
 * exclusive-or'ed with the code.
 */
inline int Make_LZW_Hash(CodeType code, char character)
{
	int const byte_value = (unsigned char)character;
	int const spread = byte_value << (BITS - 8);
	return(spread ^ (int)code);
}
|
|
|
|
|
|
/***********************************************************************************************
|
|
* Find_Child_Node -- Find a matching dictionary entry. *
|
|
* *
|
|
* This hashing routine is responsible for finding the table location *
|
|
* for a string/character combination. The table index is created *
|
|
* by using an exclusive OR combination of the prefix and character. *
|
|
* This code also has to check for collisions, and handles them by *
|
|
* jumping around in the table. *
|
|
* *
|
|
* INPUT: parent_code -- The code of the parent string sequence. *
|
|
* *
|
|
* character -- The current character. *
|
|
* *
|
|
* OUTPUT: Returns with the index for the matching dictionary entry. If no matching index *
|
|
* could be found, then it returns with the index to an unused dictionary entry. If *
|
|
* there are also no unused entries in the dictionary, then -1 is returned. *
|
|
* *
|
|
* WARNINGS: none *
|
|
* *
|
|
* HISTORY: *
|
|
* 08/28/1996 JLB : Created. *
|
|
*=============================================================================================*/
|
|
static int Find_Child_Node(CodeType parent_code, char child_character)
{
	/*
	**	The hash supplies the first slot to try. The retry displacement is derived
	**	from that same slot, falling back to one when the slot is zero.
	*/
	int const start = Make_LZW_Hash(parent_code, child_character);
	int const stride = (start == 0) ? 1 : (TABLE_SIZE - start);

	int probe = start;
	for (;;) {

		/*
		**	A slot holding exactly this code and character is the answer.
		*/
		if (dict[probe].Is_Matching(parent_code, child_character)) {
			return(probe);
		}

		/*
		**	An unused slot means the pair is not in the table at all. Its index
		**	is handed back so that the caller may fill it.
		*/
		if (dict[probe].Is_Unused()) {
			return(probe);
		}

		/*
		**	Step back to the next slot of the probe cycle, wrapping around the
		**	start of the table.
		*/
		probe -= stride;
		if (probe < 0) {
			probe += TABLE_SIZE;
		}

		/*
		**	Arriving at the starting slot again means the cycle holds neither a
		**	match nor an unused entry.
		*/
		if (probe == start) {
			return(-1);
		}
	}
}
|
|
|
|
|
|
/*
|
|
* This routine decodes a string from the dictionary, and stores it
|
|
* in the decode_stack data structure. It returns a count to the
|
|
* calling program of how many characters were placed in the stack.
|
|
*/
|
|
static int Decode_String(char * ptr, CodeType code)
|
|
{
|
|
int count = 0;
|
|
while (code > 255) {
|
|
*ptr++ = dict[code].CharValue;
|
|
count++;
|
|
code = dict[code].ParentCode;
|
|
}
|
|
*ptr = (char)code;
|
|
count++;
|
|
return(count);
|
|
}
|
|
|
|
|
|
/*
|
|
* The compressor is short and simple. It reads in new symbols one
|
|
* at a time from the input file. It then checks to see if the
|
|
* combination of the current symbol and the current code are already
|
|
* defined in the dictionary. If they are not, they are added to the
|
|
* dictionary, and we start over with a new one symbol code. If they
|
|
* are, the code for the combination of the code and character becomes
|
|
* our new code.
|
|
*/
|
|
|
|
/*
 * Compress 'inbuff' into 'outbuff'. Codes are written as the in-memory bytes
 * of a CodeType and the stream is terminated by END_OF_STREAM. The return
 * value is the running total of the values returned by the pipe's Put() calls.
 */
int LZW_Compress(Buffer & inbuff, Buffer & outbuff)
{
	BufferStraw input(inbuff);
	BufferPipe output(outbuff);

	for (int i = 0; i < TABLE_SIZE; i++) {
		dict[i].Make_Unused();
	}

	int outcount = 0;
	CodeType next_code = FIRST_CODE;

	/*
	 * Prime the current code with the first source byte. Starting the scan
	 * with END_OF_STREAM as the current code would emit the end marker as the
	 * very first code of the stream. The byte is held as unsigned so that
	 * values of 0x80 and above do not sign-extend into the code.
	 */
	CodeType string_code = END_OF_STREAM;
	unsigned char first_byte;
	if (input.Get(&first_byte, sizeof(first_byte)) != 0) {
		string_code = first_byte;
	}

	for (;;) {
		unsigned char character;
		if (input.Get(&character, sizeof(character)) == 0) break;

		int index = Find_Child_Node(string_code, character);

		/*
		 * A slot that carries a real code value is a match. The comparison is
		 * made against the marker converted to CodeType, because an unsigned
		 * code value never compares equal to a plain -1.
		 */
		if (index != -1 && dict[index].CodeValue != (CodeType)UNUSED) {
			string_code = dict[index].CodeValue;
			continue;
		}

		/*
		 * No match. Record the new sequence if a free slot was found and the
		 * code space is not exhausted.
		 */
		if (index != -1 && next_code <= MAX_CODE) {
			dict[index] = CodeClass(next_code++, string_code, character);
		}

		/*
		 * Write the pending code BEFORE restarting the scan with the current
		 * character, so the pending sequence is never dropped.
		 */
		outcount += output.Put(&string_code, sizeof(string_code));
		string_code = character;
	}

	/*
	 * Flush the pending code and terminate the stream. For empty input the
	 * pending code already is the end marker, so it is written only once.
	 */
	outcount += output.Put(&string_code, sizeof(string_code));
	if (string_code != END_OF_STREAM) {
		string_code = END_OF_STREAM;
		outcount += output.Put(&string_code, sizeof(string_code));
	}

	return(outcount);
}
|
|
|
|
|
|
/*
|
|
* The file expander operates much like the encoder. It has to
|
|
* read in codes, then convert the codes to a string of characters.
|
|
* The only catch in the whole operation occurs when the encoder
|
|
* encounters a CHAR+STRING+CHAR+STRING+CHAR sequence. When this
|
|
* occurs, the encoder outputs a code that is not presently defined
|
|
* in the table. This is handled as an exception.
|
|
*/
|
|
/*
 * Expand the codes in 'inbuff' into bytes in 'outbuff'. The return value is
 * the running total of the values returned by the pipe's Put() calls. Decoding
 * stops at END_OF_STREAM, at the end of the input, or at the first code that
 * cannot be legal at that point of the stream.
 */
int LZW_Uncompress(Buffer & inbuff, Buffer & outbuff)
{
	int outcount = 0;
	BufferStraw input(inbuff);
	BufferPipe output(outbuff);

	CodeType old_code;
	if (input.Get(&old_code, sizeof(old_code)) == 0) {
		return(outcount);
	}

	/*
	 * The first code has to be a plain character. An immediate END_OF_STREAM
	 * stands for empty data, and any other larger value cannot be decoded
	 * because the dictionary is still empty.
	 */
	if (old_code > 255) {
		return(outcount);
	}

	char character = (char)old_code;
	outcount += output.Put(&character, sizeof(character));

	unsigned int count;
	CodeType new_code;
	CodeType next_code = FIRST_CODE;
	for (;;) {
		if (input.Get(&new_code, sizeof(new_code)) == 0) break;

		/*
		 * The end marker is not a dictionary code and must not be decoded.
		 */
		if (new_code == END_OF_STREAM) break;

		/*
		 * A legal code lies within the code space and is at most equal to the
		 * next code about to be defined. Anything else would read dictionary
		 * entries that were never written.
		 */
		if (new_code > MAX_CODE || new_code > next_code) break;

		/*
		** This code checks for the CHARACTER+STRING+CHARACTER+STRING+CHARACTER
		** case which generates an undefined code. It handles it by decoding
		** the last code, and adding a single character to the end of the decode string.
		*/
		if (new_code >= next_code) {
			decode_stack[0] = character;
			count = 1;
			count += Decode_String(&decode_stack[1], old_code);
		} else {
			count = Decode_String(decode_stack, new_code);
		}

		/*
		 * The stack holds the string back to front. Its last entry is the
		 * first character of the string, which the dictionary update needs.
		 */
		character = decode_stack[count-1];
		while (count > 0) {
			--count;
			outcount += output.Put(&decode_stack[count], sizeof(decode_stack[0]));
		}

		/*
		** Add the new code sequence to the dictionary (presuming there is still
		** room).
		*/
		if (next_code <= MAX_CODE) {
			dict[next_code] = CodeClass(next_code, old_code, character);
			next_code++;
		}
		old_code = new_code;
	}

	return(outcount);
}
|
|
|
|
#endif
|