extmod/uzlib: Update uzlib to v2.9.2.

Major changes include robust parsing of erroneous compressed streams and
updated API.
This commit is contained in:
Paul Sokolovsky 2018-11-30 23:36:49 +03:00 committed by Damien George
parent 3e25d611ef
commit b1cca8fbe0
8 changed files with 391 additions and 154 deletions

View File

@ -0,0 +1,45 @@
/*
* Copyright (c) uzlib authors
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be
* held liable for any damages arising from the use of
* this software.
*
* Permission is granted to anyone to use this software
* for any purpose, including commercial applications,
* and to alter it and redistribute it freely, subject to
* the following restrictions:
*
* 1. The origin of this software must not be
* misrepresented; you must not claim that you
* wrote the original software. If you use this
* software in a product, an acknowledgment in
* the product documentation would be appreciated
* but is not required.
*
* 2. Altered source versions must be plainly marked
* as such, and must not be misrepresented as
* being the original software.
*
* 3. This notice may not be removed or altered from
* any source distribution.
*/
/* This files contains type declaration and prototypes for defl_static.c.
They may be altered/distinct from the originals used in PuTTY source
code. */
struct Outbuf {
unsigned char *outbuf;
int outlen, outsize;
unsigned long outbits;
int noutbits;
int comp_disabled;
};
void outbits(struct Outbuf *out, unsigned long bits, int nbits);
void zlib_start_block(struct Outbuf *ctx);
void zlib_finish_block(struct Outbuf *ctx);
void zlib_literal(struct Outbuf *ectx, unsigned char c);
void zlib_match(struct Outbuf *ectx, int distance, int len);

View File

@ -1,117 +1,3 @@
/*
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
* http://www.ibsensoftware.com/
*
* Copyright (c) 2014-2016 by Paul Sokolovsky
*/
#ifndef TINF_H_INCLUDED
#define TINF_H_INCLUDED
#include <stdint.h>
/* calling convention */
#ifndef TINFCC
#ifdef __WATCOMC__
#define TINFCC __cdecl
#else
#define TINFCC
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* ok status, more data produced */
#define TINF_OK 0
/* end of compressed stream reached */
#define TINF_DONE 1
#define TINF_DATA_ERROR (-3)
#define TINF_CHKSUM_ERROR (-4)
#define TINF_DICT_ERROR (-5)
/* checksum types */
#define TINF_CHKSUM_NONE 0
#define TINF_CHKSUM_ADLER 1
#define TINF_CHKSUM_CRC 2
/* data structures */
typedef struct {
unsigned short table[16]; /* table of code length counts */
unsigned short trans[288]; /* code -> symbol translation table */
} TINF_TREE;
struct TINF_DATA;
typedef struct TINF_DATA {
const unsigned char *source;
/* If source above is NULL, this function will be used to read
next byte from source stream */
unsigned char (*readSource)(struct TINF_DATA *data);
unsigned int tag;
unsigned int bitcount;
/* Buffer start */
unsigned char *destStart;
/* Buffer total size */
unsigned int destSize;
/* Current pointer in buffer */
unsigned char *dest;
/* Remaining bytes in buffer */
unsigned int destRemaining;
/* Accumulating checksum */
unsigned int checksum;
char checksum_type;
int btype;
int bfinal;
unsigned int curlen;
int lzOff;
unsigned char *dict_ring;
unsigned int dict_size;
unsigned int dict_idx;
TINF_TREE ltree; /* dynamic length/symbol tree */
TINF_TREE dtree; /* dynamic distance tree */
} TINF_DATA;
#define TINF_PUT(d, c) \
{ \
*d->dest++ = c; \
if (d->dict_ring) { d->dict_ring[d->dict_idx++] = c; if (d->dict_idx == d->dict_size) d->dict_idx = 0; } \
}
unsigned char TINFCC uzlib_get_byte(TINF_DATA *d);
/* Decompression API */
void TINFCC uzlib_init(void);
void TINFCC uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen);
int TINFCC uzlib_uncompress(TINF_DATA *d);
int TINFCC uzlib_uncompress_chksum(TINF_DATA *d);
int TINFCC uzlib_zlib_parse_header(TINF_DATA *d);
int TINFCC uzlib_gzip_parse_header(TINF_DATA *d);
/* Compression API */
void TINFCC uzlib_compress(void *data, const uint8_t *src, unsigned slen);
/* Checksum API */
/* prev_sum is previous value for incremental computation, 1 initially */
uint32_t TINFCC uzlib_adler32(const void *data, unsigned int length, uint32_t prev_sum);
/* crc is previous value for incremental computation, 0xffffffff initially */
uint32_t TINFCC uzlib_crc32(const void *data, unsigned int length, uint32_t crc);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* TINF_H_INCLUDED */
/* Compatibility header for the original tinf lib/older versions of uzlib.
Note: may be removed in the future, please migrate to uzlib.h. */
#include "uzlib.h"

View File

@ -0,0 +1,9 @@
/* This header contains compatibility defines for the original tinf API
and uzlib 2.x and below API. These defines are deprecated and going
to be removed in the future, so applications should migrate to new
uzlib API. */
#define TINF_DATA struct uzlib_uncomp
#define destSize dest_size
#define destStart dest_start
#define readSource source_read_cb

View File

@ -1,12 +1,12 @@
/*
* tinfgzip - tiny gzip decompressor
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
*
* http://www.ibsensoftware.com/
*
* Copyright (c) 2014-2016 by Paul Sokolovsky
* Copyright (c) 2014-2018 by Paul Sokolovsky
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be

View File

@ -1,11 +1,11 @@
/*
* tinflate - tiny inflate
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
* http://www.ibsensoftware.com/
*
* Copyright (c) 2014-2016 by Paul Sokolovsky
* Copyright (c) 2014-2018 by Paul Sokolovsky
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be
@ -35,6 +35,15 @@
#include <assert.h>
#include "tinf.h"
#define UZLIB_DUMP_ARRAY(heading, arr, size) \
{ \
printf("%s", heading); \
for (int i = 0; i < size; ++i) { \
printf(" %d", (arr)[i]); \
} \
printf("\n"); \
}
uint32_t tinf_get_le_uint32(TINF_DATA *d);
uint32_t tinf_get_be_uint32(TINF_DATA *d);
@ -149,6 +158,13 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
/* scan symbol lengths, and sum code length counts */
for (i = 0; i < num; ++i) t->table[lengths[i]]++;
#if UZLIB_CONF_DEBUG_LOG >= 2
UZLIB_DUMP_ARRAY("codelen counts:", t->table, TINF_ARRAY_SIZE(t->table));
#endif
/* In the lengths array, 0 means unused code. So, t->table[0] now contains
number of unused codes. But table's purpose is to contain # of codes of
particular length, and there're 0 codes of length 0. */
t->table[0] = 0;
/* compute offset table for distribution sort */
@ -158,6 +174,10 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
sum += t->table[i];
}
#if UZLIB_CONF_DEBUG_LOG >= 2
UZLIB_DUMP_ARRAY("codelen offsets:", offs, TINF_ARRAY_SIZE(offs));
#endif
/* create code->symbol translation table (symbols sorted by code) */
for (i = 0; i < num; ++i)
{
@ -171,10 +191,28 @@ static void tinf_build_tree(TINF_TREE *t, const unsigned char *lengths, unsigned
unsigned char uzlib_get_byte(TINF_DATA *d)
{
if (d->source) {
/* If end of source buffer is not reached, return next byte from source
buffer. */
if (d->source < d->source_limit) {
return *d->source++;
}
return d->readSource(d);
/* Otherwise if there's callback and we haven't seen EOF yet, try to
read next byte using it. (Note: the callback can also update ->source
and ->source_limit). */
if (d->readSource && !d->eof) {
int val = d->readSource(d);
if (val >= 0) {
return (unsigned char)val;
}
}
/* Otherwise, we hit EOF (either from ->readSource() or from exhaustion
of the buffer), and it will be "sticky", i.e. further calls to this
function will end up here too. */
d->eof = true;
return 0;
}
uint32_t tinf_get_le_uint32(TINF_DATA *d)
@ -182,7 +220,7 @@ uint32_t tinf_get_le_uint32(TINF_DATA *d)
uint32_t val = 0;
int i;
for (i = 4; i--;) {
val = val >> 8 | uzlib_get_byte(d) << 24;
val = val >> 8 | ((uint32_t)uzlib_get_byte(d)) << 24;
}
return val;
}
@ -245,21 +283,31 @@ static int tinf_decode_symbol(TINF_DATA *d, TINF_TREE *t)
cur = 2*cur + tinf_getbit(d);
++len;
if (++len == TINF_ARRAY_SIZE(t->table)) {
return TINF_DATA_ERROR;
}
sum += t->table[len];
cur -= t->table[len];
} while (cur >= 0);
return t->trans[sum + cur];
sum += cur;
#if UZLIB_CONF_PARANOID_CHECKS
if (sum < 0 || sum >= TINF_ARRAY_SIZE(t->trans)) {
return TINF_DATA_ERROR;
}
#endif
return t->trans[sum];
}
/* given a data stream, decode dynamic trees from it */
static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
static int tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
{
/* code lengths for 288 literal/len symbols and 32 dist symbols */
unsigned char lengths[288+32];
unsigned int hlit, hdist, hclen;
unsigned int hlit, hdist, hclen, hlimit;
unsigned int i, num, length;
/* get 5 bits HLIT (257-286) */
@ -286,53 +334,75 @@ static void tinf_decode_trees(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
tinf_build_tree(lt, lengths, 19);
/* decode code lengths for the dynamic trees */
for (num = 0; num < hlit + hdist; )
hlimit = hlit + hdist;
for (num = 0; num < hlimit; )
{
int sym = tinf_decode_symbol(d, lt);
unsigned char fill_value = 0;
int lbits, lbase = 3;
/* error decoding */
if (sym < 0) return sym;
switch (sym)
{
case 16:
/* copy previous code length 3-6 times (read 2 bits) */
{
unsigned char prev = lengths[num - 1];
for (length = tinf_read_bits(d, 2, 3); length; --length)
{
lengths[num++] = prev;
}
}
if (num == 0) return TINF_DATA_ERROR;
fill_value = lengths[num - 1];
lbits = 2;
break;
case 17:
/* repeat code length 0 for 3-10 times (read 3 bits) */
for (length = tinf_read_bits(d, 3, 3); length; --length)
{
lengths[num++] = 0;
}
lbits = 3;
break;
case 18:
/* repeat code length 0 for 11-138 times (read 7 bits) */
for (length = tinf_read_bits(d, 7, 11); length; --length)
{
lengths[num++] = 0;
}
lbits = 7;
lbase = 11;
break;
default:
/* values 0-15 represent the actual code lengths */
lengths[num++] = sym;
break;
/* continue the for loop */
continue;
}
/* special code length 16-18 are handled here */
length = tinf_read_bits(d, lbits, lbase);
if (num + length > hlimit) return TINF_DATA_ERROR;
for (; length; --length)
{
lengths[num++] = fill_value;
}
}
#if UZLIB_CONF_DEBUG_LOG >= 2
printf("lit code lengths (%d):", hlit);
UZLIB_DUMP_ARRAY("", lengths, hlit);
printf("dist code lengths (%d):", hdist);
UZLIB_DUMP_ARRAY("", lengths + hlit, hdist);
#endif
#if UZLIB_CONF_PARANOID_CHECKS
/* Check that there's "end of block" symbol */
if (lengths[256] == 0) {
return TINF_DATA_ERROR;
}
#endif
/* build dynamic trees */
tinf_build_tree(lt, lengths, hlit);
tinf_build_tree(dt, lengths + hlit, hdist);
return TINF_OK;
}
/* ----------------------------- *
* -- block inflate functions -- *
* ----------------------------- */
/* given a stream and two trees, inflate a block of data */
/* given a stream and two trees, inflate next byte of output */
static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
{
if (d->curlen == 0) {
@ -341,6 +411,10 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
int sym = tinf_decode_symbol(d, lt);
//printf("huff sym: %02x\n", sym);
if (d->eof) {
return TINF_DATA_ERROR;
}
/* literal byte */
if (sym < 256) {
TINF_PUT(d, sym);
@ -354,21 +428,45 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
/* substring from sliding dictionary */
sym -= 257;
if (sym >= 29) {
return TINF_DATA_ERROR;
}
/* possibly get more bits from length code */
d->curlen = tinf_read_bits(d, length_bits[sym], length_base[sym]);
dist = tinf_decode_symbol(d, dt);
if (dist >= 30) {
return TINF_DATA_ERROR;
}
/* possibly get more bits from distance code */
offs = tinf_read_bits(d, dist_bits[dist], dist_base[dist]);
/* calculate and validate actual LZ offset to use */
if (d->dict_ring) {
if (offs > d->dict_size) {
return TINF_DICT_ERROR;
}
/* Note: unlike full-dest-in-memory case below, we don't
try to catch offset which points to not yet filled
part of the dictionary here. Doing so would require
keeping another variable to track "filled in" size
of the dictionary. Appearance of such an offset cannot
lead to accessing memory outside of the dictionary
buffer, and clients which don't want to leak unrelated
information, should explicitly initialize dictionary
buffer passed to uzlib. */
d->lzOff = d->dict_idx - offs;
if (d->lzOff < 0) {
d->lzOff += d->dict_size;
}
} else {
/* catch trying to point before the start of dest buffer */
if (offs > d->dest - d->destStart) {
return TINF_DATA_ERROR;
}
d->lzOff = -offs;
}
}
@ -387,7 +485,7 @@ static int tinf_inflate_block_data(TINF_DATA *d, TINF_TREE *lt, TINF_TREE *dt)
return TINF_OK;
}
/* inflate an uncompressed block of data */
/* inflate next byte from uncompressed block of data */
static int tinf_inflate_uncompressed_block(TINF_DATA *d)
{
if (d->curlen == 0) {
@ -440,6 +538,7 @@ void uzlib_init(void)
/* initialize decompression structure */
void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
{
d->eof = 0;
d->bitcount = 0;
d->bfinal = 0;
d->btype = -1;
@ -449,7 +548,7 @@ void uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen)
d->curlen = 0;
}
/* inflate next byte of compressed stream */
/* inflate next output bytes from compressed stream */
int uzlib_uncompress(TINF_DATA *d)
{
do {
@ -463,14 +562,19 @@ next_blk:
/* read block type (2 bits) */
d->btype = tinf_read_bits(d, 2, 0);
//printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
#if UZLIB_CONF_DEBUG_LOG >= 1
printf("Started new block: type=%d final=%d\n", d->btype, d->bfinal);
#endif
if (d->btype == 1) {
/* build fixed huffman trees */
tinf_build_fixed_trees(&d->ltree, &d->dtree);
} else if (d->btype == 2) {
/* decode trees from stream */
tinf_decode_trees(d, &d->ltree, &d->dtree);
res = tinf_decode_trees(d, &d->ltree, &d->dtree);
if (res != TINF_OK) {
return res;
}
}
}
@ -483,7 +587,7 @@ next_blk:
break;
case 1:
case 2:
/* decompress block with fixed/dyanamic huffman trees */
/* decompress block with fixed/dynamic huffman trees */
/* trees were decoded previously, so it's the same routine for both */
res = tinf_inflate_block_data(d, &d->ltree, &d->dtree);
break;
@ -501,11 +605,13 @@ next_blk:
return res;
}
} while (--d->destSize);
} while (d->dest < d->dest_limit);
return TINF_OK;
}
/* inflate next output bytes from compressed stream, updating
checksum, and at the end of stream, verify it */
int uzlib_uncompress_chksum(TINF_DATA *d)
{
int res;

View File

@ -1,12 +1,12 @@
/*
* tinfzlib - tiny zlib decompressor
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
*
* http://www.ibsensoftware.com/
*
* Copyright (c) 2014-2016 by Paul Sokolovsky
* Copyright (c) 2014-2018 by Paul Sokolovsky
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be

169
extmod/uzlib/uzlib.h Normal file
View File

@ -0,0 +1,169 @@
/*
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2003 by Joergen Ibsen / Jibz
* All Rights Reserved
* http://www.ibsensoftware.com/
*
* Copyright (c) 2014-2018 by Paul Sokolovsky
*
* This software is provided 'as-is', without any express
* or implied warranty. In no event will the authors be
* held liable for any damages arising from the use of
* this software.
*
* Permission is granted to anyone to use this software
* for any purpose, including commercial applications,
* and to alter it and redistribute it freely, subject to
* the following restrictions:
*
* 1. The origin of this software must not be
* misrepresented; you must not claim that you
* wrote the original software. If you use this
* software in a product, an acknowledgment in
* the product documentation would be appreciated
* but is not required.
*
* 2. Altered source versions must be plainly marked
* as such, and must not be misrepresented as
* being the original software.
*
* 3. This notice may not be removed or altered from
* any source distribution.
*/
#ifndef UZLIB_H_INCLUDED
#define UZLIB_H_INCLUDED
#include <stdlib.h>
#include <stdint.h>
#include <stdbool.h>
#include "defl_static.h"
#include "uzlib_conf.h"
#if UZLIB_CONF_DEBUG_LOG
#include <stdio.h>
#endif
/* calling convention */
#ifndef TINFCC
#ifdef __WATCOMC__
#define TINFCC __cdecl
#else
#define TINFCC
#endif
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* ok status, more data produced */
#define TINF_OK 0
/* end of compressed stream reached */
#define TINF_DONE 1
#define TINF_DATA_ERROR (-3)
#define TINF_CHKSUM_ERROR (-4)
#define TINF_DICT_ERROR (-5)
/* checksum types */
#define TINF_CHKSUM_NONE 0
#define TINF_CHKSUM_ADLER 1
#define TINF_CHKSUM_CRC 2
/* helper macros */
#define TINF_ARRAY_SIZE(arr) (sizeof(arr) / sizeof(*(arr)))
/* data structures */
typedef struct {
unsigned short table[16]; /* table of code length counts */
unsigned short trans[288]; /* code -> symbol translation table */
} TINF_TREE;
struct uzlib_uncomp {
/* Pointer to the next byte in the input buffer */
const unsigned char *source;
/* Pointer to the next byte past the input buffer (source_limit = source + len) */
const unsigned char *source_limit;
/* If source_limit == NULL, or source >= source_limit, this function
will be used to read next byte from source stream. The function may
also return -1 in case of EOF (or irrecoverable error). Note that
besides returning the next byte, it may also update source and
source_limit fields, thus allowing for buffered operation. */
int (*source_read_cb)(struct uzlib_uncomp *uncomp);
unsigned int tag;
unsigned int bitcount;
/* Destination (output) buffer start */
unsigned char *dest_start;
/* Current pointer in dest buffer */
unsigned char *dest;
/* Pointer past the end of the dest buffer, similar to source_limit */
unsigned char *dest_limit;
/* Accumulating checksum */
unsigned int checksum;
char checksum_type;
bool eof;
int btype;
int bfinal;
unsigned int curlen;
int lzOff;
unsigned char *dict_ring;
unsigned int dict_size;
unsigned int dict_idx;
TINF_TREE ltree; /* dynamic length/symbol tree */
TINF_TREE dtree; /* dynamic distance tree */
};
#include "tinf_compat.h"
#define TINF_PUT(d, c) \
{ \
*d->dest++ = c; \
if (d->dict_ring) { d->dict_ring[d->dict_idx++] = c; if (d->dict_idx == d->dict_size) d->dict_idx = 0; } \
}
unsigned char TINFCC uzlib_get_byte(TINF_DATA *d);
/* Decompression API */
void TINFCC uzlib_init(void);
void TINFCC uzlib_uncompress_init(TINF_DATA *d, void *dict, unsigned int dictLen);
int TINFCC uzlib_uncompress(TINF_DATA *d);
int TINFCC uzlib_uncompress_chksum(TINF_DATA *d);
int TINFCC uzlib_zlib_parse_header(TINF_DATA *d);
int TINFCC uzlib_gzip_parse_header(TINF_DATA *d);
/* Compression API */
typedef const uint8_t *uzlib_hash_entry_t;
struct uzlib_comp {
struct Outbuf out;
uzlib_hash_entry_t *hash_table;
unsigned int hash_bits;
unsigned int dict_size;
};
void TINFCC uzlib_compress(struct uzlib_comp *c, const uint8_t *src, unsigned slen);
/* Checksum API */
/* prev_sum is previous value for incremental computation, 1 initially */
uint32_t TINFCC uzlib_adler32(const void *data, unsigned int length, uint32_t prev_sum);
/* crc is previous value for incremental computation, 0xffffffff initially */
uint32_t TINFCC uzlib_crc32(const void *data, unsigned int length, uint32_t crc);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* UZLIB_H_INCLUDED */

22
extmod/uzlib/uzlib_conf.h Normal file
View File

@ -0,0 +1,22 @@
/*
* uzlib - tiny deflate/inflate library (deflate, gzip, zlib)
*
* Copyright (c) 2014-2018 by Paul Sokolovsky
*/
#ifndef UZLIB_CONF_H_INCLUDED
#define UZLIB_CONF_H_INCLUDED
#ifndef UZLIB_CONF_DEBUG_LOG
/* Debug logging level 0, 1, 2, etc. */
#define UZLIB_CONF_DEBUG_LOG 0
#endif
#ifndef UZLIB_CONF_PARANOID_CHECKS
/* Perform extra checks on the input stream, even if they aren't proven
to be strictly required (== lack of them wasn't proven to lead to
crashes). */
#define UZLIB_CONF_PARANOID_CHECKS 0
#endif
#endif /* UZLIB_CONF_H_INCLUDED */