lib/uzlib: For matches of the same length, take the closest one.

Signed-off-by: Damien George <damien@micropython.org>
This commit is contained in:
Damien George 2023-11-29 11:40:21 +11:00
parent e182f3862e
commit 6ba57f760c
1 changed file with 6 additions and 2 deletions

View File

@@ -28,13 +28,13 @@ void uzlib_lz77_init(uzlib_lz77_state_t *state, uint8_t *hist, size_t hist_max)
state->hist_len = 0; state->hist_len = 0;
} }
// Push the given byte to the history.
// Search back in the history for the maximum match of the given src data, // Search back in the history for the maximum match of the given src data,
// with support for searching beyond the end of the history and into the src buffer // with support for searching beyond the end of the history and into the src buffer
// (effectively the history and src buffer are concatenated). // (effectively the history and src buffer are concatenated).
static size_t uzlib_lz77_search_max_match(uzlib_lz77_state_t *state, const uint8_t *src, size_t len, size_t *longest_offset) { static size_t uzlib_lz77_search_max_match(uzlib_lz77_state_t *state, const uint8_t *src, size_t len, size_t *longest_offset) {
size_t longest_len = 0; size_t longest_len = 0;
for (size_t hist_search = 0; hist_search < state->hist_len; ++hist_search) { for (size_t hist_search = 0; hist_search < state->hist_len; ++hist_search) {
// Search for a match.
size_t match_len; size_t match_len;
for (match_len = 0; match_len <= MATCH_LEN_MAX && match_len < len; ++match_len) { for (match_len = 0; match_len <= MATCH_LEN_MAX && match_len < len; ++match_len) {
uint8_t hist; uint8_t hist;
@@ -47,7 +47,11 @@ static size_t uzlib_lz77_search_max_match(uzlib_lz77_state_t *state, const uint8
break; break;
} }
} }
if (match_len >= MATCH_LEN_MIN && match_len > longest_len) {
// Take this match if its length is at least the minimum, and larger than previous matches.
// If the length is the same as the previous longest then take this match as well, because
// this match will be closer (more recent in the history) and take less bits to encode.
if (match_len >= MATCH_LEN_MIN && match_len >= longest_len) {
longest_len = match_len; longest_len = match_len;
*longest_offset = state->hist_len - hist_search; *longest_offset = state->hist_len - hist_search;
} }