Merge pull request #8468 from s-hadinger/unishox_class

Unishox code optimized
This commit is contained in:
Theo Arends 2020-05-17 18:24:00 +02:00 committed by GitHub
commit 80ad999a38
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 174 additions and 169 deletions

View File

@ -153,52 +153,47 @@ const uint16_t BIN_CODE_TASMOTA_LEN = 3;
// uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00}; // uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00};
uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
int append_bits(char *out, size_t ol, unsigned int code, int clen, byte state) {
byte cur_bit;
byte blen;
unsigned char a_byte;
if (state == SHX_STATE_2) { void Unishox::append_bits(unsigned int code, int clen) {
// remove change state prefix
if ((code >> 9) == 0x1C) { byte cur_bit;
code <<= 7; byte blen;
clen -= 7; unsigned char a_byte;
if (state == SHX_STATE_2) {
// remove change state prefix
if ((code >> 9) == 0x1C) {
code <<= 7;
clen -= 7;
}
}
while (clen > 0) {
cur_bit = ol % 8;
blen = (clen > 8 ? 8 : clen);
a_byte = (code >> 8) & pgm_read_word(&mask[blen - 1]);
a_byte >>= cur_bit;
if (blen + cur_bit > 8)
blen = (8 - cur_bit);
if (out) { // if out == nullptr, then we are in dry-run mode
if (cur_bit == 0)
out[ol >> 3] = a_byte;
else
out[ol >> 3] |= a_byte;
}
code <<= blen;
ol += blen;
if ((out) && (0 == ol % 8)) { // if out == nullptr, dry-run mode. We miss the escaping of characters in the length
// we completed a full byte
char last_c = out[(ol / 8) - 1];
if ((0 == last_c) || (ESCAPE_MARKER == last_c)) {
out[ol >> 3] = 1 + last_c; // increment to 0x01 or 0x2B
out[(ol >>3) -1] = ESCAPE_MARKER; // replace old value with marker
ol += 8; // add one full byte
} }
//if (code == 14272 && clen == 10) { }
// code = 9084; clen -= blen;
// clen = 14; }
//}
}
while (clen > 0) {
cur_bit = ol % 8;
blen = (clen > 8 ? 8 : clen);
// a_byte = (code & pgm_read_word(&mask[blen - 1])) >> 8;
// a_byte = (code & (pgm_read_word(&mask[blen - 1]) << 8)) >> 8;
a_byte = (code >> 8) & pgm_read_word(&mask[blen - 1]);
a_byte >>= cur_bit;
if (blen + cur_bit > 8)
blen = (8 - cur_bit);
if (out) { // if out == nullptr, then we are in dry-run mode
if (cur_bit == 0)
out[ol / 8] = a_byte;
else
out[ol / 8] |= a_byte;
}
code <<= blen;
ol += blen;
if ((out) && (0 == ol % 8)) { // if out == nullptr, dry-run mode. We miss the escaping of characters in the length
// we completed a full byte
char last_c = out[(ol / 8) - 1];
if ((0 == last_c) || (ESCAPE_MARKER == last_c)) {
out[ol / 8] = 1 + last_c; // increment to 0x01 or 0x2B
out[(ol / 8) -1] = ESCAPE_MARKER; // replace old value with marker
ol += 8; // add one full byte
}
}
clen -= blen;
}
return ol;
} }
// First five bits are code and Last three bits of codes represent length // First five bits are code and Last three bits of codes represent length
@ -210,40 +205,36 @@ byte codes[] PROGMEM = { 0x82, 0xC3, 0xE5, 0xED, 0xF5 };
byte bit_len[] PROGMEM = { 5, 7, 9, 12, 16 }; byte bit_len[] PROGMEM = { 5, 7, 9, 12, 16 };
// uint16_t adder[7] PROGMEM = { 0, 32, 160, 672, 4768 }; // no longer used // uint16_t adder[7] PROGMEM = { 0, 32, 160, 672, 4768 }; // no longer used
int encodeCount(char *out, int ol, int count) { void Unishox::encodeCount(int32_t count) {
int till = 0; int till = 0;
int base = 0; int base = 0;
for (int i = 0; i < sizeof(bit_len); i++) { for (uint32_t i = 0; i < sizeof(bit_len); i++) {
uint32_t bit_len_i = pgm_read_byte(&bit_len[i]); uint32_t bit_len_i = pgm_read_byte(&bit_len[i]);
till += (1 << bit_len_i); till += (1 << bit_len_i);
if (count < till) { if (count < till) {
byte codes_i = pgm_read_byte(&codes[i]); byte codes_i = pgm_read_byte(&codes[i]);
ol = append_bits(out, ol, (codes_i & 0xF8) << 8, codes_i & 0x07, 1); append_bits((codes_i & 0xF8) << 8, codes_i & 0x07);
// ol = append_bits(out, ol, (count - pgm_read_word(&adder[i])) << (16 - bit_len_i), bit_len_i, 1); // ol = append_bits(out, ol, (count - pgm_read_word(&adder[i])) << (16 - bit_len_i), bit_len_i, 1);
ol = append_bits(out, ol, (count - base) << (16 - bit_len_i), bit_len_i, 1); append_bits((count - base) << (16 - bit_len_i), bit_len_i);
return ol; return;
} }
base = till; base = till;
} }
return ol; return;
} }
int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *state, byte *is_all_upper) { bool Unishox::matchOccurance(void) {
int j, k; int32_t j, k;
int longest_dist = 0; uint32_t longest_dist = 0;
int longest_len = 0; uint32_t longest_len = 0;
for (j = l - NICE_LEN; j >= 0; j--) { for (j = l - NICE_LEN; j >= 0; j--) {
for (k = l; k < len && j + k - l < l; k++) { for (k = l; k < len && j + k - l < l; k++) {
if (in[k] != in[j + k - l]) if (in[k] != in[j + k - l])
break; break;
} }
// while ((((unsigned char) in[k]) >> 6) == 2)
// k--; // Skip partial UTF-8 matches
//if ((in[k - 1] >> 3) == 0x1E || (in[k - 1] >> 4) == 0x0E || (in[k - 1] >> 5) == 0x06)
// k--;
if (k - l > NICE_LEN - 1) { if (k - l > NICE_LEN - 1) {
int match_len = k - l - NICE_LEN; uint32_t match_len = k - l - NICE_LEN;
int match_dist = l - j - NICE_LEN + 1; uint32_t match_dist = l - j - NICE_LEN + 1;
if (match_len > longest_len) { if (match_len > longest_len) {
longest_len = match_len; longest_len = match_len;
longest_dist = match_dist; longest_dist = match_dist;
@ -251,19 +242,18 @@ int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *sta
} }
} }
if (longest_len) { if (longest_len) {
if (*state == SHX_STATE_2 || *is_all_upper) { if (state == SHX_STATE_2 || is_all_upper) {
*is_all_upper = 0; is_all_upper = 0;
*state = SHX_STATE_1; state = SHX_STATE_1;
*ol = append_bits(out, *ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, *state); append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
} }
*ol = append_bits(out, *ol, DICT_CODE, DICT_CODE_LEN, 1); append_bits(DICT_CODE, DICT_CODE_LEN);
*ol = encodeCount(out, *ol, longest_len); encodeCount(longest_len);
*ol = encodeCount(out, *ol, longest_dist); encodeCount(longest_dist);
l += (longest_len + NICE_LEN); l += longest_len + NICE_LEN - 1;
l--; return true;
return l;
} }
return -l; return false;
} }
// Compress a buffer. // Compress a buffer.
@ -275,15 +265,18 @@ int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *sta
// Output: // Output:
// - if >= 0: size of the compressed buffer. The output buffer does not contain NULL bytes, and it is not NULL terminated // - if >= 0: size of the compressed buffer. The output buffer does not contain NULL bytes, and it is not NULL terminated
// - if < 0: an error occurred, most certainly the output buffer was not large enough // - if < 0: an error occurred, most certainly the output buffer was not large enough
int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out) { int32_t Unishox::unishox_compress(const char *p_in, size_t p_len, char *p_out, size_t p_len_out) {
in = p_in;
len = p_len;
out = p_out;
len_out = p_len_out;
char *ptr; char *ptr;
byte bits; byte bits;
byte state;
int l, ll, ol; int ll;
char c_in, c_next; char c_in, c_next;
byte is_upper, is_all_upper; byte is_upper;
ol = 0; ol = 0;
state = SHX_STATE_1; state = SHX_STATE_1;
@ -302,23 +295,20 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
if (state == SHX_STATE_2 || is_all_upper) { if (state == SHX_STATE_2 || is_all_upper) {
is_all_upper = 0; is_all_upper = 0;
state = SHX_STATE_1; state = SHX_STATE_1;
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); // back to lower case and Set1 append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN); // back to lower case and Set1
} }
// ol = append_bits(out, ol, RPT_CODE, RPT_CODE_LEN, 1); // ol = append_bits(out, ol, RPT_CODE, RPT_CODE_LEN, 1);
ol = append_bits(out, ol, RPT_CODE_TASMOTA, RPT_CODE_TASMOTA_LEN, 1); // reusing CRLF for RPT append_bits(RPT_CODE_TASMOTA, RPT_CODE_TASMOTA_LEN); // reusing CRLF for RPT
ol = encodeCount(out, ol, rpt_count - 4); encodeCount(rpt_count - 4);
l += rpt_count; l += rpt_count - 1;
l--;
continue; continue;
} }
} }
if (l < (len - NICE_LEN + 1)) { if (l < (len - NICE_LEN + 1)) {
l = matchOccurance(in, len, l, out, &ol, &state, &is_all_upper); if (matchOccurance()) {
if (l > 0) { continue;
continue; }
}
l = -l;
} }
if (state == SHX_STATE_2) { // if Set2 if (state == SHX_STATE_2) { // if Set2
if ((c_in >= ' ' && c_in <= '@') || if ((c_in >= ' ' && c_in <= '@') ||
@ -326,7 +316,7 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
(c_in >= '{' && c_in <= '~')) { (c_in >= '{' && c_in <= '~')) {
} else { } else {
state = SHX_STATE_1; // back to Set1 and lower case state = SHX_STATE_1; // back to Set1 and lower case
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
} }
} }
@ -336,7 +326,7 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
else { else {
if (is_all_upper) { if (is_all_upper) {
is_all_upper = 0; is_all_upper = 0;
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
} }
} }
@ -351,37 +341,30 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
break; break;
} }
if (ll == l-1) { if (ll == l-1) {
ol = append_bits(out, ol, ALL_UPPER_CODE, ALL_UPPER_CODE_LEN, state); // CapsLock append_bits(ALL_UPPER_CODE, ALL_UPPER_CODE_LEN); // CapsLock
is_all_upper = 1; is_all_upper = 1;
} }
} }
if (state == SHX_STATE_1 && c_in >= '0' && c_in <= '9') { if (state == SHX_STATE_1 && c_in >= '0' && c_in <= '9') {
ol = append_bits(out, ol, SW2_STATE2_CODE, SW2_STATE2_CODE_LEN, state); // Switch to sticky Set2 append_bits(SW2_STATE2_CODE, SW2_STATE2_CODE_LEN); // Switch to sticky Set2
state = SHX_STATE_2; state = SHX_STATE_2;
} }
c_in -= 32; c_in -= 32;
if (is_all_upper && is_upper) if (is_all_upper && is_upper)
c_in += 32; c_in += 32;
if (c_in == 0 && state == SHX_STATE_2) if (c_in == 0 && state == SHX_STATE_2)
ol = append_bits(out, ol, ST2_SPC_CODE, ST2_SPC_CODE_LEN, state); // space from Set2 ionstead of Set1 append_bits(ST2_SPC_CODE, ST2_SPC_CODE_LEN); // space from Set2 ionstead of Set1
else { else {
// ol = append_bits(out, ol, pgm_read_word(&c_95[c_in]), pgm_read_byte(&l_95[c_in]), state); // original version with c/l in split arrays
uint16_t cl = pgm_read_word(&cl_95[c_in]); uint16_t cl = pgm_read_word(&cl_95[c_in]);
ol = append_bits(out, ol, cl & 0xFFF0, cl & 0x000F, state); append_bits(cl & 0xFFF0, cl & 0x000F);
} }
} else } else if (c_in == 10) {
// if (c_in == 13 && c_next == 10) { // CRLF disabled append_bits(LF_CODE, LF_CODE_LEN); // LF
// ol = append_bits(out, ol, CRLF_CODE, CRLF_CODE_LEN, state); // CRLF } else if (c_in == '\t') {
// l++; append_bits(TAB_CODE, TAB_CODE_LEN); // TAB
// } else
if (c_in == 10) {
ol = append_bits(out, ol, LF_CODE, LF_CODE_LEN, state); // LF
} else
if (c_in == '\t') {
ol = append_bits(out, ol, TAB_CODE, TAB_CODE_LEN, state); // TAB
} else { } else {
ol = append_bits(out, ol, BIN_CODE_TASMOTA, BIN_CODE_TASMOTA_LEN, state); // Binary, we reuse the Unicode marker which 3 bits instead of 9 append_bits(BIN_CODE_TASMOTA, BIN_CODE_TASMOTA_LEN); // Binary, we reuse the Unicode marker which 3 bits instead of 9
ol = encodeCount(out, ol, (unsigned char) 255 - c_in); encodeCount((unsigned char) 255 - c_in);
} }
// check that we have some headroom in the output buffer // check that we have some headroom in the output buffer
@ -392,50 +375,46 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
bits = ol % 8; bits = ol % 8;
if (bits) { if (bits) {
ol = append_bits(out, ol, TERM_CODE, 8 - bits, 1); // 0011 0111 1100 0000 TERM = 0011 0111 11 state = SHX_STATE_1;
append_bits(TERM_CODE, 8 - bits); // 0011 0111 1100 0000 TERM = 0011 0111 11
} }
return ol/8+(ol%8?1:0); return ol/8+(ol%8?1:0);
} }
int getBitVal(const char *in, int bit_no, int count) { uint32_t Unishox::getNextBit(void) {
char c_in = in[bit_no >> 3]; if (8 == bit_no) {
if ((bit_no >> 3) && (ESCAPE_MARKER == in[(bit_no >> 3) - 1])) { // if previous byte is a marker, decrement byte_in = in[byte_no++];
c_in--; if (ESCAPE_MARKER == byte_in) {
byte_in = in[byte_no++] - 1;
}
bit_no = 0;
} }
return (c_in & (0x80 >> (bit_no % 8)) ? 1 << count : 0); return byte_in & (0x80 >> bit_no++) ? 1 : 0;
} }
// Returns: // Returns:
// 0..11 // 0..11
// or -1 if end of stream // or -1 if end of stream
int getCodeIdx(char *code_type, const char *in, int len, int *bit_no_p) { int32_t Unishox::getCodeIdx(const char *code_type) {
int code = 0; int32_t code = 0;
int count = 0; int32_t count = 0;
do { do {
// detect marker if (bit_no >= len)
if (ESCAPE_MARKER == in[*bit_no_p >> 3]) {
*bit_no_p += 8; // skip marker
}
if (*bit_no_p >= len)
return -1; // invalid state return -1; // invalid state
code += getBitVal(in, *bit_no_p, count); code += getNextBit() << count;
(*bit_no_p)++;
count++; count++;
uint8_t code_type_code = pgm_read_byte(&code_type[code]); uint8_t code_type_code = pgm_read_byte(&code_type[code]);
if (code_type_code && (code_type_code & 0x07) == count) { if (code_type_code && (code_type_code & 0x07) == count) {
return code_type_code >> 3; return code_type_code >> 3;
} }
} while (count < 5); } while (count < 5);
return 1; // skip if code not found return -1; // skip if code not found
} }
int getNumFromBits(const char *in, int bit_no, int count) { int32_t Unishox::getNumFromBits(uint32_t count) {
int ret = 0; int ret = 0;
while (count--) { while (count--) {
if (ESCAPE_MARKER == in[bit_no >> 3]) { ret += getNextBit() << count;
bit_no += 8; // skip marker
}
ret += getBitVal(in, bit_no++, count);
} }
return ret; return ret;
} }
@ -452,8 +431,8 @@ int getNumFromBits(const char *in, int bit_no, int count) {
// uint16_t adder_read[] PROGMEM = {0, 32, 160, 672, 4768 }; // uint16_t adder_read[] PROGMEM = {0, 32, 160, 672, 4768 };
// Code size optimized, recalculate adder[] like in encodeCount // Code size optimized, recalculate adder[] like in encodeCount
int readCount(const char *in, int *bit_no_p, int len) { uint32_t Unishox::readCount(void) {
int idx = getCodeIdx(us_hcode, in, len, bit_no_p); int32_t idx = getCodeIdx(us_hcode);
if (idx >= 1) idx--; // we skip v = 1 (code '0') since we no more accept 2 bits encoding if (idx >= 1) idx--; // we skip v = 1 (code '0') since we no more accept 2 bits encoding
if ((idx >= sizeof(bit_len)) || (idx < 0)) return 0; // unsupported or end of stream if ((idx >= sizeof(bit_len)) || (idx < 0)) return 0; // unsupported or end of stream
@ -465,44 +444,41 @@ int readCount(const char *in, int *bit_no_p, int len) {
bit_len_idx = pgm_read_byte(&bit_len[i]); bit_len_idx = pgm_read_byte(&bit_len[i]);
till += (1 << bit_len_idx); till += (1 << bit_len_idx);
} }
int count = getNumFromBits(in, *bit_no_p, bit_len_idx) + base; int count = getNumFromBits(bit_len_idx) + base;
(*bit_no_p) += bit_len_idx;
return count; return count;
} }
int decodeRepeat(const char *in, int len, char *out, int ol, int *bit_no) { void Unishox::decodeRepeat(void) {
int dict_len = readCount(in, bit_no, len) + NICE_LEN; uint32_t dict_len = readCount() + NICE_LEN;
int dist = readCount(in, bit_no, len) + NICE_LEN - 1; uint32_t dist = readCount() + NICE_LEN - 1;
memcpy(out + ol, out + ol - dist, dict_len); memcpy(out + ol, out + ol - dist, dict_len);
ol += dict_len; ol += dict_len;
return ol;
} }
int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out) { int32_t Unishox::unishox_decompress(const char *p_in, size_t p_len, char *p_out, size_t p_len_out) {
in = p_in;
len = p_len;
out = p_out;
len_out = p_len_out;
int dstate; ol = 0;
int bit_no; bit_no = 8; // force load of first byte, pretending we expired the last one
byte is_all_upper; byte_no = 0;
int ol = 0;
bit_no = 0;
dstate = SHX_SET1; dstate = SHX_SET1;
is_all_upper = 0; is_all_upper = 0;
len <<= 3; // *8, len in bits len <<= 3; // *8, len in bits
out[ol] = 0; out[ol] = 0;
while (bit_no < len) { while (bit_no < len) {
int h, v; int32_t h, v;
char c = 0; char c = 0;
byte is_upper = is_all_upper; byte is_upper = is_all_upper;
int orig_bit_no = bit_no; v = getCodeIdx(us_vcode); // read vCode
v = getCodeIdx(us_vcode, in, len, &bit_no); // read vCode
if (v < 0) break; // end of stream if (v < 0) break; // end of stream
h = dstate; // Set1 or Set2 h = dstate; // Set1 or Set2
if (v == 0) { // Switch which is common to Set1 and Set2, first entry if (v == 0) { // Switch which is common to Set1 and Set2, first entry
h = getCodeIdx(us_hcode, in, len, &bit_no); // read hCode h = getCodeIdx(us_hcode); // read hCode
if (h < 0) break; // end of stream if (h < 0) break; // end of stream
if (h == SHX_SET1) { // target is Set1 if (h == SHX_SET1) { // target is Set1
if (dstate == SHX_SET1) { // Switch from Set1 to Set1 us UpperCase if (dstate == SHX_SET1) { // Switch from Set1 to Set1 us UpperCase
@ -510,10 +486,10 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
is_upper = is_all_upper = 0; is_upper = is_all_upper = 0;
continue; continue;
} }
v = getCodeIdx(us_vcode, in, len, &bit_no); // read again vCode v = getCodeIdx(us_vcode); // read again vCode
if (v < 0) break; // end of stream if (v < 0) break; // end of stream
if (v == 0) { if (v == 0) {
h = getCodeIdx(us_hcode, in, len, &bit_no); // read second hCode h = getCodeIdx(us_hcode); // read second hCode
if (h < 0) break; // end of stream if (h < 0) break; // end of stream
if (h == SHX_SET1) { // If double Switch Set1, the CapsLock if (h == SHX_SET1) { // If double Switch Set1, the CapsLock
is_all_upper = 1; is_all_upper = 1;
@ -532,23 +508,23 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
continue; continue;
} }
if (h != SHX_SET1) { // all other Sets (why not else) if (h != SHX_SET1) { // all other Sets (why not else)
v = getCodeIdx(us_vcode, in, len, &bit_no); // we changed set, now read vCode for char v = getCodeIdx(us_vcode); // we changed set, now read vCode for char
if (v < 0) break; // end of stream if (v < 0) break; // end of stream
} }
} }
if (v == 0 && h == SHX_SET1A) { if (v == 0 && h == SHX_SET1A) {
if (is_upper) { if (is_upper) {
out[ol++] = 255 - readCount(in, &bit_no, len); // binary out[ol++] = 255 - readCount(); // binary
} else { } else {
ol = decodeRepeat(in, len, out, ol, &bit_no); // dist decodeRepeat(); // dist
} }
continue; continue;
} }
if (h == SHX_SET1 && v == 3) { if (h == SHX_SET1 && v == 3) {
// was Unicode, will do Binary instead // was Unicode, will do Binary instead
out[ol++] = 255 - readCount(in, &bit_no, len); // binary out[ol++] = 255 - readCount(); // binary
continue; continue;
} }
if (h < 7 && v < 11) // TODO: are these the actual limits? Not 11x7 ? if (h < 7 && v < 11) // TODO: are these the actual limits? Not 11x7 ?
@ -561,22 +537,11 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
c = '\t'; // If UpperCase Space, change to TAB c = '\t'; // If UpperCase Space, change to TAB
if (h == SHX_SET1B) { if (h == SHX_SET1B) {
if (8 == v) { // was LF or RPT, now only LF if (8 == v) { // was LF or RPT, now only LF
// if (is_upper) { // rpt
// int count = readCount(in, &bit_no, len);
// count += 4;
// char rpt_c = out[ol - 1];
// while (count--)
// out[ol++] = rpt_c;
// } else {
out[ol++] = '\n'; out[ol++] = '\n';
// }
continue; continue;
} }
if (9 == v) { // was CRLF, now RPT if (9 == v) { // was CRLF, now RPT
// out[ol++] = '\r'; // CRLF removed uint32_t count = readCount() + 4;
// out[ol++] = '\n';
int count = readCount(in, &bit_no, len);
count += 4;
if (ol + count >= len_out) { if (ol + count >= len_out) {
return -1; // overflow return -1; // overflow
} }
@ -598,5 +563,4 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
} }
return ol; return ol;
} }

View File

@ -20,7 +20,45 @@
#define unishox #define unishox
extern int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out); extern int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out);
extern int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out); //extern int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out);
// Unishox compressor/decompressor.
//
// All working state of a compression or decompression run is kept in the
// private members below instead of being threaded through every helper as
// parameters. Both entry points overwrite that state when they start, so a
// single instance can be reused for successive calls, but it cannot serve two
// runs at the same time.
class Unishox {

public:
  Unishox() {}

  // Decompress `len` bytes from `in` into `out` (capacity `len_out` bytes).
  // Returns the number of bytes written to `out`, or -1 when the output
  // buffer would overflow.
  int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out);

  // Compress `len` bytes from `in` into `out` (capacity `len_out` bytes).
  // Returns the compressed size in bytes if >= 0; a negative value signals an
  // error, most likely an output buffer that is too small. The compressed
  // output contains no NULL bytes and is not NULL terminated.
  // Passing `out == nullptr` selects dry-run mode: nothing is stored and only
  // the size is computed (escape bytes are not accounted for in that mode).
  int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out);

private:
  // --- compression helpers ---

  // Append the `clen` most significant bits of the 16-bit `code` to the
  // output at bit position `ol`, escaping completed bytes equal to 0 or
  // ESCAPE_MARKER.
  void append_bits(unsigned int code, int clen);

  // Emit `count` as a variable-length number: a prefix selecting the bit
  // width followed by the value relative to that range's base.
  void encodeCount(int32_t count);

  // Look for an earlier occurrence of the input at position `l`. When one is
  // found, emit a dictionary reference, advance `l` and return true;
  // otherwise return false and leave `l` untouched.
  bool matchOccurance();

  // --- decompression helpers ---

  // Return the next input bit (0 or 1). A new byte is fetched once all 8 bits
  // of `byte_in` are consumed, undoing the ESCAPE_MARKER escaping on the fly.
  uint32_t getNextBit();

  // Decode one prefix code using the PROGMEM table `code_type`.
  // Returns the code index, or -1 at end of stream / when no code matches.
  int32_t getCodeIdx(const char *code_type);

  // Inverse of encodeCount(). Returns 0 for an unsupported prefix or at end
  // of stream.
  uint32_t readCount();

  // Read a (length, distance) pair and copy that span from the already
  // decoded output to the current output position.
  void decodeRepeat();

  // Read `count` bits, most significant bit first, and return their value.
  int32_t getNumFromBits(uint32_t count);

  // Store one byte at the current output position and advance it.
  void writeOut(char c) { out[ol++] = c; }

  // --- shared state ---
  int32_t l;               // compression: current index in the input buffer
  uint32_t ol;             // output position: counted in bits while compressing, in bytes while decompressing
  int32_t bit_no;          // decompression: next bit to read within `byte_in`; 8 forces a new byte to be loaded
  uint32_t byte_no;        // decompression: index of the next input byte to load
  const char * in;         // input buffer of the current call
  char * out;              // output buffer of the current call (nullptr = dry-run when compressing)
  size_t len;              // input length; converted from bytes to bits during decompression
  size_t len_out;          // capacity of `out` in bytes

  uint8_t dstate;          // decompression: currently selected character set
  unsigned char byte_in;   // decompression: input byte currently being consumed bit by bit
  uint8_t state;           // compression: current encoder state (SHX_STATE_1 / SHX_STATE_2)
  uint8_t is_all_upper;    // non-zero while upper-case lock (CapsLock) is active
};
#endif #endif

View File

@ -397,7 +397,7 @@
// -- Rules or Script ---------------------------- // -- Rules or Script ----------------------------
// Select none or only one of the below defines USE_RULES or USE_SCRIPT // Select none or only one of the below defines USE_RULES or USE_SCRIPT
#define USE_RULES // Add support for rules (+8k code) #define USE_RULES // Add support for rules (+8k code)
#define USE_RULES_COMPRESSION // Compresses rules in Flash at about ~50% (+3.8k code) #define USE_RULES_COMPRESSION // Compresses rules in Flash at about ~50% (+3.3k code)
//#define USE_SCRIPT // Add support for script (+17k code) //#define USE_SCRIPT // Add support for script (+17k code)
//#define USE_SCRIPT_FATFS 4 // Script: Add FAT FileSystem Support //#define USE_SCRIPT_FATFS 4 // Script: Add FAT FileSystem Support

View File

@ -213,6 +213,7 @@ char rules_vars[MAX_RULE_VARS][33] = {{ 0 }};
#ifdef USE_RULES_COMPRESSION #ifdef USE_RULES_COMPRESSION
// Statically allocate one String per rule // Statically allocate one String per rule
String k_rules[MAX_RULE_SETS] = { String(), String(), String() }; // Strings are created empty String k_rules[MAX_RULE_SETS] = { String(), String(), String() }; // Strings are created empty
Unishox compressor; // singleton
#endif // USE_RULES_COMPRESSION #endif // USE_RULES_COMPRESSION
// Returns whether the rule is uncompressed, which means the first byte is not NULL // Returns whether the rule is uncompressed, which means the first byte is not NULL
@ -256,6 +257,7 @@ size_t GetRuleLenStorage(uint32_t idx) {
#endif #endif
} }
#ifdef USE_RULES_COMPRESSION
// internal function, do the actual decompression // internal function, do the actual decompression
void GetRule_decompress(String &rule, const char *rule_head) { void GetRule_decompress(String &rule, const char *rule_head) {
size_t buf_len = 1 + *rule_head * 8; // the first byte contains size of buffer for uncompressed rule / 8, buf_len may overshoot by 7 size_t buf_len = 1 + *rule_head * 8; // the first byte contains size of buffer for uncompressed rule / 8, buf_len may overshoot by 7
@ -268,12 +270,13 @@ void GetRule_decompress(String &rule, const char *rule_head) {
rule.reserve(buf_len); rule.reserve(buf_len);
char* buf = rule.begin(); char* buf = rule.begin();
int32_t len_decompressed = unishox_decompress(rule_head, strlen(rule_head), buf, buf_len); int32_t len_decompressed = compressor.unishox_decompress(rule_head, strlen(rule_head), buf, buf_len);
buf[len_decompressed] = 0; // add NULL terminator buf[len_decompressed] = 0; // add NULL terminator
// AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rawdecompressed: %d"), len_decompressed); // AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rawdecompressed: %d"), len_decompressed);
rule = buf; // assign the raw string to the String object (in reality re-writing the same data in the same place) rule = buf; // assign the raw string to the String object (in reality re-writing the same data in the same place)
} }
#endif // USE_RULES_COMPRESSION
// //
// Read rule in memory, uncompress if needed // Read rule in memory, uncompress if needed
@ -308,7 +311,7 @@ String GetRule(uint32_t idx) {
// If out == nullptr, we are in dry-run mode, so don't keep rule in cache // If out == nullptr, we are in dry-run mode, so don't keep rule in cache
int32_t SetRule_compress(uint32_t idx, const char *in, size_t in_len, char *out, size_t out_len) { int32_t SetRule_compress(uint32_t idx, const char *in, size_t in_len, char *out, size_t out_len) {
int32_t len_compressed; int32_t len_compressed;
len_compressed = unishox_compress(in, in_len, out, out_len); len_compressed = compressor.unishox_compress(in, in_len, out, out_len);
if (len_compressed >= 0) { // negative means compression failed because of buffer too small, we leave the rule untouched if (len_compressed >= 0) { // negative means compression failed because of buffer too small, we leave the rule untouched
// check if we need to store in cache // check if we need to store in cache
@ -357,7 +360,7 @@ int32_t SetRule(uint32_t idx, const char *content, bool append = false) {
int32_t len_compressed, len_uncompressed; int32_t len_compressed, len_uncompressed;
len_uncompressed = strlen(Settings.rules[idx]); len_uncompressed = strlen(Settings.rules[idx]);
len_compressed = unishox_compress(Settings.rules[idx], len_uncompressed, nullptr /* dry-run */, MAX_RULE_SIZE + 8); len_compressed = compressor.unishox_compress(Settings.rules[idx], len_uncompressed, nullptr /* dry-run */, MAX_RULE_SIZE + 8);
AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Stored uncompressed, would compress from %d to %d (-%d%%)"), len_uncompressed, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_uncompressed, 0, 100)); AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Stored uncompressed, would compress from %d to %d (-%d%%)"), len_uncompressed, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_uncompressed, 0, 100));
} }