mirror of https://github.com/arendst/Tasmota.git
Merge pull request #8468 from s-hadinger/unishox_class
Unishox code optimized
This commit is contained in:
commit
80ad999a38
|
@ -153,52 +153,47 @@ const uint16_t BIN_CODE_TASMOTA_LEN = 3;
|
||||||
// uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00};
|
// uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00};
|
||||||
uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
|
uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF};
|
||||||
|
|
||||||
int append_bits(char *out, size_t ol, unsigned int code, int clen, byte state) {
|
|
||||||
|
|
||||||
byte cur_bit;
|
|
||||||
byte blen;
|
|
||||||
unsigned char a_byte;
|
|
||||||
|
|
||||||
if (state == SHX_STATE_2) {
|
void Unishox::append_bits(unsigned int code, int clen) {
|
||||||
// remove change state prefix
|
|
||||||
if ((code >> 9) == 0x1C) {
|
byte cur_bit;
|
||||||
code <<= 7;
|
byte blen;
|
||||||
clen -= 7;
|
unsigned char a_byte;
|
||||||
|
|
||||||
|
if (state == SHX_STATE_2) {
|
||||||
|
// remove change state prefix
|
||||||
|
if ((code >> 9) == 0x1C) {
|
||||||
|
code <<= 7;
|
||||||
|
clen -= 7;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
while (clen > 0) {
|
||||||
|
cur_bit = ol % 8;
|
||||||
|
blen = (clen > 8 ? 8 : clen);
|
||||||
|
a_byte = (code >> 8) & pgm_read_word(&mask[blen - 1]);
|
||||||
|
a_byte >>= cur_bit;
|
||||||
|
if (blen + cur_bit > 8)
|
||||||
|
blen = (8 - cur_bit);
|
||||||
|
if (out) { // if out == nullptr, then we are in dry-run mode
|
||||||
|
if (cur_bit == 0)
|
||||||
|
out[ol >> 3] = a_byte;
|
||||||
|
else
|
||||||
|
out[ol >> 3] |= a_byte;
|
||||||
|
}
|
||||||
|
code <<= blen;
|
||||||
|
ol += blen;
|
||||||
|
if ((out) && (0 == ol % 8)) { // if out == nullptr, dry-run mode. We miss the escaping of characters in the length
|
||||||
|
// we completed a full byte
|
||||||
|
char last_c = out[(ol / 8) - 1];
|
||||||
|
if ((0 == last_c) || (ESCAPE_MARKER == last_c)) {
|
||||||
|
out[ol >> 3] = 1 + last_c; // increment to 0x01 or 0x2B
|
||||||
|
out[(ol >>3) -1] = ESCAPE_MARKER; // replace old value with marker
|
||||||
|
ol += 8; // add one full byte
|
||||||
}
|
}
|
||||||
//if (code == 14272 && clen == 10) {
|
}
|
||||||
// code = 9084;
|
clen -= blen;
|
||||||
// clen = 14;
|
}
|
||||||
//}
|
|
||||||
}
|
|
||||||
while (clen > 0) {
|
|
||||||
cur_bit = ol % 8;
|
|
||||||
blen = (clen > 8 ? 8 : clen);
|
|
||||||
// a_byte = (code & pgm_read_word(&mask[blen - 1])) >> 8;
|
|
||||||
// a_byte = (code & (pgm_read_word(&mask[blen - 1]) << 8)) >> 8;
|
|
||||||
a_byte = (code >> 8) & pgm_read_word(&mask[blen - 1]);
|
|
||||||
a_byte >>= cur_bit;
|
|
||||||
if (blen + cur_bit > 8)
|
|
||||||
blen = (8 - cur_bit);
|
|
||||||
if (out) { // if out == nullptr, then we are in dry-run mode
|
|
||||||
if (cur_bit == 0)
|
|
||||||
out[ol / 8] = a_byte;
|
|
||||||
else
|
|
||||||
out[ol / 8] |= a_byte;
|
|
||||||
}
|
|
||||||
code <<= blen;
|
|
||||||
ol += blen;
|
|
||||||
if ((out) && (0 == ol % 8)) { // if out == nullptr, dry-run mode. We miss the escaping of characters in the length
|
|
||||||
// we completed a full byte
|
|
||||||
char last_c = out[(ol / 8) - 1];
|
|
||||||
if ((0 == last_c) || (ESCAPE_MARKER == last_c)) {
|
|
||||||
out[ol / 8] = 1 + last_c; // increment to 0x01 or 0x2B
|
|
||||||
out[(ol / 8) -1] = ESCAPE_MARKER; // replace old value with marker
|
|
||||||
ol += 8; // add one full byte
|
|
||||||
}
|
|
||||||
}
|
|
||||||
clen -= blen;
|
|
||||||
}
|
|
||||||
return ol;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// First five bits are code and Last three bits of codes represent length
|
// First five bits are code and Last three bits of codes represent length
|
||||||
|
@ -210,40 +205,36 @@ byte codes[] PROGMEM = { 0x82, 0xC3, 0xE5, 0xED, 0xF5 };
|
||||||
byte bit_len[] PROGMEM = { 5, 7, 9, 12, 16 };
|
byte bit_len[] PROGMEM = { 5, 7, 9, 12, 16 };
|
||||||
// uint16_t adder[7] PROGMEM = { 0, 32, 160, 672, 4768 }; // no more used
|
// uint16_t adder[7] PROGMEM = { 0, 32, 160, 672, 4768 }; // no more used
|
||||||
|
|
||||||
int encodeCount(char *out, int ol, int count) {
|
void Unishox::encodeCount(int32_t count) {
|
||||||
int till = 0;
|
int till = 0;
|
||||||
int base = 0;
|
int base = 0;
|
||||||
for (int i = 0; i < sizeof(bit_len); i++) {
|
for (uint32_t i = 0; i < sizeof(bit_len); i++) {
|
||||||
uint32_t bit_len_i = pgm_read_byte(&bit_len[i]);
|
uint32_t bit_len_i = pgm_read_byte(&bit_len[i]);
|
||||||
till += (1 << bit_len_i);
|
till += (1 << bit_len_i);
|
||||||
if (count < till) {
|
if (count < till) {
|
||||||
byte codes_i = pgm_read_byte(&codes[i]);
|
byte codes_i = pgm_read_byte(&codes[i]);
|
||||||
ol = append_bits(out, ol, (codes_i & 0xF8) << 8, codes_i & 0x07, 1);
|
append_bits((codes_i & 0xF8) << 8, codes_i & 0x07);
|
||||||
// ol = append_bits(out, ol, (count - pgm_read_word(&adder[i])) << (16 - bit_len_i), bit_len_i, 1);
|
// ol = append_bits(out, ol, (count - pgm_read_word(&adder[i])) << (16 - bit_len_i), bit_len_i, 1);
|
||||||
ol = append_bits(out, ol, (count - base) << (16 - bit_len_i), bit_len_i, 1);
|
append_bits((count - base) << (16 - bit_len_i), bit_len_i);
|
||||||
return ol;
|
return;
|
||||||
}
|
}
|
||||||
base = till;
|
base = till;
|
||||||
}
|
}
|
||||||
return ol;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *state, byte *is_all_upper) {
|
bool Unishox::matchOccurance(void) {
|
||||||
int j, k;
|
int32_t j, k;
|
||||||
int longest_dist = 0;
|
uint32_t longest_dist = 0;
|
||||||
int longest_len = 0;
|
uint32_t longest_len = 0;
|
||||||
for (j = l - NICE_LEN; j >= 0; j--) {
|
for (j = l - NICE_LEN; j >= 0; j--) {
|
||||||
for (k = l; k < len && j + k - l < l; k++) {
|
for (k = l; k < len && j + k - l < l; k++) {
|
||||||
if (in[k] != in[j + k - l])
|
if (in[k] != in[j + k - l])
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
// while ((((unsigned char) in[k]) >> 6) == 2)
|
|
||||||
// k--; // Skip partial UTF-8 matches
|
|
||||||
//if ((in[k - 1] >> 3) == 0x1E || (in[k - 1] >> 4) == 0x0E || (in[k - 1] >> 5) == 0x06)
|
|
||||||
// k--;
|
|
||||||
if (k - l > NICE_LEN - 1) {
|
if (k - l > NICE_LEN - 1) {
|
||||||
int match_len = k - l - NICE_LEN;
|
uint32_t match_len = k - l - NICE_LEN;
|
||||||
int match_dist = l - j - NICE_LEN + 1;
|
uint32_t match_dist = l - j - NICE_LEN + 1;
|
||||||
if (match_len > longest_len) {
|
if (match_len > longest_len) {
|
||||||
longest_len = match_len;
|
longest_len = match_len;
|
||||||
longest_dist = match_dist;
|
longest_dist = match_dist;
|
||||||
|
@ -251,19 +242,18 @@ int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *sta
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (longest_len) {
|
if (longest_len) {
|
||||||
if (*state == SHX_STATE_2 || *is_all_upper) {
|
if (state == SHX_STATE_2 || is_all_upper) {
|
||||||
*is_all_upper = 0;
|
is_all_upper = 0;
|
||||||
*state = SHX_STATE_1;
|
state = SHX_STATE_1;
|
||||||
*ol = append_bits(out, *ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, *state);
|
append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
|
||||||
}
|
}
|
||||||
*ol = append_bits(out, *ol, DICT_CODE, DICT_CODE_LEN, 1);
|
append_bits(DICT_CODE, DICT_CODE_LEN);
|
||||||
*ol = encodeCount(out, *ol, longest_len);
|
encodeCount(longest_len);
|
||||||
*ol = encodeCount(out, *ol, longest_dist);
|
encodeCount(longest_dist);
|
||||||
l += (longest_len + NICE_LEN);
|
l += longest_len + NICE_LEN - 1;
|
||||||
l--;
|
return true;
|
||||||
return l;
|
|
||||||
}
|
}
|
||||||
return -l;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Compress a buffer.
|
// Compress a buffer.
|
||||||
|
@ -275,15 +265,18 @@ int matchOccurance(const char *in, int len, int l, char *out, int *ol, byte *sta
|
||||||
// Output:
|
// Output:
|
||||||
// - if >= 0: size of the compressed buffer. The output buffer does not contain NULL bytes, and it is not NULL terminated
|
// - if >= 0: size of the compressed buffer. The output buffer does not contain NULL bytes, and it is not NULL terminated
|
||||||
// - if < 0: an error occured, most certainly the output buffer was not large enough
|
// - if < 0: an error occured, most certainly the output buffer was not large enough
|
||||||
int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out) {
|
int32_t Unishox::unishox_compress(const char *p_in, size_t p_len, char *p_out, size_t p_len_out) {
|
||||||
|
in = p_in;
|
||||||
|
len = p_len;
|
||||||
|
out = p_out;
|
||||||
|
len_out = p_len_out;
|
||||||
|
|
||||||
char *ptr;
|
char *ptr;
|
||||||
byte bits;
|
byte bits;
|
||||||
byte state;
|
|
||||||
|
|
||||||
int l, ll, ol;
|
int ll;
|
||||||
char c_in, c_next;
|
char c_in, c_next;
|
||||||
byte is_upper, is_all_upper;
|
byte is_upper;
|
||||||
|
|
||||||
ol = 0;
|
ol = 0;
|
||||||
state = SHX_STATE_1;
|
state = SHX_STATE_1;
|
||||||
|
@ -302,23 +295,20 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
|
||||||
if (state == SHX_STATE_2 || is_all_upper) {
|
if (state == SHX_STATE_2 || is_all_upper) {
|
||||||
is_all_upper = 0;
|
is_all_upper = 0;
|
||||||
state = SHX_STATE_1;
|
state = SHX_STATE_1;
|
||||||
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state); // back to lower case and Set1
|
append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN); // back to lower case and Set1
|
||||||
}
|
}
|
||||||
// ol = append_bits(out, ol, RPT_CODE, RPT_CODE_LEN, 1);
|
// ol = append_bits(out, ol, RPT_CODE, RPT_CODE_LEN, 1);
|
||||||
ol = append_bits(out, ol, RPT_CODE_TASMOTA, RPT_CODE_TASMOTA_LEN, 1); // reusing CRLF for RPT
|
append_bits(RPT_CODE_TASMOTA, RPT_CODE_TASMOTA_LEN); // reusing CRLF for RPT
|
||||||
ol = encodeCount(out, ol, rpt_count - 4);
|
encodeCount(rpt_count - 4);
|
||||||
l += rpt_count;
|
l += rpt_count - 1;
|
||||||
l--;
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (l < (len - NICE_LEN + 1)) {
|
if (l < (len - NICE_LEN + 1)) {
|
||||||
l = matchOccurance(in, len, l, out, &ol, &state, &is_all_upper);
|
if (matchOccurance()) {
|
||||||
if (l > 0) {
|
continue;
|
||||||
continue;
|
}
|
||||||
}
|
|
||||||
l = -l;
|
|
||||||
}
|
}
|
||||||
if (state == SHX_STATE_2) { // if Set2
|
if (state == SHX_STATE_2) { // if Set2
|
||||||
if ((c_in >= ' ' && c_in <= '@') ||
|
if ((c_in >= ' ' && c_in <= '@') ||
|
||||||
|
@ -326,7 +316,7 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
|
||||||
(c_in >= '{' && c_in <= '~')) {
|
(c_in >= '{' && c_in <= '~')) {
|
||||||
} else {
|
} else {
|
||||||
state = SHX_STATE_1; // back to Set1 and lower case
|
state = SHX_STATE_1; // back to Set1 and lower case
|
||||||
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state);
|
append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -336,7 +326,7 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
|
||||||
else {
|
else {
|
||||||
if (is_all_upper) {
|
if (is_all_upper) {
|
||||||
is_all_upper = 0;
|
is_all_upper = 0;
|
||||||
ol = append_bits(out, ol, BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN, state);
|
append_bits(BACK2_STATE1_CODE, BACK2_STATE1_CODE_LEN);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -351,37 +341,30 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
if (ll == l-1) {
|
if (ll == l-1) {
|
||||||
ol = append_bits(out, ol, ALL_UPPER_CODE, ALL_UPPER_CODE_LEN, state); // CapsLock
|
append_bits(ALL_UPPER_CODE, ALL_UPPER_CODE_LEN); // CapsLock
|
||||||
is_all_upper = 1;
|
is_all_upper = 1;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (state == SHX_STATE_1 && c_in >= '0' && c_in <= '9') {
|
if (state == SHX_STATE_1 && c_in >= '0' && c_in <= '9') {
|
||||||
ol = append_bits(out, ol, SW2_STATE2_CODE, SW2_STATE2_CODE_LEN, state); // Switch to sticky Set2
|
append_bits(SW2_STATE2_CODE, SW2_STATE2_CODE_LEN); // Switch to sticky Set2
|
||||||
state = SHX_STATE_2;
|
state = SHX_STATE_2;
|
||||||
}
|
}
|
||||||
c_in -= 32;
|
c_in -= 32;
|
||||||
if (is_all_upper && is_upper)
|
if (is_all_upper && is_upper)
|
||||||
c_in += 32;
|
c_in += 32;
|
||||||
if (c_in == 0 && state == SHX_STATE_2)
|
if (c_in == 0 && state == SHX_STATE_2)
|
||||||
ol = append_bits(out, ol, ST2_SPC_CODE, ST2_SPC_CODE_LEN, state); // space from Set2 ionstead of Set1
|
append_bits(ST2_SPC_CODE, ST2_SPC_CODE_LEN); // space from Set2 ionstead of Set1
|
||||||
else {
|
else {
|
||||||
// ol = append_bits(out, ol, pgm_read_word(&c_95[c_in]), pgm_read_byte(&l_95[c_in]), state); // original version with c/l in split arrays
|
|
||||||
uint16_t cl = pgm_read_word(&cl_95[c_in]);
|
uint16_t cl = pgm_read_word(&cl_95[c_in]);
|
||||||
ol = append_bits(out, ol, cl & 0xFFF0, cl & 0x000F, state);
|
append_bits(cl & 0xFFF0, cl & 0x000F);
|
||||||
}
|
}
|
||||||
} else
|
} else if (c_in == 10) {
|
||||||
// if (c_in == 13 && c_next == 10) { // CRLF disabled
|
append_bits(LF_CODE, LF_CODE_LEN); // LF
|
||||||
// ol = append_bits(out, ol, CRLF_CODE, CRLF_CODE_LEN, state); // CRLF
|
} else if (c_in == '\t') {
|
||||||
// l++;
|
append_bits(TAB_CODE, TAB_CODE_LEN); // TAB
|
||||||
// } else
|
|
||||||
if (c_in == 10) {
|
|
||||||
ol = append_bits(out, ol, LF_CODE, LF_CODE_LEN, state); // LF
|
|
||||||
} else
|
|
||||||
if (c_in == '\t') {
|
|
||||||
ol = append_bits(out, ol, TAB_CODE, TAB_CODE_LEN, state); // TAB
|
|
||||||
} else {
|
} else {
|
||||||
ol = append_bits(out, ol, BIN_CODE_TASMOTA, BIN_CODE_TASMOTA_LEN, state); // Binary, we reuse the Unicode marker which 3 bits instead of 9
|
append_bits(BIN_CODE_TASMOTA, BIN_CODE_TASMOTA_LEN); // Binary, we reuse the Unicode marker which 3 bits instead of 9
|
||||||
ol = encodeCount(out, ol, (unsigned char) 255 - c_in);
|
encodeCount((unsigned char) 255 - c_in);
|
||||||
}
|
}
|
||||||
|
|
||||||
// check that we have some headroom in the output buffer
|
// check that we have some headroom in the output buffer
|
||||||
|
@ -392,50 +375,46 @@ int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out)
|
||||||
|
|
||||||
bits = ol % 8;
|
bits = ol % 8;
|
||||||
if (bits) {
|
if (bits) {
|
||||||
ol = append_bits(out, ol, TERM_CODE, 8 - bits, 1); // 0011 0111 1100 0000 TERM = 0011 0111 11
|
state = SHX_STATE_1;
|
||||||
|
append_bits(TERM_CODE, 8 - bits); // 0011 0111 1100 0000 TERM = 0011 0111 11
|
||||||
}
|
}
|
||||||
return ol/8+(ol%8?1:0);
|
return ol/8+(ol%8?1:0);
|
||||||
}
|
}
|
||||||
|
|
||||||
int getBitVal(const char *in, int bit_no, int count) {
|
uint32_t Unishox::getNextBit(void) {
|
||||||
char c_in = in[bit_no >> 3];
|
if (8 == bit_no) {
|
||||||
if ((bit_no >> 3) && (ESCAPE_MARKER == in[(bit_no >> 3) - 1])) { // if previous byte is a marker, decrement
|
byte_in = in[byte_no++];
|
||||||
c_in--;
|
if (ESCAPE_MARKER == byte_in) {
|
||||||
|
byte_in = in[byte_no++] - 1;
|
||||||
|
}
|
||||||
|
bit_no = 0;
|
||||||
}
|
}
|
||||||
return (c_in & (0x80 >> (bit_no % 8)) ? 1 << count : 0);
|
return byte_in & (0x80 >> bit_no++) ? 1 : 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Returns:
|
// Returns:
|
||||||
// 0..11
|
// 0..11
|
||||||
// or -1 if end of stream
|
// or -1 if end of stream
|
||||||
int getCodeIdx(char *code_type, const char *in, int len, int *bit_no_p) {
|
int32_t Unishox::getCodeIdx(const char *code_type) {
|
||||||
int code = 0;
|
int32_t code = 0;
|
||||||
int count = 0;
|
int32_t count = 0;
|
||||||
do {
|
do {
|
||||||
// detect marker
|
if (bit_no >= len)
|
||||||
if (ESCAPE_MARKER == in[*bit_no_p >> 3]) {
|
|
||||||
*bit_no_p += 8; // skip marker
|
|
||||||
}
|
|
||||||
if (*bit_no_p >= len)
|
|
||||||
return -1; // invalid state
|
return -1; // invalid state
|
||||||
code += getBitVal(in, *bit_no_p, count);
|
code += getNextBit() << count;
|
||||||
(*bit_no_p)++;
|
|
||||||
count++;
|
count++;
|
||||||
uint8_t code_type_code = pgm_read_byte(&code_type[code]);
|
uint8_t code_type_code = pgm_read_byte(&code_type[code]);
|
||||||
if (code_type_code && (code_type_code & 0x07) == count) {
|
if (code_type_code && (code_type_code & 0x07) == count) {
|
||||||
return code_type_code >> 3;
|
return code_type_code >> 3;
|
||||||
}
|
}
|
||||||
} while (count < 5);
|
} while (count < 5);
|
||||||
return 1; // skip if code not found
|
return -1; // skip if code not found
|
||||||
}
|
}
|
||||||
|
|
||||||
int getNumFromBits(const char *in, int bit_no, int count) {
|
int32_t Unishox::getNumFromBits(uint32_t count) {
|
||||||
int ret = 0;
|
int ret = 0;
|
||||||
while (count--) {
|
while (count--) {
|
||||||
if (ESCAPE_MARKER == in[bit_no >> 3]) {
|
ret += getNextBit() << count;
|
||||||
bit_no += 8; // skip marker
|
|
||||||
}
|
|
||||||
ret += getBitVal(in, bit_no++, count);
|
|
||||||
}
|
}
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
@ -452,8 +431,8 @@ int getNumFromBits(const char *in, int bit_no, int count) {
|
||||||
// uint16_t adder_read[] PROGMEM = {0, 32, 160, 672, 4768 };
|
// uint16_t adder_read[] PROGMEM = {0, 32, 160, 672, 4768 };
|
||||||
|
|
||||||
// Code size optimized, recalculate adder[] like in encodeCount
|
// Code size optimized, recalculate adder[] like in encodeCount
|
||||||
int readCount(const char *in, int *bit_no_p, int len) {
|
uint32_t Unishox::readCount(void) {
|
||||||
int idx = getCodeIdx(us_hcode, in, len, bit_no_p);
|
int32_t idx = getCodeIdx(us_hcode);
|
||||||
if (idx >= 1) idx--; // we skip v = 1 (code '0') since we no more accept 2 bits encoding
|
if (idx >= 1) idx--; // we skip v = 1 (code '0') since we no more accept 2 bits encoding
|
||||||
if ((idx >= sizeof(bit_len)) || (idx < 0)) return 0; // unsupported or end of stream
|
if ((idx >= sizeof(bit_len)) || (idx < 0)) return 0; // unsupported or end of stream
|
||||||
|
|
||||||
|
@ -465,44 +444,41 @@ int readCount(const char *in, int *bit_no_p, int len) {
|
||||||
bit_len_idx = pgm_read_byte(&bit_len[i]);
|
bit_len_idx = pgm_read_byte(&bit_len[i]);
|
||||||
till += (1 << bit_len_idx);
|
till += (1 << bit_len_idx);
|
||||||
}
|
}
|
||||||
int count = getNumFromBits(in, *bit_no_p, bit_len_idx) + base;
|
int count = getNumFromBits(bit_len_idx) + base;
|
||||||
|
|
||||||
(*bit_no_p) += bit_len_idx;
|
|
||||||
return count;
|
return count;
|
||||||
}
|
}
|
||||||
|
|
||||||
int decodeRepeat(const char *in, int len, char *out, int ol, int *bit_no) {
|
void Unishox::decodeRepeat(void) {
|
||||||
int dict_len = readCount(in, bit_no, len) + NICE_LEN;
|
uint32_t dict_len = readCount() + NICE_LEN;
|
||||||
int dist = readCount(in, bit_no, len) + NICE_LEN - 1;
|
uint32_t dist = readCount() + NICE_LEN - 1;
|
||||||
memcpy(out + ol, out + ol - dist, dict_len);
|
memcpy(out + ol, out + ol - dist, dict_len);
|
||||||
ol += dict_len;
|
ol += dict_len;
|
||||||
|
|
||||||
return ol;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out) {
|
int32_t Unishox::unishox_decompress(const char *p_in, size_t p_len, char *p_out, size_t p_len_out) {
|
||||||
|
in = p_in;
|
||||||
|
len = p_len;
|
||||||
|
out = p_out;
|
||||||
|
len_out = p_len_out;
|
||||||
|
|
||||||
int dstate;
|
ol = 0;
|
||||||
int bit_no;
|
bit_no = 8; // force load of first byte, pretending we expired the last one
|
||||||
byte is_all_upper;
|
byte_no = 0;
|
||||||
|
|
||||||
int ol = 0;
|
|
||||||
bit_no = 0;
|
|
||||||
dstate = SHX_SET1;
|
dstate = SHX_SET1;
|
||||||
is_all_upper = 0;
|
is_all_upper = 0;
|
||||||
|
|
||||||
len <<= 3; // *8, len in bits
|
len <<= 3; // *8, len in bits
|
||||||
out[ol] = 0;
|
out[ol] = 0;
|
||||||
while (bit_no < len) {
|
while (bit_no < len) {
|
||||||
int h, v;
|
int32_t h, v;
|
||||||
char c = 0;
|
char c = 0;
|
||||||
byte is_upper = is_all_upper;
|
byte is_upper = is_all_upper;
|
||||||
int orig_bit_no = bit_no;
|
v = getCodeIdx(us_vcode); // read vCode
|
||||||
v = getCodeIdx(us_vcode, in, len, &bit_no); // read vCode
|
|
||||||
if (v < 0) break; // end of stream
|
if (v < 0) break; // end of stream
|
||||||
h = dstate; // Set1 or Set2
|
h = dstate; // Set1 or Set2
|
||||||
if (v == 0) { // Switch which is common to Set1 and Set2, first entry
|
if (v == 0) { // Switch which is common to Set1 and Set2, first entry
|
||||||
h = getCodeIdx(us_hcode, in, len, &bit_no); // read hCode
|
h = getCodeIdx(us_hcode); // read hCode
|
||||||
if (h < 0) break; // end of stream
|
if (h < 0) break; // end of stream
|
||||||
if (h == SHX_SET1) { // target is Set1
|
if (h == SHX_SET1) { // target is Set1
|
||||||
if (dstate == SHX_SET1) { // Switch from Set1 to Set1 us UpperCase
|
if (dstate == SHX_SET1) { // Switch from Set1 to Set1 us UpperCase
|
||||||
|
@ -510,10 +486,10 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
|
||||||
is_upper = is_all_upper = 0;
|
is_upper = is_all_upper = 0;
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
v = getCodeIdx(us_vcode, in, len, &bit_no); // read again vCode
|
v = getCodeIdx(us_vcode); // read again vCode
|
||||||
if (v < 0) break; // end of stream
|
if (v < 0) break; // end of stream
|
||||||
if (v == 0) {
|
if (v == 0) {
|
||||||
h = getCodeIdx(us_hcode, in, len, &bit_no); // read second hCode
|
h = getCodeIdx(us_hcode); // read second hCode
|
||||||
if (h < 0) break; // end of stream
|
if (h < 0) break; // end of stream
|
||||||
if (h == SHX_SET1) { // If double Switch Set1, the CapsLock
|
if (h == SHX_SET1) { // If double Switch Set1, the CapsLock
|
||||||
is_all_upper = 1;
|
is_all_upper = 1;
|
||||||
|
@ -532,23 +508,23 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (h != SHX_SET1) { // all other Sets (why not else)
|
if (h != SHX_SET1) { // all other Sets (why not else)
|
||||||
v = getCodeIdx(us_vcode, in, len, &bit_no); // we changed set, now read vCode for char
|
v = getCodeIdx(us_vcode); // we changed set, now read vCode for char
|
||||||
if (v < 0) break; // end of stream
|
if (v < 0) break; // end of stream
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (v == 0 && h == SHX_SET1A) {
|
if (v == 0 && h == SHX_SET1A) {
|
||||||
if (is_upper) {
|
if (is_upper) {
|
||||||
out[ol++] = 255 - readCount(in, &bit_no, len); // binary
|
out[ol++] = 255 - readCount(); // binary
|
||||||
} else {
|
} else {
|
||||||
ol = decodeRepeat(in, len, out, ol, &bit_no); // dist
|
decodeRepeat(); // dist
|
||||||
}
|
}
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (h == SHX_SET1 && v == 3) {
|
if (h == SHX_SET1 && v == 3) {
|
||||||
// was Unicode, will do Binary instead
|
// was Unicode, will do Binary instead
|
||||||
out[ol++] = 255 - readCount(in, &bit_no, len); // binary
|
out[ol++] = 255 - readCount(); // binary
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (h < 7 && v < 11) // TODO: are these the actual limits? Not 11x7 ?
|
if (h < 7 && v < 11) // TODO: are these the actual limits? Not 11x7 ?
|
||||||
|
@ -561,22 +537,11 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
|
||||||
c = '\t'; // If UpperCase Space, change to TAB
|
c = '\t'; // If UpperCase Space, change to TAB
|
||||||
if (h == SHX_SET1B) {
|
if (h == SHX_SET1B) {
|
||||||
if (8 == v) { // was LF or RPT, now only LF
|
if (8 == v) { // was LF or RPT, now only LF
|
||||||
// if (is_upper) { // rpt
|
|
||||||
// int count = readCount(in, &bit_no, len);
|
|
||||||
// count += 4;
|
|
||||||
// char rpt_c = out[ol - 1];
|
|
||||||
// while (count--)
|
|
||||||
// out[ol++] = rpt_c;
|
|
||||||
// } else {
|
|
||||||
out[ol++] = '\n';
|
out[ol++] = '\n';
|
||||||
// }
|
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
if (9 == v) { // was CRLF, now RPT
|
if (9 == v) { // was CRLF, now RPT
|
||||||
// out[ol++] = '\r'; // CRLF removed
|
uint32_t count = readCount() + 4;
|
||||||
// out[ol++] = '\n';
|
|
||||||
int count = readCount(in, &bit_no, len);
|
|
||||||
count += 4;
|
|
||||||
if (ol + count >= len_out) {
|
if (ol + count >= len_out) {
|
||||||
return -1; // overflow
|
return -1; // overflow
|
||||||
}
|
}
|
||||||
|
@ -598,5 +563,4 @@ int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out
|
||||||
}
|
}
|
||||||
|
|
||||||
return ol;
|
return ol;
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
|
@ -20,7 +20,45 @@
|
||||||
#define unishox
|
#define unishox
|
||||||
|
|
||||||
extern int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out);
|
extern int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out);
|
||||||
extern int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out);
|
//extern int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out);
|
||||||
|
|
||||||
|
class Unishox {
|
||||||
|
|
||||||
|
public:
|
||||||
|
Unishox() {};
|
||||||
|
|
||||||
|
int32_t unishox_decompress(const char *in, size_t len, char *out, size_t len_out);
|
||||||
|
int32_t unishox_compress(const char *in, size_t len, char *out, size_t len_out);
|
||||||
|
|
||||||
|
private:
|
||||||
|
|
||||||
|
void append_bits(unsigned int code, int clen);
|
||||||
|
void encodeCount(int32_t count);
|
||||||
|
bool matchOccurance(void);
|
||||||
|
|
||||||
|
uint32_t getNextBit(void);
|
||||||
|
int32_t getCodeIdx(const char *code_type);
|
||||||
|
uint32_t readCount(void);
|
||||||
|
void decodeRepeat(void);
|
||||||
|
int32_t getNumFromBits(uint32_t count);
|
||||||
|
|
||||||
|
inline void writeOut(char c) { out[ol++] = c; }
|
||||||
|
|
||||||
|
int32_t l;
|
||||||
|
uint32_t ol;
|
||||||
|
int32_t bit_no;
|
||||||
|
uint32_t byte_no;
|
||||||
|
const char * in;
|
||||||
|
char * out;
|
||||||
|
size_t len;
|
||||||
|
size_t len_out;
|
||||||
|
|
||||||
|
uint8_t dstate;
|
||||||
|
unsigned char byte_in;
|
||||||
|
uint8_t state;
|
||||||
|
uint8_t is_all_upper;
|
||||||
|
|
||||||
|
};
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
|
|
@ -397,7 +397,7 @@
|
||||||
// -- Rules or Script ----------------------------
|
// -- Rules or Script ----------------------------
|
||||||
// Select none or only one of the below defines USE_RULES or USE_SCRIPT
|
// Select none or only one of the below defines USE_RULES or USE_SCRIPT
|
||||||
#define USE_RULES // Add support for rules (+8k code)
|
#define USE_RULES // Add support for rules (+8k code)
|
||||||
#define USE_RULES_COMPRESSION // Compresses rules in Flash at about ~50% (+3.8k code)
|
#define USE_RULES_COMPRESSION // Compresses rules in Flash at about ~50% (+3.3k code)
|
||||||
//#define USE_SCRIPT // Add support for script (+17k code)
|
//#define USE_SCRIPT // Add support for script (+17k code)
|
||||||
//#define USE_SCRIPT_FATFS 4 // Script: Add FAT FileSystem Support
|
//#define USE_SCRIPT_FATFS 4 // Script: Add FAT FileSystem Support
|
||||||
|
|
||||||
|
|
|
@ -213,6 +213,7 @@ char rules_vars[MAX_RULE_VARS][33] = {{ 0 }};
|
||||||
#ifdef USE_RULES_COMPRESSION
|
#ifdef USE_RULES_COMPRESSION
|
||||||
// Statically allocate one String per rule
|
// Statically allocate one String per rule
|
||||||
String k_rules[MAX_RULE_SETS] = { String(), String(), String() }; // Strings are created empty
|
String k_rules[MAX_RULE_SETS] = { String(), String(), String() }; // Strings are created empty
|
||||||
|
Unishox compressor; // singleton
|
||||||
#endif // USE_RULES_COMPRESSION
|
#endif // USE_RULES_COMPRESSION
|
||||||
|
|
||||||
// Returns whether the rule is uncompressed, which means the first byte is not NULL
|
// Returns whether the rule is uncompressed, which means the first byte is not NULL
|
||||||
|
@ -256,6 +257,7 @@ size_t GetRuleLenStorage(uint32_t idx) {
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef USE_RULES_COMPRESSION
|
||||||
// internal function, do the actual decompression
|
// internal function, do the actual decompression
|
||||||
void GetRule_decompress(String &rule, const char *rule_head) {
|
void GetRule_decompress(String &rule, const char *rule_head) {
|
||||||
size_t buf_len = 1 + *rule_head * 8; // the first byte contains size of buffer for uncompressed rule / 8, buf_len may overshoot by 7
|
size_t buf_len = 1 + *rule_head * 8; // the first byte contains size of buffer for uncompressed rule / 8, buf_len may overshoot by 7
|
||||||
|
@ -268,12 +270,13 @@ void GetRule_decompress(String &rule, const char *rule_head) {
|
||||||
rule.reserve(buf_len);
|
rule.reserve(buf_len);
|
||||||
char* buf = rule.begin();
|
char* buf = rule.begin();
|
||||||
|
|
||||||
int32_t len_decompressed = unishox_decompress(rule_head, strlen(rule_head), buf, buf_len);
|
int32_t len_decompressed = compressor.unishox_decompress(rule_head, strlen(rule_head), buf, buf_len);
|
||||||
buf[len_decompressed] = 0; // add NULL terminator
|
buf[len_decompressed] = 0; // add NULL terminator
|
||||||
|
|
||||||
// AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rawdecompressed: %d"), len_decompressed);
|
// AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rawdecompressed: %d"), len_decompressed);
|
||||||
rule = buf; // assign the raw string to the String object (in reality re-writing the same data in the same place)
|
rule = buf; // assign the raw string to the String object (in reality re-writing the same data in the same place)
|
||||||
}
|
}
|
||||||
|
#endif // USE_RULES_COMPRESSION
|
||||||
|
|
||||||
//
|
//
|
||||||
// Read rule in memory, uncompress if needed
|
// Read rule in memory, uncompress if needed
|
||||||
|
@ -308,7 +311,7 @@ String GetRule(uint32_t idx) {
|
||||||
// If out == nullptr, we are in dry-run mode, so don't keep rule in cache
|
// If out == nullptr, we are in dry-run mode, so don't keep rule in cache
|
||||||
int32_t SetRule_compress(uint32_t idx, const char *in, size_t in_len, char *out, size_t out_len) {
|
int32_t SetRule_compress(uint32_t idx, const char *in, size_t in_len, char *out, size_t out_len) {
|
||||||
int32_t len_compressed;
|
int32_t len_compressed;
|
||||||
len_compressed = unishox_compress(in, in_len, out, out_len);
|
len_compressed = compressor.unishox_compress(in, in_len, out, out_len);
|
||||||
|
|
||||||
if (len_compressed >= 0) { // negative means compression failed because of buffer too small, we leave the rule untouched
|
if (len_compressed >= 0) { // negative means compression failed because of buffer too small, we leave the rule untouched
|
||||||
// check if we need to store in cache
|
// check if we need to store in cache
|
||||||
|
@ -357,7 +360,7 @@ int32_t SetRule(uint32_t idx, const char *content, bool append = false) {
|
||||||
int32_t len_compressed, len_uncompressed;
|
int32_t len_compressed, len_uncompressed;
|
||||||
|
|
||||||
len_uncompressed = strlen(Settings.rules[idx]);
|
len_uncompressed = strlen(Settings.rules[idx]);
|
||||||
len_compressed = unishox_compress(Settings.rules[idx], len_uncompressed, nullptr /* dry-run */, MAX_RULE_SIZE + 8);
|
len_compressed = compressor.unishox_compress(Settings.rules[idx], len_uncompressed, nullptr /* dry-run */, MAX_RULE_SIZE + 8);
|
||||||
AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Stored uncompressed, would compress from %d to %d (-%d%%)"), len_uncompressed, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_uncompressed, 0, 100));
|
AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Stored uncompressed, would compress from %d to %d (-%d%%)"), len_uncompressed, len_compressed, 100 - changeUIntScale(len_compressed, 0, len_uncompressed, 0, 100));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Loading…
Reference in New Issue