diff --git a/lib/Unishox-1.0-shadinger/src/unishox.cpp b/lib/Unishox-1.0-shadinger/src/unishox.cpp index 2de356d3d..743eed38c 100644 --- a/lib/Unishox-1.0-shadinger/src/unishox.cpp +++ b/lib/Unishox-1.0-shadinger/src/unishox.cpp @@ -57,7 +57,7 @@ typedef unsigned char byte; // we squeeze both c_95[] and l_95[] in a sinle array. // c_95[] uses only the 3 upper nibbles (or 12 most signifcant bits), while the last nibble encodes length (3..13) -uint16_t cl_95[95] PROGMEM = {0x4000 + 3, 0x3F80 + 11, 0x3D80 + 11, 0x3C80 + 10, 0x3BE0 + 12, 0x3E80 + 10, 0x3F40 + 11, 0x3EC0 + 10, 0x3BA0 + 11, 0x3BC0 + 11, 0x3D60 + 11, 0x3B60 + 11, 0x3A80 + 10, 0x3AC0 + 10, 0x3A00 + 9, 0x3B00 + 10, 0x38C0 + 10, 0x3900 + 10, 0x3940 + 11, 0x3960 + 11, 0x3980 + 11, 0x39A0 + 11, 0x39C0 + 11, 0x39E0 + 12, 0x39F0 + 12, 0x3880 + 10, 0x3CC0 + 10, 0x3C00 + 9, 0x3D00 + 10, 0x3E00 + 9, 0x3F00 + 10, 0x3B40 + 11, 0x3BF0 + 12, 0x2B00 + 8, 0x21C0 + 11, 0x20C0 + 10, 0x2100 + 10, 0x2600 + 7, 0x2300 + 11, 0x21E0 + 12, 0x2140 + 11, 0x2D00 + 8, 0x2358 + 13, 0x2340 + 12, 0x2080 + 10, 0x21A0 + 11, 0x2E00 + 8, 0x2C00 + 8, 0x2180 + 11, 0x2350 + 13, 0x2F80 + 9, 0x2F00 + 9, 0x2A00 + 8, 0x2160 + 11, 0x2330 + 12, 0x21F0 + 12, 0x2360 + 13, 0x2320 + 12, 0x2368 + 13, 0x3DE0 + 12, 0x3FA0 + 11, 0x3DF0 + 12, 0x3D40 + 11, 0x3F60 + 11, 0x3FF0 + 12, 0xB000 + 4, 0x1C00 + 7, 0x0C00 + 6, 0x1000 + 6, 0x6000 + 3, 0x3000 + 7, 0x1E00 + 8, 0x1400 + 7, 0xD000 + 4, 0x3580 + 9, 0x3400 + 8, 0x0800 + 6, 0x1A00 + 7, 0xE000 + 4, 0xC000 + 4, 0x1800 + 7, 0x3500 + 9, 0xF800 + 5, 0xF000 + 5, 0xA000 + 4, 0x1600 + 7, 0x3300 + 8, 0x1F00 + 8, 0x3600 + 9, 0x3200 + 8, 0x3680 + 9, 0x3DA0 + 11, 0x3FC0 + 11, 0x3DC0 + 11, 0x3FE0 + 12 }; +static uint16_t cl_95[95] PROGMEM = {0x4000 + 3, 0x3F80 + 11, 0x3D80 + 11, 0x3C80 + 10, 0x3BE0 + 12, 0x3E80 + 10, 0x3F40 + 11, 0x3EC0 + 10, 0x3BA0 + 11, 0x3BC0 + 11, 0x3D60 + 11, 0x3B60 + 11, 0x3A80 + 10, 0x3AC0 + 10, 0x3A00 + 9, 0x3B00 + 10, 0x38C0 + 10, 0x3900 + 10, 0x3940 + 11, 0x3960 + 11, 0x3980 + 11, 0x39A0 + 11, 
0x39C0 + 11, 0x39E0 + 12, 0x39F0 + 12, 0x3880 + 10, 0x3CC0 + 10, 0x3C00 + 9, 0x3D00 + 10, 0x3E00 + 9, 0x3F00 + 10, 0x3B40 + 11, 0x3BF0 + 12, 0x2B00 + 8, 0x21C0 + 11, 0x20C0 + 10, 0x2100 + 10, 0x2600 + 7, 0x2300 + 11, 0x21E0 + 12, 0x2140 + 11, 0x2D00 + 8, 0x2358 + 13, 0x2340 + 12, 0x2080 + 10, 0x21A0 + 11, 0x2E00 + 8, 0x2C00 + 8, 0x2180 + 11, 0x2350 + 13, 0x2F80 + 9, 0x2F00 + 9, 0x2A00 + 8, 0x2160 + 11, 0x2330 + 12, 0x21F0 + 12, 0x2360 + 13, 0x2320 + 12, 0x2368 + 13, 0x3DE0 + 12, 0x3FA0 + 11, 0x3DF0 + 12, 0x3D40 + 11, 0x3F60 + 11, 0x3FF0 + 12, 0xB000 + 4, 0x1C00 + 7, 0x0C00 + 6, 0x1000 + 6, 0x6000 + 3, 0x3000 + 7, 0x1E00 + 8, 0x1400 + 7, 0xD000 + 4, 0x3580 + 9, 0x3400 + 8, 0x0800 + 6, 0x1A00 + 7, 0xE000 + 4, 0xC000 + 4, 0x1800 + 7, 0x3500 + 9, 0xF800 + 5, 0xF000 + 5, 0xA000 + 4, 0x1600 + 7, 0x3300 + 8, 0x1F00 + 8, 0x3600 + 9, 0x3200 + 8, 0x3680 + 9, 0x3DA0 + 11, 0x3FC0 + 11, 0x3DC0 + 11, 0x3FE0 + 12 }; // Original version with c/l separate // uint16_t c_95[95] PROGMEM = {0x4000, 0x3F80, 0x3D80, 0x3C80, 0x3BE0, 0x3E80, 0x3F40, 0x3EC0, 0x3BA0, 0x3BC0, 0x3D60, 0x3B60, 0x3A80, 0x3AC0, 0x3A00, 0x3B00, 0x38C0, 0x3900, 0x3940, 0x3960, 0x3980, 0x39A0, 0x39C0, 0x39E0, 0x39F0, 0x3880, 0x3CC0, 0x3C00, 0x3D00, 0x3E00, 0x3F00, 0x3B40, 0x3BF0, 0x2B00, 0x21C0, 0x20C0, 0x2100, 0x2600, 0x2300, 0x21E0, 0x2140, 0x2D00, 0x2358, 0x2340, 0x2080, 0x21A0, 0x2E00, 0x2C00, 0x2180, 0x2350, 0x2F80, 0x2F00, 0x2A00, 0x2160, 0x2330, 0x21F0, 0x2360, 0x2320, 0x2368, 0x3DE0, 0x3FA0, 0x3DF0, 0x3D40, 0x3F60, 0x3FF0, 0xB000, 0x1C00, 0x0C00, 0x1000, 0x6000, 0x3000, 0x1E00, 0x1400, 0xD000, 0x3580, 0x3400, 0x0800, 0x1A00, 0xE000, 0xC000, 0x1800, 0x3500, 0xF800, 0xF000, 0xA000, 0x1600, 0x3300, 0x1F00, 0x3600, 0x3200, 0x3680, 0x3DA0, 0x3FC0, 0x3DC0, 0x3FE0 }; // uint8_t l_95[95] PROGMEM = { 3, 11, 11, 10, 12, 10, 11, 10, 11, 11, 11, 11, 10, 10, 9, 10, 10, 10, 11, 11, 11, 11, 11, 12, 12, 10, 10, 9, 10, 9, 10, 11, 12, 8, 11, 10, 10, 7, 11, 12, 11, 8, 13, 12, 10, 11, 8, 8, 11, 13, 9, 9, 8, 11, 12, 12, 13, 12, 
13, 12, 11, 12, 11, 11, 12, 4, 7, 6, 6, 3, 7, 8, 7, 4, 9, 8, 6, 7, 4, 4, 7, 9, 5, 5, 4, 7, 8, 8, 9, 8, 9, 11, 11, 11, 12 }; @@ -66,7 +66,7 @@ enum {SHX_STATE_1 = 1, SHX_STATE_2}; // removed Unicode state enum {SHX_SET1 = 0, SHX_SET1A, SHX_SET1B, SHX_SET2, SHX_SET3, SHX_SET4, SHX_SET4A}; // changed mapping in Set3, Set4, Set4A to accomodate frequencies in Rules and Javascript -char sets[][11] PROGMEM = +static char sets[][11] PROGMEM = {{ 0, ' ', 'e', 0, 't', 'a', 'o', 'i', 'n', 's', 'r'}, { 0, 'l', 'c', 'd', 'h', 'u', 'p', 'm', 'b', 'g', 'w'}, {'f', 'y', 'v', 'k', 'q', 'j', 'x', 'z', 0, 0, 0}, @@ -87,7 +87,7 @@ char sets[][11] PROGMEM = // First 2 bits 00, Next 3 bits indicate index of code from 0, // last 3 bits indicate code length in bits // 0, 1, 2, 3, 4, -char us_vcode[32] PROGMEM = +static char us_vcode[32] PROGMEM = {2 + (0 << 3), 3 + (3 << 3), 3 + (1 << 3), 4 + (6 << 3), 0, // 5, 6, 7, 8, 9, 10 4 + (4 << 3), 3 + (2 << 3), 4 + (8 << 3), 0, 0, 0, @@ -98,7 +98,7 @@ char us_vcode[32] PROGMEM = // 24, 25, 26, 27, 28, 29, 30, 31 0, 0, 0, 0, 0, 0, 0, 5 + (10 << 3)}; // 0, 1, 2, 3, 4, 5, 6, 7, -char us_hcode[32] PROGMEM = +static char us_hcode[32] PROGMEM = {1 + (1 << 3), 2 + (0 << 3), 0, 3 + (2 << 3), 0, 0, 0, 5 + (3 << 3), // 8, 9, 10, 11, 12, 13, 14, 15, 0, 0, 0, 0, 0, 0, 0, 5 + (5 << 3), @@ -107,28 +107,28 @@ char us_hcode[32] PROGMEM = // 24, 25, 26, 27, 28, 29, 30, 31 0, 0, 0, 0, 0, 0, 0, 5 + (6 << 3)}; -const char ESCAPE_MARKER = 0x2A; // Escape any null char +static const char ESCAPE_MARKER = 0x2A; // Escape any null char -const uint16_t TERM_CODE = 0x37C0; // 0b0011011111000000 -const uint16_t TERM_CODE_LEN = 10; -const uint16_t DICT_CODE = 0x0000; -const uint16_t DICT_CODE_LEN = 5; -const uint16_t DICT_OTHER_CODE = 0x0000; // not used -const uint16_t DICT_OTHER_CODE_LEN = 6; +static const uint16_t TERM_CODE = 0x37C0; // 0b0011011111000000 +static const uint16_t TERM_CODE_LEN = 10; +static const uint16_t DICT_CODE = 0x0000; +static const uint16_t 
DICT_CODE_LEN = 5; +static const uint16_t DICT_OTHER_CODE = 0x0000; // not used +static const uint16_t DICT_OTHER_CODE_LEN = 6; // const uint16_t RPT_CODE = 0x2370; // const uint16_t RPT_CODE_LEN = 13; -const uint16_t RPT_CODE_TASMOTA = 0x3780; -const uint16_t RPT_CODE_TASMOTA_LEN = 10; -const uint16_t BACK2_STATE1_CODE = 0x2000; // 0010 = back to lower case -const uint16_t BACK2_STATE1_CODE_LEN = 4; -const uint16_t BACK_FROM_UNI_CODE = 0xFE00; -const uint16_t BACK_FROM_UNI_CODE_LEN = 8; +static const uint16_t RPT_CODE_TASMOTA = 0x3780; +static const uint16_t RPT_CODE_TASMOTA_LEN = 10; +static const uint16_t BACK2_STATE1_CODE = 0x2000; // 0010 = back to lower case +static const uint16_t BACK2_STATE1_CODE_LEN = 4; +static const uint16_t BACK_FROM_UNI_CODE = 0xFE00; +static const uint16_t BACK_FROM_UNI_CODE_LEN = 8; // const uint16_t CRLF_CODE = 0x3780; // const uint16_t CRLF_CODE_LEN = 10; -const uint16_t LF_CODE = 0x3700; -const uint16_t LF_CODE_LEN = 9; -const uint16_t TAB_CODE = 0x2400; -const uint16_t TAB_CODE_LEN = 7; +static const uint16_t LF_CODE = 0x3700; +static const uint16_t LF_CODE_LEN = 9; +static const uint16_t TAB_CODE = 0x2400; +static const uint16_t TAB_CODE_LEN = 7; // const uint16_t UNI_CODE = 0x8000; // Unicode disabled // const uint16_t UNI_CODE_LEN = 3; // const uint16_t UNI_STATE_SPL_CODE = 0xF800; @@ -137,21 +137,21 @@ const uint16_t TAB_CODE_LEN = 7; // const uint16_t UNI_STATE_DICT_CODE_LEN = 7; // const uint16_t CONT_UNI_CODE = 0x2800; // const uint16_t CONT_UNI_CODE_LEN = 7; -const uint16_t ALL_UPPER_CODE = 0x2200; -const uint16_t ALL_UPPER_CODE_LEN = 8; -const uint16_t SW2_STATE2_CODE = 0x3800; -const uint16_t SW2_STATE2_CODE_LEN = 7; -const uint16_t ST2_SPC_CODE = 0x3B80; -const uint16_t ST2_SPC_CODE_LEN = 11; -const uint16_t BIN_CODE_TASMOTA = 0x8000; -const uint16_t BIN_CODE_TASMOTA_LEN = 3; +static const uint16_t ALL_UPPER_CODE = 0x2200; +static const uint16_t ALL_UPPER_CODE_LEN = 8; +static const uint16_t SW2_STATE2_CODE = 0x3800; 
+static const uint16_t SW2_STATE2_CODE_LEN = 7; +static const uint16_t ST2_SPC_CODE = 0x3B80; +static const uint16_t ST2_SPC_CODE_LEN = 11; +static const uint16_t BIN_CODE_TASMOTA = 0x8000; +static const uint16_t BIN_CODE_TASMOTA_LEN = 3; // const uint16_t BIN_CODE = 0x2000; // const uint16_t BIN_CODE_LEN = 9; #define NICE_LEN 5 // uint16_t mask[] PROGMEM = {0x8000, 0xC000, 0xE000, 0xF000, 0xF800, 0xFC00, 0xFE00, 0xFF00}; -uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; +static const uint8_t mask[] PROGMEM = {0x80, 0xC0, 0xE0, 0xF0, 0xF8, 0xFC, 0xFE, 0xFF}; @@ -378,14 +378,19 @@ int32_t Unishox::unishox_compress(const char *p_in, size_t p_len, char *p_out, s state = SHX_STATE_1; append_bits(TERM_CODE, 8 - bits); // 0011 0111 1100 0000 TERM = 0011 0111 11 } - return ol/8+(ol%8?1:0); + return ol / 8; // we have already reached a byte boundary + // return ol/8+(ol%8?1:0); } uint32_t Unishox::getNextBit(void) { if (8 == bit_no) { + if (byte_no >= len) { + in_eof = true; + return 1; // return only 1s, which appends 'r' in worst case + } byte_in = in[byte_no++]; if (ESCAPE_MARKER == byte_in) { - byte_in = in[byte_no++] - 1; + byte_in = in[byte_no++] - 1; // we shouldn't need to test if byte_no >= len, because it should not be possible to end with ESCAPE_MARKER } bit_no = 0; } @@ -399,8 +404,7 @@ int32_t Unishox::getCodeIdx(const char *code_type) { int32_t code = 0; int32_t count = 0; do { - if (bit_no >= len) - return -1; // invalid state + if (in_eof) return -1; // invalid state code += getNextBit() << count; count++; uint8_t code_type_code = pgm_read_byte(&code_type[code]); @@ -416,6 +420,7 @@ int32_t Unishox::getNumFromBits(uint32_t count) { while (count--) { ret += getNextBit() << count; } + if (in_eof) return 0; return ret; } @@ -452,9 +457,11 @@ uint32_t Unishox::readCount(void) { void Unishox::decodeRepeat(void) { uint32_t dict_len = readCount() + NICE_LEN; uint32_t dist = readCount() + NICE_LEN - 1; + if (dist <= ol && ol + dict_len <= len_out) { // copy only if the back-reference starts inside the output written so far and the result fits in the output buffer
memcpy(out + ol, out + ol - dist, dict_len); ol += dict_len; } +} int32_t Unishox::unishox_decompress(const char *p_in, size_t p_len, char *p_out, size_t p_len_out) { in = p_in; @@ -462,15 +469,16 @@ int32_t Unishox::unishox_decompress(const char *p_in, size_t p_len, char *p_out, out = p_out; len_out = p_len_out; + in_eof = false; ol = 0; bit_no = 8; // force load of first byte, pretending we expired the last one byte_no = 0; dstate = SHX_SET1; is_all_upper = 0; - len <<= 3; // *8, len in bits out[ol] = 0; - while ((byte_no << 3) + bit_no - 8 < len) { + // while ((byte_no << 3) + bit_no - 8 < len) { + while (!in_eof) { int32_t h, v; char c = 0; byte is_upper = is_all_upper; diff --git a/lib/Unishox-1.0-shadinger/src/unishox.h b/lib/Unishox-1.0-shadinger/src/unishox.h index d1cda8976..9b6ba1329 100644 --- a/lib/Unishox-1.0-shadinger/src/unishox.h +++ b/lib/Unishox-1.0-shadinger/src/unishox.h @@ -48,6 +48,7 @@ private: uint32_t ol; int32_t bit_no; uint32_t byte_no; + bool in_eof; // have we reached end of file for compressed input const char * in; char * out; size_t len; diff --git a/tasmota/CHANGELOG.md b/tasmota/CHANGELOG.md index 4b8d81fe7..54e1f63e9 100644 --- a/tasmota/CHANGELOG.md +++ b/tasmota/CHANGELOG.md @@ -5,6 +5,7 @@ - Change IRremoteESP8266 library updated to v2.7.7 - Add command ``Rule0`` to change global rule parameters - Add more functionality to ``Switchmode`` 11 and 12 (#8450) +- Add dump of compressed rules over 512 chars and unishox decompress fix ## Released diff --git a/tasmota/xdrv_10_rules.ino b/tasmota/xdrv_10_rules.ino index d23f3e122..25a62d288 100644 --- a/tasmota/xdrv_10_rules.ino +++ b/tasmota/xdrv_10_rules.ino @@ -2017,7 +2017,25 @@ void CmndRule(void) } String rule = GetRule(index - 1); size_t rule_len = rule.length(); - if (rule_len >= MAX_RULE_SIZE) { + if (rule_len > MAX_RULE_SIZE - 3) { + + size_t start_index = 0; // start from 0 + while (start_index < rule_len) { // until we reached end of rule + size_t last_index = start_index + 
MAX_RULE_SIZE - 3; // set max length to what would fit uncompressed, i.e. MAX_RULE_SIZE - 3 (first NULL + length + last NULL) + if (last_index < rule_len) { // if we didn't reach the end, try to shorten to last space character + int32_t next_index = rule.lastIndexOf(" ", last_index); + if (next_index > (int32_t)start_index) { // accept only a space located inside the current chunk, so that every iteration makes progress + last_index = next_index; // shrink to the last space + } // otherwise there is no usable space in this chunk, we need to cut somewhere even if the result cannot be entered back + } else { + last_index = rule_len; // until the end of the rule + } + AddLog_P2(LOG_LEVEL_INFO, PSTR("RUL: Rule%d %s%s"), + index, 0 == start_index ? PSTR("") : PSTR("+"), + rule.substring(start_index, last_index).c_str()); + start_index = (' ' == rule.charAt(last_index)) ? last_index + 1 : last_index; // skip the separating space, but keep the character at the cut when we had to split mid-word + } + // we need to split the rule in chunks rule = rule.substring(0, MAX_RULE_SIZE); rule += F("...");