mirror of https://github.com/arendst/Tasmota.git
217 lines
6.8 KiB
Python
217 lines
6.8 KiB
Python
###############################################################
|
|
# compresses all files found in ..\..\tasmota\html_uncompressed
|
|
# write compressed C code to ..\..\tasmota\html_compressed
|
|
# Instructions:
|
|
# open a console, e.g. in vscode, open a 'terminal'
|
|
# cd .\tools\unishox
|
|
# run:
|
|
# python compress-html-uncompressed.py
|
|
#
|
|
# The intent it to commit both uncompressed and compressed to the repo
|
|
# else this script would need to be run at build.
|
|
#
|
|
# Example Tasmota code:
|
|
# #ifdef USE_UNISHOX_COMPRESSION
|
|
# #include "./html_compressed/HTTP_SCRIPT_CONSOL.h"
|
|
# #else
|
|
# #include "./html_uncompressed/HTTP_SCRIPT_CONSOL.h"
|
|
# #endif
|
|
#
|
|
###############################################################
|
|
|
|
import unishox
|
|
from sys import argv
|
|
from datetime import datetime
|
|
from pathlib import Path
|
|
from hashlib import sha256
|
|
|
|
self_dir = Path(__file__).absolute().parent
|
|
base_dir = self_dir.parent.parent
|
|
|
|
def extract_c_string(s: str) -> str:
|
|
state = 0
|
|
escape = False
|
|
out = ""
|
|
for c in s:
|
|
if state == 0: # before string
|
|
if c == '"': # entering string
|
|
out = '"'
|
|
state = 1
|
|
elif c == '/': # start of comment before entering string
|
|
state = 99 # we're done
|
|
elif state == 1: # in string
|
|
if escape: # escaped char
|
|
out += '\\' + c
|
|
escape = False
|
|
elif c == '\\': # escaped char
|
|
escape = True
|
|
elif c == '"': # end of string
|
|
out += '"'
|
|
state = 99 # we're done
|
|
else:
|
|
out += c
|
|
return out
|
|
|
|
def compress_html(source, target, argv=None, verbose=False):
|
|
if argv is None: argv = []
|
|
|
|
with open(source, "r") as f:
|
|
text = f.read()
|
|
|
|
src_sha, old_sha = sha256(text.encode()).hexdigest(), None
|
|
|
|
if not ('--force' in argv):
|
|
with open(target, "r") as f:
|
|
for line in f:
|
|
prefix = line[:17]
|
|
if prefix == '// input sha256: ':
|
|
old_sha = line[17:17+64]
|
|
break
|
|
|
|
if src_sha == old_sha:
|
|
return (0, 0)
|
|
|
|
#text = Tk().clipboard_get()
|
|
# print(text)
|
|
|
|
# parsing and cleaning
|
|
text_list = text.splitlines()
|
|
text = '' #just reuse the string
|
|
const_name = '<var>' #default if no name will be found
|
|
|
|
line_number = 0
|
|
for line in text_list:
|
|
pos = line.find("const char")
|
|
# print(pos, line)
|
|
if pos > -1:
|
|
line_list = line.rsplit(" ")
|
|
for el in line_list:
|
|
if el.find('[]') > -1:
|
|
const_name = el[:-2] #extract the "const char" variable name
|
|
line_list.pop(line_number)
|
|
else: # remove line comments
|
|
line_el = extract_c_string(line)
|
|
# print(line_el)
|
|
text = text + line_el
|
|
line_number = line_number +1
|
|
|
|
# print const_name
|
|
# print text
|
|
|
|
#remove unwanted quotation marks
|
|
qm = []
|
|
pos =0
|
|
last_char = ""
|
|
for char in text:
|
|
if char == "\"":
|
|
if last_char != "\\":
|
|
qm.append(pos) #find all quotation marks without preceding backslash
|
|
last_char = char
|
|
pos = pos + 1
|
|
# print(qm)
|
|
lastel = 0
|
|
input = ""
|
|
for pos in qm:
|
|
sub = text[lastel+1:pos:]
|
|
if not sub.isspace() and pos-lastel > 1:
|
|
# print(lastel, pos)
|
|
input = input + sub #only copy substrings that are not whitespace
|
|
# print(text[lastel+1:pos:])
|
|
lastel = pos
|
|
|
|
if verbose:
|
|
print("####### Parsing input from " + str(source.relative_to(base_dir)))
|
|
print(" Const char name: "+const_name)
|
|
#print('####### Cleaned input:')
|
|
#print(input)
|
|
|
|
#construct output (taken from shadinger)
|
|
input = input.replace("\\t", "\t")
|
|
input = input.replace("\\n", "\n")
|
|
input = input.replace("\\r", "\r")
|
|
input = input.replace("\\f", "\f")
|
|
input = input.replace("\\b", "\b")
|
|
input = input.replace("\\\"", u"\u0022")
|
|
|
|
in_bytes = bytearray(input, 'utf-8')
|
|
in_len = len(in_bytes)
|
|
out_bytes = bytearray(in_len * 2)
|
|
|
|
UNISHOX = unishox.Unishox()
|
|
out_len = UNISHOX.compress(in_bytes, len(in_bytes), out_bytes, len(out_bytes))
|
|
if verbose:
|
|
print(" ####### Compression result:")
|
|
reduction = 100-(float(out_len)/float(in_len)*100)
|
|
print(f" Compressed from {in_len} to {out_len}, -{reduction:.1f}%")
|
|
out_bytes = out_bytes[:out_len] # truncate to right size
|
|
|
|
#PROGMEM is growing in steps 0,8,24,40,56,... bytes of data resulting in size of 0,16,32,48,64,... bytes
|
|
for in_real in range(8,in_len+16,16):
|
|
if in_real>=in_len:
|
|
if verbose:
|
|
print(f" Old real PROGMEM-size:{in_real+8}(unused bytes:{in_real-in_len})")
|
|
break
|
|
for out_real in range(8,out_len+16,16):
|
|
if out_real>=out_len:
|
|
if verbose:
|
|
print(f" New real PROGMEM-size:{out_real+8}(unused bytes:{out_real-out_len})")
|
|
break
|
|
|
|
if verbose:
|
|
print(f" the optimal case would be raw bytes + 8, real difference: {in_real - out_real}bytes")
|
|
|
|
# https://www.geeksforgeeks.org/break-list-chunks-size-n-python/
|
|
def chunked(my_list, n):
|
|
return [my_list[i * n:(i + 1) * n] for i in range((len(my_list) + n - 1) // n )]
|
|
|
|
# split in chunks of 20 characters
|
|
chunks = chunked(out_bytes, 20)
|
|
|
|
lines_raw = [ "\"\\x" + "\\x".join( [ '{:02X}'.format(b) for b in chunk ] ) + "\"" for chunk in chunks ]
|
|
line_complete = f"const char {const_name}_COMPRESSED[] PROGMEM = " + ("\n" + " "*29).join(lines_raw) + ";"
|
|
lines = f"\nconst size_t {const_name}_SIZE = {in_len};\n{line_complete}\n\n"
|
|
|
|
#print('####### Final output:')
|
|
#print(lines)
|
|
|
|
definition = f"#define {const_name} Decompress({const_name}_COMPRESSED,{const_name}_SIZE).c_str()"
|
|
#print(definition)
|
|
|
|
now = datetime.now() # current date and time
|
|
percent = int((float(out_real)/float(in_real))*100.0)
|
|
saving = in_real - out_real
|
|
#totalIn = totalIn + in_real
|
|
#totalSaved = totalSaved + saving
|
|
comment = "/////////////////////////////////////////////////////////////////////\n"
|
|
comment += "// compressed by tools/unishox/compress-html-uncompressed.py\n"
|
|
comment += f"// input sha256: {src_sha}\n"
|
|
comment += "/////////////////////////////////////////////////////////////////////\n"
|
|
|
|
with open(target, "w") as f:
|
|
f.write(comment + lines + definition)
|
|
|
|
return (in_real, saving)
|
|
|
|
if verbose:
|
|
print("####### Wrote output to " + str(target.relative_to(base_dir)))
|
|
|
|
def compress_dir(source_dir, target_dir, argv=None, verbose=False):
|
|
totalIn, totalSaved = 0, 0
|
|
|
|
for source in source_dir.iterdir():
|
|
target = Path(target_dir, source.stem + ".h")
|
|
bytesIn, bytesSaved = compress_html(source, target, argv, verbose)
|
|
totalIn += bytesIn
|
|
totalSaved += bytesSaved
|
|
|
|
return (totalIn, totalSaved)
|
|
|
|
if __name__ == '__main__':
|
|
path_uncompressed = Path(base_dir, 'tasmota', 'html_uncompressed')
|
|
path_compressed = Path(base_dir, 'tasmota', 'html_compressed')
|
|
|
|
totalIn, totalSaved = compress_dir(path_uncompressed, path_compressed, argv, True)
|
|
|
|
if totalSaved > 0:
|
|
print(f"If all files are in use, total saving was {totalSaved} out of {totalIn}")
|