###############################################################
# compresses all files found in ..\..\tasmota\html_uncompressed
# write compressed C code to ..\..\tasmota\html_compressed
# Instructions:
# open a console, e.g. in vscode, open a 'terminal'
# cd .\tools\unishox
# run:
# python compress-html-uncompressed.py
#
# The intent is to commit both uncompressed and compressed to the repo
# else this script would need to be run at build.
#
# Example Tasmota code:
# #ifdef USE_UNISHOX_COMPRESSION
#   #include "./html_compressed/HTTP_SCRIPT_CONSOL.h"
# #else
#   #include "./html_uncompressed/HTTP_SCRIPT_CONSOL.h"
# #endif
#
###############################################################

import unishox
from os import listdir
from os import path
from datetime import datetime

path_compressed = path.join('..', '..', 'tasmota', 'html_compressed')
path_uncompressed = path.join('..', '..', 'tasmota', 'html_uncompressed')

# Simple C escape sequences decoded before compression. Anything not listed
# here (octal/hex escapes, ...) is passed through untouched.
_C_ESCAPES = {
    't': '\t',
    'n': '\n',
    'r': '\r',
    'f': '\f',
    'b': '\b',
    '"': '"',
    "'": "'",
    '\\': '\\',
}

_BANNER_RULE = "/////////////////////////////////////////////////////////////////////\n"


def extract_c_string(s: str) -> str:
    """Return the first C string literal found on the line *s*.

    The result keeps its surrounding double quotes and its escape sequences
    verbatim (backslash + character). Scanning stops at the closing quote, or
    at a '/' met before any string was opened (start of a comment), so trailing
    comments and any further literals on the same line are ignored. An empty
    string is returned when the line holds no literal. If the line ends inside
    a literal, the result has no closing quote and a dangling backslash is
    dropped.
    """
    out = []
    in_string = False
    escape = False
    for c in s:
        if not in_string:
            if c == '"':        # entering string
                out.append('"')
                in_string = True
            elif c == '/':      # comment before any string: nothing to extract
                break
        elif escape:            # character following a backslash
            out.append('\\' + c)
            escape = False
        elif c == '\\':
            escape = True
        elif c == '"':          # end of string, we're done
            out.append('"')
            break
        else:
            out.append(c)
    return ''.join(out)


def parse_header(text: str) -> tuple:
    """Split the text of an uncompressed header into (const_name, literals).

    Lines containing "const char" are only used to find the variable name
    (the token carrying "[]"); every other line contributes its first string
    literal, still quoted, to the concatenated *literals* text.
    NOTE: a literal placed on the same line as the declaration is not picked up.
    """
    const_name = ''  # default if no name will be found
    pieces = []
    for line in text.splitlines():
        if "const char" in line:
            for token in line.split():
                if '[]' in token:
                    # Cut at the brackets instead of assuming they end the token.
                    const_name = token.partition('[]')[0]
        else:
            pieces.append(extract_c_string(line))
    return const_name, ''.join(pieces)


def strip_literal_quotes(text: str) -> str:
    """Join the contents of back-to-back quoted literals, dropping the quotes.

    A quote is a delimiter unless it is escaped. Escape state is tracked
    character by character, so an escaped backslash directly before a closing
    quote does not hide that quote.
    """
    quotes = []
    escaped = False
    for index, char in enumerate(text):
        if escaped:
            escaped = False
        elif char == '\\':
            escaped = True
        elif char == '"':
            quotes.append(index)
    # The span between a closing quote and the next opening quote is empty,
    # so joining every consecutive span yields exactly the literal contents,
    # including literals that consist only of whitespace.
    return ''.join(text[a + 1:b] for a, b in zip(quotes, quotes[1:]))


def unescape_c_string(s: str) -> str:
    """Decode the simple C escape sequences of *s* in a single pass.

    Decoding in one pass keeps an escaped backslash from being re-read as the
    start of another escape (e.g. backslash-backslash-n stays a backslash
    followed by the letter n).
    """
    out = []
    chars = iter(s)
    for c in chars:
        if c != '\\':
            out.append(c)
            continue
        following = next(chars, None)
        if following is None:
            out.append(c)  # lone trailing backslash: keep as is
        else:
            out.append(_C_ESCAPES.get(following, c + following))
    return ''.join(out)


def chunked(my_list, n):
    """Split *my_list* into consecutive slices of at most *n* items."""
    return [my_list[i:i + n] for i in range(0, len(my_list), n)]


def progmem_data_size(length: int) -> int:
    """Return the smallest PROGMEM data step (8, 24, 40, 56, ...) >= *length*.

    PROGMEM is growing in steps 0,8,24,40,56,... bytes of data resulting in
    size of 0,16,32,48,64,... bytes.
    """
    return 8 + 16 * ((length + 7) // 16)


def format_compressed_header(const_name: str, in_len: int, out_bytes) -> str:
    """Build the C source text written to the compressed header file.

    *in_len* is the uncompressed size in bytes, *out_bytes* the compressed
    payload, emitted as hex escapes in string chunks of 20 bytes.
    """
    lines_raw = [
        "\"\\x" + "\\x".join('{:02X}'.format(b) for b in chunk) + "\""
        for chunk in chunked(out_bytes, 20)
    ]
    line_complete = ("const char " + const_name + "_COMPRESSED" + "[] PROGMEM = "
                     + ("\n" + " " * 29).join(lines_raw) + ";")
    lines = "\nconst size_t {name}_SIZE = {size};\n{lines}\n\n".format(
        name=const_name, size=in_len, lines=line_complete)
    definition = ("#define " + const_name + " Decompress(" + const_name
                  + "_COMPRESSED" + "," + const_name + "_SIZE" + ").c_str()")
    comment = (_BANNER_RULE
               + "// compressed by tools/unishox/compress-html-uncompressed.py\n"
               + _BANNER_RULE)
    return comment + lines + definition


def main() -> None:
    """Compress every header in html_uncompressed into html_compressed."""
    total_in = 0
    total_saved = 0

    for file in sorted(listdir(path_uncompressed)):
        source_path = path.join(path_uncompressed, file)
        if not path.isfile(source_path):
            continue  # sub-directories cannot be opened as headers

        # The content is encoded as UTF-8 below, so read it as UTF-8 too
        # instead of relying on the platform default encoding.
        with open(source_path, "r", encoding="utf-8") as f:
            text = f.read()

        # parsing and cleaning
        const_name, literals = parse_header(text)
        content = unescape_c_string(strip_literal_quotes(literals))

        print("####### Parsing input from " + source_path)
        print(" Const char name: " + const_name)

        in_bytes = bytearray(content, 'utf-8')
        in_len = len(in_bytes)
        if in_len == 0:
            # Nothing to compress; the ratio below would divide by zero.
            print(" No string content found, skipping")
            continue
        out_bytes = bytearray(in_len * 2)

        compressor = unishox.Unishox()
        out_len = compressor.compress(in_bytes, in_len, out_bytes, len(out_bytes))
        print(" ####### Compression result:")
        print(" Compressed from {i} to {o}, -{p:.1f}%".format(
            i=in_len, o=out_len, p=(100 - (float(out_len) / float(in_len) * 100))))
        out_bytes = out_bytes[:out_len]  # truncate to right size

        in_real = progmem_data_size(in_len)
        out_real = progmem_data_size(out_len)
        print(" Old real PROGMEM-size:" + str(in_real + 8)
              + "(unused bytes:" + str(in_real - in_len) + ")")
        print(" New real PROGMEM-size:" + str(out_real + 8)
              + "(unused bytes:" + str(out_real - out_len) + ")")
        print(" the optimal case would be raw bytes + 8, real difference: "
              + str(in_real - out_real) + "bytes")

        total_in += in_real
        total_saved += in_real - out_real

        target_path = path.join(path_compressed, file)
        with open(target_path, "w", encoding="utf-8") as f:
            f.write(format_compressed_header(const_name, in_len, out_bytes))
        print("####### Wrote output to " + target_path)

    print("If all files are in use, total saving was " + str(total_saved)
          + " out of " + str(total_in))


if __name__ == "__main__":
    main()