Tasmota/lib/libesp32_epdiy/examples/terminal/main/unicode.c

#include "unicode.h"

#include <inttypes.h>
#include <stddef.h>

#include "esp_log.h"

/*
 * One row of the UTF-8 encoding table.
 *
 * mask/lead are raw byte patterns, so they are stored as uint8_t: with a
 * plain (possibly signed) char, lead values >= 0x80 would turn negative and
 * never compare equal to a masked input byte.
 */
typedef struct {
	uint8_t mask;    /* byte data will be bitwise AND with this */
	uint8_t lead;    /* start bits of this byte kind in a utf-8 encoded character */
	uint32_t beg;    /* beginning of codepoint range */
	uint32_t end;    /* end of codepoint range */
	int bits_stored; /* the number of bits from the codepoint that fit in the byte */
}utf_t;

/*
 * Encoding table, indexed by sequence length:
 *   [0]    continuation byte (10xxxxxx)
 *   [1..4] lead byte of a 1..4 byte sequence and the codepoint range it covers
 *   [5]    NULL terminator, so that `for(utf_t **u = utf; *u; ++u)` stops here
 *          (a pointer to a zeroed row would be non-NULL and never end the walk).
 */
static utf_t * utf[] = {
	/*             mask  lead  beg      end       bits */
	[0] = &(utf_t){0x3F, 0x80, 0x0,     0x0,      6    },
	[1] = &(utf_t){0x7F, 0x00, 0x0,     0x7F,     7    },
	[2] = &(utf_t){0x1F, 0xC0, 0x80,    0x7FF,    5    },
	[3] = &(utf_t){0x0F, 0xE0, 0x800,   0xFFFF,   4    },
	[4] = &(utf_t){0x07, 0xF0, 0x10000, 0x10FFFF, 3    },
	[5] = NULL,
};

/*
 * Number of bytes needed to encode code point `cp` in UTF-8.
 *
 * Returns the index of the first table row whose [beg, end] range contains
 * `cp`, i.e. 1..4 for encodable code points. Returns 0 (after logging an
 * error) when no row matches.
 *
 * NOTE(review): U+0000 matches row 0 (range 0..0) and therefore also yields
 * 0, without an error being logged. This is kept as-is for existing callers.
 */
int codepoint_len(const uint32_t cp)
{
	/* Rows 0..4 hold ranges; scanning by index never touches the terminator. */
	enum { MAX_UTF8_LEN = 4 };

	for(int len = 0; len <= MAX_UTF8_LEN; ++len) {
		if((cp >= utf[len]->beg) && (cp <= utf[len]->end)) {
			return len;
		}
	}

	ESP_LOGE("unicode", "invalid unicode cp %" PRIu32 "!", cp);
	return 0;
}

/*
 * Encode code point `cp` as a NUL-terminated UTF-8 sequence in `chr`.
 *
 * chr: output buffer of at least 5 bytes (up to 4 encoded bytes plus the
 *      terminator).
 * cp:  code point to encode.
 *
 * When codepoint_len() reports a length of 0 for `cp`, `chr` is set to the
 * empty string.
 */
void to_utf8(char chr[5], const uint32_t cp)
{
	const int bytes = codepoint_len(cp);

	if(bytes <= 0) {
		/* A length of 0 would make the shift below negative (undefined). */
		chr[0] = '\0';
		return;
	}

	const int cont_bits = utf[0]->bits_stored;
	int shift = cont_bits * (bytes - 1);

	/* Lead byte: the topmost bits of cp behind the length marker. */
	chr[0] = (char)(((cp >> shift) & utf[bytes]->mask) | utf[bytes]->lead);

	/* Continuation bytes: the remaining bits, cont_bits at a time. */
	for(int i = 1; i < bytes; ++i) {
		shift -= cont_bits;
		chr[i] = (char)(((cp >> shift) & utf[0]->mask) | utf[0]->lead);
	}
	chr[bytes] = '\0';
}

/*
 * Classify a single UTF-8 byte.
 *
 * Returns 0 if `ch` is a continuation byte (10xxxxxx), or 1..4 if it is the
 * lead byte of a sequence of that many bytes. If `ch` matches none of the
 * table rows (malformed lead byte) the program is terminated with exit(1).
 */
int utf8_len(uint8_t ch)
{
	/* Rows 0..4 hold byte patterns; scanning by index never touches the terminator. */
	enum { MAX_UTF8_LEN = 4 };

	for(int len = 0; len <= MAX_UTF8_LEN; ++len) {
		/*
		 * Compare as unsigned bytes so the result does not depend on the
		 * signedness of the table's storage type.
		 */
		const uint8_t mask = (uint8_t)utf[len]->mask;
		const uint8_t lead = (uint8_t)utf[len]->lead;

		if((ch & (uint8_t)~mask) == lead) {
			return len;
		}
	}

	/* Malformed leading byte */
	exit(1);
}

/*
 * Decode the UTF-8 sequence starting at `chr` into a code point.
 *
 * chr: points at the first byte of a sequence; the number of bytes read
 *      (1..4) is determined by utf8_len() on that first byte.
 *
 * Returns the decoded code point, or 0 when the first byte is a continuation
 * byte (utf8_len() reports 0) and therefore cannot start a sequence.
 * Continuation bytes are not validated; the caller must supply the complete
 * sequence.
 */
uint32_t to_cp(const char chr[4])
{
	const int bytes = utf8_len((uint8_t)*chr);

	if(bytes <= 0) {
		/* A length of 0 would make the shift below negative (undefined). */
		return 0;
	}

	int shift = utf[0]->bits_stored * (bytes - 1);

	/* Work on unsigned bytes so that values >= 0x80 are never sign-extended. */
	uint32_t codep = (uint32_t)((uint8_t)*chr++ & utf[bytes]->mask) << shift;

	for(int i = 1; i < bytes; ++i, ++chr) {
		shift -= utf[0]->bits_stored;
		codep |= (uint32_t)((uint8_t)*chr & utf[0]->mask) << shift;
	}

	return codep;
}
liligo4.7epd support 2021-04-03 15:58:47 +01:00			`#include "unicode.h"`
			`#include "esp_log.h"`

			`typedef struct {`
			`char mask; /* char data will be bitwise AND with this */`
			`char lead; /* start bytes of current char in utf-8 encoded character */`
			`uint32_t beg; /* beginning of codepoint range */`
			`uint32_t end; /* end of codepoint range */`
			`int bits_stored; /* the number of bits from the codepoint that fits in char */`
			`}utf_t;`

			`static utf_t * utf[] = {`
			`/* mask lead beg end bits */`
			`[0] = &(utf_t){0b00111111, 0b10000000, 0, 0, 6 },`
			`[1] = &(utf_t){0b01111111, 0b00000000, 0000, 0177, 7 },`
			`[2] = &(utf_t){0b00011111, 0b11000000, 0200, 03777, 5 },`
			`[3] = &(utf_t){0b00001111, 0b11100000, 04000, 0177777, 4 },`
			`[4] = &(utf_t){0b00000111, 0b11110000, 0200000, 04177777, 3 },`
			`&(utf_t){0},`
			`};`

			`int codepoint_len(const uint32_t cp)`
			`{`
			`int len = 0;`
			`for(utf_t **u = utf; *u; ++u) {`
			`if((cp >= (*u)->beg) && (cp <= (*u)->end)) {`
			`break;`
			`}`
			`++len;`
			`}`
			`if(len > 4) {`
			`ESP_LOGE("unicode", "invalid unicode cp %d!", cp);`
			`return 0;`
			`}`

			`return len;`
			`}`

			`void to_utf8(char chr[5], const uint32_t cp)`
			`{`
			`const int bytes = codepoint_len(cp);`

			`int shift = utf[0]->bits_stored * (bytes - 1);`
			`chr[0] = (cp >> shift & utf[bytes]->mask) | utf[bytes]->lead;`
			`shift -= utf[0]->bits_stored;`
			`for(int i = 1; i < bytes; ++i) {`
			`chr[i] = (cp >> shift & utf[0]->mask) | utf[0]->lead;`
			`shift -= utf[0]->bits_stored;`
			`}`
			`chr[bytes] = '\0';`
			`}`

			`int utf8_len(uint8_t ch)`
			`{`
			`int len = 0;`
			`for(utf_t **u = utf; *u; ++u) {`
			`if((ch & ~(*u)->mask) == (*u)->lead) {`
			`break;`
			`}`
			`++len;`
			`}`
			`if(len > 4) { /* Malformed leading byte */`
			`exit(1);`
			`}`
			`return len;`
			`}`

			`uint32_t to_cp(const char chr[4])`
			`{`
			`int bytes = utf8_len(*chr);`
			`int shift = utf[0]->bits_stored * (bytes - 1);`
			`uint32_t codep = (*chr++ & utf[bytes]->mask) << shift;`

			`for(int i = 1; i < bytes; ++i, ++chr) {`
			`shift -= utf[0]->bits_stored;`
			`codep |= ((char)*chr & utf[0]->mask) << shift;`
			`}`

			`return codep;`
			`}`