PNGdec: Sync with upstream.
* Fixed pixel conversion of 1-bpp grayscale to RGB565 * Improved performance on systems that allow unaligned memory accesses * Corrected the optimized code so that it does not go past the end of the buffer * Speed improvements
This commit is contained in:
parent
6eb0f90e53
commit
fca0bb076a
|
@ -8,6 +8,20 @@
|
|||
#include "inflate.h"
|
||||
#include "inffast.h"
|
||||
|
||||
#if (INTPTR_MAX == INT64_MAX) || defined(HAL_ESP32_HAL_H_) || defined(TEENSYDUINO) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM7)
|
||||
#define ALLOWS_UNALIGNED
|
||||
#endif
|
||||
|
||||
#if INTPTR_MAX == INT64_MAX
|
||||
#define REGISTER_WIDTH 64
|
||||
typedef uint64_t BIGUINT;
|
||||
typedef uint32_t SMALLUINT;
|
||||
#else
|
||||
#define REGISTER_WIDTH 32
|
||||
typedef uint32_t BIGUINT;
|
||||
typedef uint16_t SMALLUINT;
|
||||
#endif // native register size
|
||||
|
||||
#ifdef ASMINF
|
||||
# pragma message("Assembler code may have bugs -- use at your own risk")
|
||||
#else
|
||||
|
@ -64,7 +78,8 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
unsigned whave; /* valid bytes in the window */
|
||||
unsigned wnext; /* window write index */
|
||||
unsigned char FAR *window; /* allocated sliding window, if wsize != 0 */
|
||||
unsigned long hold; /* local strm->hold */
|
||||
BIGUINT hold, tmpbits; /* local strm->hold */
|
||||
// unsigned long hold; /* local strm->hold */
|
||||
unsigned bits; /* local strm->bits */
|
||||
code const FAR *lcode; /* local strm->lencode */
|
||||
code const FAR *dcode; /* local strm->distcode */
|
||||
|
@ -101,11 +116,18 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
/* decode literals and length/distances until end-of-block or not enough
|
||||
input data or output space */
|
||||
do {
|
||||
if (bits < 15) {
|
||||
if (bits < (REGISTER_WIDTH/2)) { // helps on 32 and 64-bit CPUs
|
||||
#ifdef ALLOWS_UNALIGNED
|
||||
tmpbits = *(SMALLUINT *)in;
|
||||
hold |= (BIGUINT)(tmpbits << bits);
|
||||
in += sizeof(SMALLUINT);
|
||||
bits += (REGISTER_WIDTH / 2);
|
||||
#else
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
#endif
|
||||
}
|
||||
here = lcode[hold & lmask];
|
||||
dolen:
|
||||
|
@ -123,20 +145,29 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
len = (unsigned)(here.val);
|
||||
op &= 15; /* number of extra bits */
|
||||
if (op) {
|
||||
#if REGISTER_WIDTH == 32
|
||||
if (bits < op) {
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
hold += (uint32_t)(*in++) << bits;
|
||||
bits += 8;
|
||||
}
|
||||
#endif
|
||||
len += (unsigned)hold & ((1U << op) - 1);
|
||||
hold >>= op;
|
||||
bits -= op;
|
||||
}
|
||||
Tracevv((stderr, "inflate: length %u\n", len));
|
||||
if (bits < 15) {
|
||||
if (bits < (REGISTER_WIDTH/2)) { // helps on 32 and 64-bit CPUs
|
||||
#ifdef UNALIGNED_OK
|
||||
tmpbits = *(SMALLUINT *)in;
|
||||
hold |= (BIGUINT)(tmpbits << bits);
|
||||
in += sizeof(SMALLUINT);
|
||||
bits += (REGISTER_WIDTH / 2);
|
||||
#else
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
#endif
|
||||
}
|
||||
here = dcode[hold & dmask];
|
||||
dodist:
|
||||
|
@ -147,14 +178,22 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
if (op & 16) { /* distance base */
|
||||
dist = (unsigned)(here.val);
|
||||
op &= 15; /* number of extra bits */
|
||||
#if REGISTER_WIDTH == 32
|
||||
if (bits < op) {
|
||||
#ifdef ALLOWS_UNALIGNED
|
||||
hold |= (*(uint16_t *)in << bits);
|
||||
bits += 16;
|
||||
in += 2;
|
||||
#else
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
if (bits < op) {
|
||||
if (bits < op) { // this is NEVER true
|
||||
hold += (unsigned long)(*in++) << bits;
|
||||
bits += 8;
|
||||
}
|
||||
#endif // ALLOWS_UNALIGNED
|
||||
}
|
||||
#endif // 32-bit CPU
|
||||
dist += (unsigned)hold & ((1U << op) - 1);
|
||||
#ifdef INFLATE_STRICT
|
||||
if (dist > dmax) {
|
||||
|
@ -236,12 +275,18 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
from = out - dist; /* rest from output */
|
||||
}
|
||||
}
|
||||
// if (len > 50 && len < dist) {
|
||||
// memmove(out, from, len);
|
||||
// out += len;
|
||||
// from += len;
|
||||
// len = 0;
|
||||
// } else {
|
||||
#ifdef ALLOWS_UNALIGNED
|
||||
{
|
||||
uint8_t *pEnd = out+len;
|
||||
while (out < pEnd) {
|
||||
*(uint32_t *)out = *(uint32_t *)from;
|
||||
out += 4;
|
||||
from += 4;
|
||||
}
|
||||
// correct for possible overshoot of destination ptr
|
||||
out = pEnd;
|
||||
}
|
||||
#else
|
||||
while (len > 2) {
|
||||
*out++ = *from++;
|
||||
*out++ = *from++;
|
||||
|
@ -253,22 +298,38 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
if (len > 1)
|
||||
*out++ = *from++;
|
||||
}
|
||||
// }
|
||||
#endif // ALLOWS_UNALIGNED
|
||||
}
|
||||
else {
|
||||
from = out - dist; /* copy direct from output */
|
||||
// Larry Bank added -
|
||||
// For relatively large runs, it's faster to let memmove
|
||||
// use whatever code is efficient on the target platform
|
||||
// if (dist == 1) { // frequent case for images
|
||||
// memset(out, *from, len);
|
||||
// out += len;
|
||||
// } else if (len > 50 && len < dist) {
|
||||
// memmove(out, from, len);
|
||||
// out += len;
|
||||
// from += len;
|
||||
// len = 0;
|
||||
// } else {
|
||||
#ifdef ALLOWS_UNALIGNED
|
||||
{
|
||||
uint8_t *pEnd = out+len;
|
||||
int overlap = (int)(intptr_t)(out-from);
|
||||
if (overlap >= 4) { // overlap of source/dest won't impede normal copy
|
||||
while (out < pEnd) {
|
||||
*(uint32_t *)out = *(uint32_t *)from;
|
||||
out += 4;
|
||||
from += 4;
|
||||
}
|
||||
// correct for possible overshoot of destination ptr
|
||||
out = pEnd;
|
||||
} else if (overlap == 1) { // copy 1-byte pattern
|
||||
uint32_t pattern = *from;
|
||||
pattern = pattern | (pattern << 8);
|
||||
pattern = pattern | (pattern << 16);
|
||||
while (out < pEnd) {
|
||||
*(uint32_t *)out = pattern;
|
||||
out += 4;
|
||||
}
|
||||
out = pEnd; // correct possible overshoot
|
||||
} else { // overlap of 2 or 3
|
||||
while (out < pEnd) {
|
||||
*out++ = *from++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
do { /* minimum length is three */
|
||||
*out++ = *from++;
|
||||
*out++ = *from++;
|
||||
|
@ -280,7 +341,7 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
if (len > 1)
|
||||
*out++ = *from++;
|
||||
}
|
||||
// }
|
||||
#endif // ALLOWS_UNALIGNED
|
||||
}
|
||||
}
|
||||
else if ((op & 64) == 0) { /* 2nd level distance code */
|
||||
|
@ -310,10 +371,10 @@ unsigned start; /* inflate()'s starting value for strm->avail_out */
|
|||
} while (in < last && out < end);
|
||||
|
||||
/* return unused bytes (on entry, bits < 8, so in won't go too far back) */
|
||||
len = bits >> 3;
|
||||
in -= len;
|
||||
bits -= len << 3;
|
||||
hold &= (1U << bits) - 1;
|
||||
// len = bits >> 3;
|
||||
// in -= len;
|
||||
// bits -= len << 3;
|
||||
// hold &= (1 << bits) - 1;
|
||||
|
||||
/* update state and return */
|
||||
strm->next_in = in;
|
||||
|
|
|
@ -85,6 +85,10 @@
|
|||
#include "inflate.h"
|
||||
#include "inffast.h"
|
||||
|
||||
#if (INTPTR_MAX == INT64_MAX) || defined(HAL_ESP32_HAL_H_) || defined(TEENSYDUINO) || defined(ARM_MATH_CM4) || defined(ARM_MATH_CM7)
|
||||
#define ALLOWS_UNALIGNED
|
||||
#endif
|
||||
|
||||
#ifdef MAKEFIXED
|
||||
# ifndef BUILDFIXED
|
||||
# define BUILDFIXED
|
||||
|
@ -262,7 +266,8 @@ int value;
|
|||
state->bits = 0;
|
||||
return Z_OK;
|
||||
}
|
||||
if (bits > 16 || state->bits + (uInt)bits > 32) return Z_STREAM_ERROR;
|
||||
if (bits > 16 || state->bits + (uInt)bits > 32)
|
||||
return Z_STREAM_ERROR;
|
||||
value &= (1L << bits) - 1;
|
||||
state->hold += (unsigned)value << state->bits;
|
||||
state->bits += (uInt)bits;
|
||||
|
@ -1191,9 +1196,39 @@ int check_crc;
|
|||
if (copy > left) copy = left;
|
||||
left -= copy;
|
||||
state->length -= copy;
|
||||
#ifdef ALLOWS_UNALIGNED
|
||||
{
|
||||
uint8_t *pEnd = put+copy;
|
||||
int overlap = (int)(intptr_t)(put-from);
|
||||
if (overlap >= 4) { // overlap of source/dest won't impede normal copy
|
||||
while (put < pEnd-3) { // overwriting the output buffer here would be bad, so respect the true length
|
||||
*(uint32_t *)put = *(uint32_t *)from;
|
||||
put += 4;
|
||||
from += 4;
|
||||
}
|
||||
while (put < pEnd) { // tail end
|
||||
*put++ = *from++;
|
||||
}
|
||||
} else if (overlap == 1) { // copy 1-byte pattern
|
||||
uint32_t pattern = *from;
|
||||
pattern = pattern | (pattern << 8);
|
||||
pattern = pattern | (pattern << 16);
|
||||
while (put < pEnd) {
|
||||
*(uint32_t *)put = pattern;
|
||||
put += 4;
|
||||
}
|
||||
put = pEnd; // correct possible overshoot
|
||||
} else { // overlap of 2 or 3
|
||||
while (put < pEnd) {
|
||||
*put++ = *from++;
|
||||
}
|
||||
}
|
||||
}
|
||||
#else
|
||||
do {
|
||||
*put++ = *from++;
|
||||
} while (--copy);
|
||||
#endif // ALLOWS_UNALIGNED
|
||||
if (state->length == 0) state->mode = LEN;
|
||||
break;
|
||||
case LIT:
|
||||
|
|
|
@ -98,7 +98,8 @@ struct inflate_state {
|
|||
unsigned wnext; /* window write index */
|
||||
unsigned char FAR *window; /* allocated sliding window, if needed */
|
||||
/* bit accumulator */
|
||||
unsigned long hold; /* input bit accumulator */
|
||||
uint64_t hold; /* input bit accumulator */
|
||||
// unsigned long hold; /* input bit accumulator */
|
||||
unsigned bits; /* number of bits in "in" */
|
||||
/* for string and stored block copying */
|
||||
unsigned length; /* literal or length of data to copy */
|
||||
|
|
|
@ -243,15 +243,33 @@ PNG_STATIC void PNGRGB565(PNGDRAW *pDraw, uint16_t *pPixels, int iEndiannes, uin
|
|||
}
|
||||
break;
|
||||
case PNG_PIXEL_GRAYSCALE:
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
c = *s++;
|
||||
usPixel = (c >> 3); // blue
|
||||
usPixel |= ((c >> 2) << 5); // green
|
||||
usPixel |= ((c >> 3) << 11); // red
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
}
|
||||
switch (pDraw->iBpp) {
|
||||
case 8:
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
c = *s++;
|
||||
usPixel = (c >> 3); // blue
|
||||
usPixel |= ((c >> 2) << 5); // green
|
||||
usPixel |= ((c >> 3) << 11); // red
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
}
|
||||
break;
|
||||
case 1:
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
if ((x & 7) == 0) {
|
||||
c = *s++;
|
||||
}
|
||||
if (c & 0x80) {
|
||||
usPixel = 0xffff;
|
||||
} else {
|
||||
usPixel = 0;
|
||||
}
|
||||
*pDest++ = usPixel;
|
||||
c <<= 1;
|
||||
}
|
||||
break;
|
||||
} // switch on bpp
|
||||
break;
|
||||
case PNG_PIXEL_TRUECOLOR:
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
|
@ -302,15 +320,15 @@ PNG_STATIC void PNGRGB565(PNGDRAW *pDraw, uint16_t *pPixels, int iEndiannes, uin
|
|||
}
|
||||
break;
|
||||
case 1:
|
||||
for (x=0; x<pDraw->iWidth; x+=4) {
|
||||
c = *s++;
|
||||
for (j=0; j<8; j++) { // work on pairs of bits
|
||||
usPixel = pDraw->pFastPalette[c >> 7];
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
c <<= 1;
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
if ((x & 7) == 0) {
|
||||
c = *s++;
|
||||
}
|
||||
usPixel = pDraw->pFastPalette[c >> 7];
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
c <<= 1;
|
||||
}
|
||||
break;
|
||||
} // switch on bpp
|
||||
|
@ -379,18 +397,18 @@ PNG_STATIC void PNGRGB565(PNGDRAW *pDraw, uint16_t *pPixels, int iEndiannes, uin
|
|||
}
|
||||
break;
|
||||
case 1:
|
||||
for (x=0; x<pDraw->iWidth; x+=4) {
|
||||
c = *s++;
|
||||
for (j=0; j<8; j++) { // work on pairs of bits
|
||||
pPal = &pDraw->pPalette[(c >> 7) * 3];
|
||||
usPixel = (pPal[2] >> 3); // blue
|
||||
usPixel |= ((pPal[1] >> 2) << 5); // green
|
||||
usPixel |= ((pPal[0] >> 3) << 11); // red
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
c <<= 1;
|
||||
for (x=0; x<pDraw->iWidth; x++) {
|
||||
if ((x & 7) == 0) {
|
||||
c = *s++;
|
||||
}
|
||||
pPal = &pDraw->pPalette[(c >> 7) * 3];
|
||||
usPixel = (pPal[2] >> 3); // blue
|
||||
usPixel |= ((pPal[1] >> 2) << 5); // green
|
||||
usPixel |= ((pPal[0] >> 3) << 11); // red
|
||||
if (iEndiannes == PNG_RGB565_BIG_ENDIAN)
|
||||
usPixel = __builtin_bswap16(usPixel);
|
||||
*pDest++ = usPixel;
|
||||
c <<= 1;
|
||||
}
|
||||
break;
|
||||
} // switch on bits per pixel
|
||||
|
|
Loading…
Reference in New Issue