From 07268cbf36f749c058e9dde1728fd10d4bec02d0 Mon Sep 17 00:00:00 2001
From: Fabian Giesen
Date: Tue, 2 May 2023 01:17:05 -0700
Subject: [PATCH] stb_image: New Paeth filter

This formulation is equivalent to the original (reference)
implementation but runs _significantly_ faster - this speeds up
the filtering portion of a Paeth-heavy 8192x8192 16-bit/channel
image by a factor of more than 2 on a Zen2 CPU.

I'm investigating doing a more thorough restructuring of this pass,
but this seems like a good first step.
---
 stb_image.h            | 16 +++++++-------
 tests/test_png_paeth.c | 47 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 7 deletions(-)
 create mode 100644 tests/test_png_paeth.c

diff --git a/stb_image.h b/stb_image.h
index 50aea70..c59df5b 100644
--- a/stb_image.h
+++ b/stb_image.h
@@ -4654,13 +4654,15 @@ static stbi_uc first_row_filter[5] =
 
 static int stbi__paeth(int a, int b, int c)
 {
-   int p = a + b - c;
-   int pa = abs(p-a);
-   int pb = abs(p-b);
-   int pc = abs(p-c);
-   if (pa <= pb && pa <= pc) return a;
-   if (pb <= pc) return b;
-   return c;
+   // This formulation looks very different from the reference in the PNG spec, but is
+   // actually equivalent and has favorable data dependencies and admits straightforward
+   // generation of branch-free code, which helps performance significantly.
+   int thresh = c*3 - (a + b);
+   int lo = a < b ? a : b;
+   int hi = a < b ? b : a;
+   int t0 = (hi <= thresh) ? lo : c;
+   int t1 = (thresh <= lo) ? hi : t0;
+   return t1;
 }
 
 static const stbi_uc stbi__depth_scale_table[9] = { 0, 0xff, 0x55, 0, 0x11, 0,0,0, 0x01 };
diff --git a/tests/test_png_paeth.c b/tests/test_png_paeth.c
new file mode 100644
index 0000000..69ba37f
--- /dev/null
+++ b/tests/test_png_paeth.c
@@ -0,0 +1,47 @@
+#include <stdio.h>
+#include <stdlib.h>
+
+// Reference Paeth filter as per PNG spec
+static int ref_paeth(int a, int b, int c)
+{
+   int p = a + b - c;
+   int pa = abs(p-a);
+   int pb = abs(p-b);
+   int pc = abs(p-c);
+   if (pa <= pb && pa <= pc) return a;
+   if (pb <= pc) return b;
+   return c;
+}
+
+// Optimized Paeth filter
+static int opt_paeth(int a, int b, int c)
+{
+   int thresh = c*3 - (a + b);
+   int lo = a < b ? a : b;
+   int hi = a < b ? b : a;
+   int t0 = (hi <= thresh) ? lo : c;
+   int t1 = (thresh <= lo) ? hi : t0;
+   return t1;
+}
+
+int main()
+{
+   // Exhaustively test the functions match for all byte inputs a, b,c in [0,255]
+   for (int i = 0; i < (1 << 24); ++i) {
+      int a = i & 0xff;
+      int b = (i >> 8) & 0xff;
+      int c = (i >> 16) & 0xff;
+
+      int ref = ref_paeth(a, b, c);
+      int opt = opt_paeth(a, b, c);
+      if (ref != opt) {
+         fprintf(stderr, "mismatch at a=%3d b=%3d c=%3d: ref=%3d opt=%3d\n", a, b, c, ref, opt);
+         return 1;
+      }
+   }
+
+   printf("all ok!\n");
+   return 0;
+}
+
+// vim:sw=3:sts=3:et