stb_image_resize: 2.04

This commit is contained in:
Sean Barrett 2023-12-14 03:09:00 -08:00
parent 4da08a1dbd
commit 9d924f8a47
1 changed files with 47 additions and 25 deletions

View File

@ -1,4 +1,4 @@
/* stb_image_resize2 - v2.01 - public domain image resizing
/* stb_image_resize2 - v2.04 - public domain image resizing
by Jeff Roberts (v2) and Jorge L Rodriguez
http://github.com/nothings/stb
@ -328,9 +328,11 @@
Nathan Reed: warning fixes for 1.0
REVISIONS
2.00 (2022-02-20) mostly new source: new api, optimizations, simd, vertical-first, etc
(2x-5x faster without simd, 4x-12x faster with simd)
(in some cases, 20x to 40x faster - resizing to very small for example)
2.04 (2023-11-17) Fix for rare AVX bug, shadowed symbol (thanks Nikola Smiljanic).
2.03 (2023-11-01) ASAN and TSAN warnings fixed, minor tweaks.
2.00 (2023-10-10) mostly new source: new api, optimizations, simd, vertical-first, etc
(2x-5x faster without simd, 4x-12x faster with simd)
(in some cases, 20x to 40x faster - resizing to very small for example)
0.96 (2019-03-04) fixed warnings
0.95 (2017-07-23) fixed warnings
0.94 (2017-03-18) fixed warnings
@ -450,25 +452,33 @@ typedef uint64_t stbir_uint64;
// for back compatibility, you can cast the old channel count to an stbir_pixel_layout
typedef enum
{
STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping)
STBIR_1CHANNEL = 1,
STBIR_2CHANNEL = 2,
STBIR_RGB = 3, // 3-chan, with order specified (for channel flipping)
STBIR_RGBA = 4, // alpha formats, alpha is NOT premultiplied into color channels
STBIR_BGR = 0, // 3-chan, with order specified (for channel flipping)
STBIR_4CHANNEL = 5,
STBIR_RGBA = 4, // alpha formats, where alpha is NOT premultiplied into color channels
STBIR_BGRA = 6,
STBIR_ARGB = 7,
STBIR_ABGR = 8,
STBIR_RA = 9,
STBIR_AR = 10,
STBIR_RGBA_PM = 11, // alpha formats, alpha is premultiplied into color channels
STBIR_RGBA_PM = 11, // alpha formats, where alpha is premultiplied into color channels
STBIR_BGRA_PM = 12,
STBIR_ARGB_PM = 13,
STBIR_ABGR_PM = 14,
STBIR_RA_PM = 15,
STBIR_AR_PM = 16,
STBIR_RGBA_NO_AW = 11, // alpha formats, where NO alpha weighting is applied at all!
STBIR_BGRA_NO_AW = 12, // these are just synonyms for the _PM flags (which also do
STBIR_ARGB_NO_AW = 13, // no alpha weighting). These names just make it more clear
STBIR_ABGR_NO_AW = 14, // for some folks).
STBIR_RA_NO_AW = 15,
STBIR_AR_NO_AW = 16,
} stbir_pixel_layout;
//===============================================================
@ -1172,6 +1182,10 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
#define STBIR_FORCE_GATHER_FILTER_SCANLINES_AMOUNT 32 // when downsampling and <= 32 scanlines of buffering, use gather. gather used down to 1/8th scaling for 25% win.
#endif
#ifndef STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS
#define STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS 4 // when threading, what is the minimum number of scanlines for a split?
#endif
// restrict pointers for the output pointers
#if defined( _MSC_VER ) && !defined(__clang__)
#define STBIR_STREAMOUT_PTR( star ) star __restrict
@ -1549,7 +1563,6 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
#define stbir__simdf8_0123to2222( out, in ) (out) = stbir__simdf_swiz(_mm256_castps256_ps128(in), 2,2,2,2 )
#define stbir__simdf8_load2( out, ptr ) (out) = _mm256_castsi256_ps(_mm256_castsi128_si256( _mm_loadl_epi64( (__m128i*)(ptr)) )) // top values can be random (not denormal or nan for perf)
#define stbir__simdf8_load4b( out, ptr ) (out) = _mm256_broadcast_ps( (__m128 const *)(ptr) )
static __m256i stbir_00112233 = { STBIR__CONST_4d_32i( 0, 0, 1, 1 ), STBIR__CONST_4d_32i( 2, 2, 3, 3 ) };
@ -1582,11 +1595,11 @@ static stbir__inline stbir_uint8 stbir__linear_to_srgb_uchar(float in)
#ifdef STBIR_USE_FMA // not on by default to maintain bit identical simd to non-simd
#define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_fmadd_ps( mul1, mul2, add )
#define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ), add )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_fmadd_ps( _mm256_castps128_ps256( mul ), _mm256_castps128_ps256( _mm_loadu_ps( (float const*)(ptr) ) ), add )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr )(out) = _mm256_fmadd_ps( _mm256_setr_m128( mul, _mm_setzero_ps() ), _mm256_setr_m128( _mm_loadu_ps( (float const*)(ptr) ), _mm_setzero_ps() ), add )
#else
#define stbir__simdf8_madd( out, add, mul1, mul2 ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul1, mul2 ) )
#define stbir__simdf8_madd_mem( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_mul_ps( mul, _mm256_loadu_ps( (float const*)(ptr) ) ) )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_castps128_ps256( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ) ) )
#define stbir__simdf8_madd_mem4( out, add, mul, ptr ) (out) = _mm256_add_ps( add, _mm256_setr_m128( _mm_mul_ps( mul, _mm_loadu_ps( (float const*)(ptr) ) ), _mm_setzero_ps() ) )
#endif
#define stbir__if_simdf8_cast_to_simdf4( val ) _mm256_castps256_ps128( val )
@ -3697,7 +3710,7 @@ static int stbir__pack_coefficients( int num_contributors, stbir__contributors*
float * coeffs = coefficents + widest * ( num_contributors - 1 );
// go until no chance of clipping (this is usually less than 8 loops)
while ( ( ( contribs->n0 + widest*2 ) >= row_width ) && ( contribs >= contributors ) )
while ( ( contribs >= contributors ) && ( ( contribs->n0 + widest*2 ) >= row_width ) )
{
// might we clip??
if ( ( contribs->n0 + widest ) > row_width )
@ -4652,10 +4665,10 @@ static void stbir__decode_scanline(stbir__info const * stbir_info, int n, float
stbir__simdf8_madd( tot0, tot0, c, d ); }
#define stbir__store_output() \
{ stbir__simdf t,c; \
{ stbir__simdf t,d; \
stbir__simdf8_add4halves( t, stbir__if_simdf8_cast_to_simdf4(tot0), tot0 ); \
stbir__simdf_0123to2301( c, t ); \
stbir__simdf_add( t, t, c ); \
stbir__simdf_0123to2301( d, t ); \
stbir__simdf_add( t, t, d ); \
stbir__simdf_store2( output, t ); \
horizontal_coefficients += coefficient_width; \
++horizontal_contributors; \
@ -7389,7 +7402,6 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou
resize->output_cb = 0;
resize->user_data = resize;
resize->samplers = 0;
resize->needs_rebuild = 1;
resize->called_alloc = 0;
resize->horizontal_filter = STBIR_FILTER_DEFAULT;
resize->horizontal_filter_kernel = 0; resize->horizontal_filter_support = 0;
@ -7403,6 +7415,7 @@ static void stbir__init_and_set_layout( STBIR_RESIZE * resize, stbir_pixel_layou
resize->output_data_type = data_type;
resize->input_pixel_layout_public = pixel_layout;
resize->output_pixel_layout_public = pixel_layout;
resize->needs_rebuild = 1;
}
STBIRDEF void stbir_resize_init( STBIR_RESIZE * resize,
@ -7428,17 +7441,27 @@ STBIRDEF void stbir_set_datatypes( STBIR_RESIZE * resize, stbir_datatype input_t
{
resize->input_data_type = input_type;
resize->output_data_type = output_type;
if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
stbir__update_info_from_resize( resize->samplers, resize );
}
// Records the input and output pixel callbacks on the resize object.
// If sampler info already exists and no rebuild is pending, the same
// callbacks are also written straight into that sampler info.
STBIRDEF void stbir_set_pixel_callbacks( STBIR_RESIZE * resize, stbir_input_callback * input_cb, stbir_output_callback * output_cb ) // no callbacks by default
{
  resize->input_cb = input_cb;
  resize->output_cb = output_cb;

  // no sampler info yet, or it is flagged for rebuild: nothing to patch in place
  if ( ( resize->samplers == 0 ) || ( resize->needs_rebuild ) )
    return;

  resize->samplers->in_pixels_cb = input_cb;
  resize->samplers->out_pixels_cb = output_cb;
}
// Stores the caller's user_data pointer on the resize object.
// If sampler info already exists and no rebuild is pending, the pointer
// is mirrored into that sampler info as well.
STBIRDEF void stbir_set_user_data( STBIR_RESIZE * resize, void * user_data ) // pass back STBIR_RESIZE* by default
{
  int samplers_are_current;

  resize->user_data = user_data;

  // only touch the sampler info when it exists and is not waiting on a rebuild
  samplers_are_current = ( resize->samplers != 0 ) && ( resize->needs_rebuild == 0 );
  if ( samplers_are_current )
    resize->samplers->user_data = user_data;
}
STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_pixels, int input_stride_in_bytes, void * output_pixels, int output_stride_in_bytes )
@ -7447,6 +7470,8 @@ STBIRDEF void stbir_set_buffer_ptrs( STBIR_RESIZE * resize, const void * input_p
resize->input_stride_in_bytes = input_stride_in_bytes;
resize->output_pixels = output_pixels;
resize->output_stride_in_bytes = output_stride_in_bytes;
if ( ( resize->samplers ) && ( !resize->needs_rebuild ) )
stbir__update_info_from_resize( resize->samplers, resize );
}
@ -7593,9 +7618,9 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
stbir__get_conservative_extents( &horizontal, &conservative, resize->user_data );
stbir__set_sampler(&vertical, resize->vertical_filter, resize->horizontal_filter_kernel, resize->vertical_filter_support, resize->vertical_edge, &vertical.scale_info, 0, resize->user_data );
if ( ( vertical.scale_info.output_sub_size / splits ) < 4 ) // each split should be a minimum of 4 scanlines (handwavey choice)
if ( ( vertical.scale_info.output_sub_size / splits ) < STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS ) // each split should be a minimum of 4 scanlines (handwavey choice)
{
splits = vertical.scale_info.output_sub_size / 4;
splits = vertical.scale_info.output_sub_size / STBIR_FORCE_MINIMUM_SCANLINES_FOR_SPLITS;
if ( splits == 0 ) splits = 1;
}
@ -7612,6 +7637,10 @@ static int stbir__perform_build( STBIR_RESIZE * resize, int splits )
#ifdef STBIR_PROFILE
STBIR_MEMCPY( &out_info->profile, &profile_infod.profile, sizeof( out_info->profile ) );
#endif
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize( out_info, resize );
return splits;
}
@ -7680,10 +7709,6 @@ STBIRDEF int stbir_resize_extended( STBIR_RESIZE * resize )
STBIR_PROFILE_BUILD_CLEAR( resize->samplers );
}
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize( resize->samplers, resize );
// do resize
result = stbir__perform_resize( resize->samplers, 0, resize->splits );
@ -7712,9 +7737,6 @@ STBIRDEF int stbir_resize_extended_split( STBIR_RESIZE * resize, int split_start
if ( ( split_start >= resize->splits ) || ( split_start < 0 ) || ( ( split_start + split_count ) > resize->splits ) || ( split_count <= 0 ) )
return 0;
// update anything that can be changed without recalcing samplers
stbir__update_info_from_resize( resize->samplers, resize );
// do resize
return stbir__perform_resize( resize->samplers, split_start, split_count );
}