mirror of https://github.com/nothings/stb.git
993 lines
29 KiB
C
993 lines
29 KiB
C
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
//#define HEAVYTM
|
|
#include "tm.h"
|
|
|
|
#ifdef RADUSETM3
|
|
tm_api * g_tm_api;
|
|
//#define PROFILE_MODE
|
|
#endif
|
|
|
|
#include <math.h>
|
|
|
|
#ifdef _MSC_VER
|
|
#define stop() __debugbreak()
|
|
#include <windows.h>
|
|
#define int64 __int64
|
|
#define uint64 unsigned __int64
|
|
#else
|
|
#define stop() __builtin_trap()
|
|
#define int64 long long
|
|
#define uint64 unsigned long long
|
|
#endif
|
|
|
|
#ifdef _MSC_VER
|
|
#pragma warning(disable:4127)
|
|
#endif
|
|
|
|
//#define NOCOMP
|
|
|
|
|
|
//#define PROFILE_NEW_ONLY
|
|
//#define PROFILE_MODE
|
|
|
|
|
|
#if defined(_x86_64) || defined( __x86_64__ ) || defined( _M_X64 ) || defined(__x86_64) || defined(__SSE2__) || defined(STBIR_SSE) || defined( _M_IX86_FP ) || defined(__i386) || defined( __i386__ ) || defined( _M_IX86 ) || defined( _X86_ )
|
|
|
|
#ifdef _MSC_VER
|
|
|
|
uint64 __rdtsc();
|
|
#define __cycles() __rdtsc()
|
|
|
|
#else // non msvc
|
|
|
|
static inline uint64 __cycles()
|
|
{
|
|
unsigned int lo, hi;
|
|
asm volatile ("rdtsc" : "=a" (lo), "=d" (hi) );
|
|
return ( ( (uint64) hi ) << 32 ) | ( (uint64) lo );
|
|
}
|
|
|
|
#endif // msvc
|
|
|
|
#elif defined( _M_ARM64 ) || defined( __aarch64__ ) || defined( __arm64__ ) || defined(__ARM_NEON__)
|
|
|
|
#ifdef _MSC_VER
|
|
|
|
#define __cycles() _ReadStatusReg(ARM64_CNTVCT)
|
|
|
|
#else
|
|
|
|
static inline uint64 __cycles()
|
|
{
|
|
uint64 tsc;
|
|
asm volatile("mrs %0, cntvct_el0" : "=r" (tsc));
|
|
return tsc;
|
|
}
|
|
|
|
#endif
|
|
|
|
#else // x64, arm
|
|
|
|
#error Unknown platform for timing.
|
|
|
|
#endif //x64 and
|
|
|
|
|
|
#ifdef PROFILE_MODE
|
|
|
|
#define STBIR_ASSERT(cond)
|
|
|
|
#endif
|
|
|
|
#ifdef _DEBUG
|
|
#undef STBIR_ASSERT
|
|
#define STBIR_ASSERT(cond) { if (!(cond)) stop(); }
|
|
#endif
|
|
|
|
|
|
#define SHRINKBYW 2
|
|
#define ZOOMBYW 2
|
|
#define SHRINKBYH 2
|
|
#define ZOOMBYH 2
|
|
|
|
|
|
int mem_count = 0;
|
|
|
|
#ifdef TEST_WITH_VALLOC
|
|
|
|
#define STBIR__SEPARATE_ALLOCATIONS
|
|
|
|
#if TEST_WITH_LIMIT_AT_FRONT
|
|
|
|
void * wmalloc(SIZE_T size)
|
|
{
|
|
static unsigned int pagesize=0;
|
|
void* p;
|
|
SIZE_T s;
|
|
|
|
// get the page size, if we haven't yet
|
|
if (pagesize==0)
|
|
{
|
|
SYSTEM_INFO si;
|
|
GetSystemInfo(&si);
|
|
pagesize=si.dwPageSize;
|
|
}
|
|
|
|
// we need room for the size, 8 bytes to hide the original pointer and a
|
|
// validation dword, and enough data to completely fill one page
|
|
s=(size+(pagesize-1))&~(pagesize-1);
|
|
|
|
// allocate the size plus a page (for the guard)
|
|
p=VirtualAlloc(0,(SIZE_T)s,MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);
|
|
|
|
return p;
|
|
}
|
|
|
|
void wfree(void * ptr)
|
|
{
|
|
if (ptr)
|
|
{
|
|
if ( ((ptrdiff_t)ptr) & 4095 ) stop();
|
|
if ( VirtualFree(ptr,0,MEM_RELEASE) == 0 ) stop();
|
|
}
|
|
}
|
|
|
|
#else
|
|
|
|
void * wmalloc(SIZE_T size)
|
|
{
|
|
static unsigned int pagesize=0;
|
|
void* p;
|
|
SIZE_T s;
|
|
|
|
// get the page size, if we haven't yet
|
|
if (pagesize==0)
|
|
{
|
|
SYSTEM_INFO si;
|
|
GetSystemInfo(&si);
|
|
pagesize=si.dwPageSize;
|
|
}
|
|
|
|
// we need room for the size, 8 bytes to hide the original pointer and a
|
|
// validation dword, and enough data to completely fill one page
|
|
s=(size+16+(pagesize-1))&~(pagesize-1);
|
|
|
|
// allocate the size plus a page (for the guard)
|
|
p=VirtualAlloc(0,(SIZE_T)(s+pagesize+pagesize),MEM_RESERVE|MEM_COMMIT,PAGE_READWRITE);
|
|
|
|
if (p)
|
|
{
|
|
DWORD oldprot;
|
|
void* orig=p;
|
|
|
|
// protect the first page
|
|
VirtualProtect(((char*)p),pagesize,PAGE_NOACCESS,&oldprot);
|
|
|
|
// protect the final page
|
|
VirtualProtect(((char*)p)+s+pagesize,pagesize,PAGE_NOACCESS,&oldprot);
|
|
|
|
// now move the returned pointer so that it bumps right up against the
|
|
// the next (protected) page (this may result in unaligned return
|
|
// addresses - pre-align the sizes if you always want aligned ptrs)
|
|
//#define ERROR_ON_FRONT
|
|
#ifdef ERROR_ON_FRONT
|
|
p=((char*)p)+pagesize+16;
|
|
#else
|
|
p=((char*)p)+(s-size)+pagesize;
|
|
#endif
|
|
|
|
// hide the validation value and the original pointer (which we'll
|
|
// need used for freeing) right behind the returned pointer
|
|
((unsigned int*)p)[-1]=0x98765432;
|
|
((void**)p)[-2]=orig;
|
|
++mem_count;
|
|
//printf("aloc: %p bytes: %d\n",p,(int)size);
|
|
return(p);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void wfree(void * ptr)
|
|
{
|
|
if (ptr)
|
|
{
|
|
int err=0;
|
|
|
|
// is this one of our allocations?
|
|
if (((((unsigned int*)ptr)[-1])!=0x98765432) || ((((void**)ptr)[-2])==0))
|
|
{
|
|
err=1;
|
|
}
|
|
|
|
if (err)
|
|
{
|
|
__debugbreak();
|
|
}
|
|
else
|
|
{
|
|
|
|
// back up to find the original pointer
|
|
void* p=((void**)ptr)[-2];
|
|
|
|
// clear the validation value and the original pointer
|
|
((unsigned int*)ptr)[-1]=0;
|
|
((void**)ptr)[-2]=0;
|
|
|
|
//printf("free: %p\n",ptr);
|
|
|
|
--mem_count;
|
|
|
|
// now free the pages
|
|
if (p)
|
|
VirtualFree(p,0,MEM_RELEASE);
|
|
|
|
}
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
#define STBIR_MALLOC(size,user_data) ((void)(user_data), wmalloc(size))
|
|
#define STBIR_FREE(ptr,user_data) ((void)(user_data), wfree(ptr))
|
|
|
|
#endif
|
|
|
|
#define STBIR_PROFILE
|
|
//#define STBIR_NO_SIMD
|
|
//#define STBIR_AVX
|
|
//#define STBIR_AVX2
|
|
#define STB_IMAGE_RESIZE_IMPLEMENTATION
|
|
#include "stb_image_resize2.h" // new one!
|
|
|
|
#define STB_IMAGE_WRITE_IMPLEMENTATION
|
|
#include "stb_image_write.h"
|
|
|
|
int tsizes[5] = { 1, 1, 2, 4, 2 };
|
|
int ttypes[5] = { STBIR_TYPE_UINT8, STBIR_TYPE_UINT8_SRGB, STBIR_TYPE_UINT16, STBIR_TYPE_FLOAT, STBIR_TYPE_HALF_FLOAT };
|
|
|
|
int cedges[4] = { STBIR_EDGE_CLAMP, STBIR_EDGE_REFLECT, STBIR_EDGE_ZERO, STBIR_EDGE_WRAP };
|
|
int flts[5] = { STBIR_FILTER_BOX, STBIR_FILTER_TRIANGLE, STBIR_FILTER_CUBICBSPLINE, STBIR_FILTER_CATMULLROM, STBIR_FILTER_MITCHELL };
|
|
int buffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
|
|
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
|
|
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
|
|
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
|
|
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
|
|
};
|
|
int obuffers[20] = { STBIR_1CHANNEL, STBIR_2CHANNEL, STBIR_RGB, STBIR_4CHANNEL,
|
|
STBIR_BGRA, STBIR_ARGB, STBIR_RA, STBIR_AR,
|
|
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
|
|
STBIR_RGBA_PM, STBIR_ARGB_PM, STBIR_RA_PM, STBIR_AR_PM,
|
|
STBIR_RGBA, STBIR_ARGB, STBIR_RA, STBIR_AR,
|
|
};
|
|
|
|
int bchannels[20] = { 1, 2, 3, 4, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2, 4,4, 2,2 };
|
|
int alphapos[20] = { -1, -1, -1, -1, 3,0, 1,0, 3,0, 1,0, 3,0, 1,0,3,0, 1,0 };
|
|
|
|
|
|
char const * buffstrs[20] = { "1ch", "2ch", "3ch", "4ch", "RGBA", "ARGB", "RA", "AR", "RGBA_both_pre", "ARGB_both_pre", "RA_both_pre", "AR_both_pre", "RGBA_out_pre", "ARGB_out_pre", "RA_out_pre", "AR_out_pre", "RGBA_in_pre", "ARGB_in_pre", "RA_in_pre", "AR_in_pre" };
|
|
char const * typestrs[5] = { "Bytes", "BytesSRGB", "Shorts", "Floats", "Half Floats"};
|
|
char const * edgestrs[4] = { "Clamp", "Reflect", "Zero", "Wrap" };
|
|
char const * fltstrs[5] = { "Box", "Triangle", "Cubic", "Catmullrom", "Mitchell" };
|
|
|
|
#ifdef STBIR_PROFILE
|
|
static void do_acc_zones( STBIR_PROFILE_INFO * profile )
|
|
{
|
|
stbir_uint32 j;
|
|
stbir_uint64 start = tmGetAccumulationStart( tm_mask ); start=start;
|
|
|
|
for( j = 0 ; j < profile->count ; j++ )
|
|
{
|
|
if ( profile->clocks[j] )
|
|
tmEmitAccumulationZone( 0, 0, (tm_uint64*)&start, 0, profile->clocks[j], profile->descriptions[j] );
|
|
}
|
|
}
|
|
#else
|
|
#define do_acc_zones(...)
|
|
#endif
|
|
|
|
int64 vert;
|
|
|
|
//#define WINTHREADTEST
|
|
#ifdef WINTHREADTEST
|
|
|
|
static STBIR_RESIZE * thread_resize;
|
|
static LONG which;
|
|
static int threads_started = 0;
|
|
static HANDLE threads[32];
|
|
static HANDLE starts,stops;
|
|
|
|
static DWORD resize_shim( LPVOID p )
|
|
{
|
|
for(;;)
|
|
{
|
|
LONG wh;
|
|
|
|
WaitForSingleObject( starts, INFINITE );
|
|
|
|
wh = InterlockedAdd( &which, 1 ) - 1;
|
|
|
|
ENTER( "Split %d", wh );
|
|
stbir_resize_split( thread_resize, wh, 1 );
|
|
#ifdef STBIR_PROFILE
|
|
{ STBIR_PROFILE_INFO profile; stbir_resize_split_profile_info( &profile, thread_resize, wh, 1 ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
|
|
#endif
|
|
LEAVE();
|
|
|
|
ReleaseSemaphore( stops, 1, 0 );
|
|
}
|
|
}
|
|
|
|
#endif
|
|
|
|
void nresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt )
|
|
{
|
|
STBIR_RESIZE resize;
|
|
|
|
stbir_resize_init( &resize, i, ix, iy, ip, o, ox, oy, op, buffers[buf], ttypes[type] );
|
|
stbir_set_pixel_layouts( &resize, buffers[buf], obuffers[buf] );
|
|
stbir_set_edgemodes( &resize, cedges[edg], cedges[edg] );
|
|
stbir_set_filters( &resize, flts[flt], /*STBIR_FILTER_POINT_SAMPLE */ flts[flt] );
|
|
//stbir_set_input_subrect( &resize, 0.55f,0.333f,0.75f,0.50f);
|
|
//stbir_set_output_pixel_subrect( &resize, 00, 00, ox/2,oy/2);
|
|
//stbir_set_pixel_subrect(&resize, 1430,1361,30,30);
|
|
|
|
ENTER( "Resize" );
|
|
|
|
#ifndef WINTHREADTEST
|
|
|
|
ENTER( "Filters" );
|
|
stbir_build_samplers_with_splits( &resize, 1 );
|
|
#ifdef STBIR_PROFILE
|
|
{ STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
|
|
#endif
|
|
LEAVE();
|
|
|
|
ENTER( "Resize" );
|
|
if(!stbir_resize_extended( &resize ) )
|
|
stop();
|
|
#ifdef STBIR_PROFILE
|
|
{ STBIR_PROFILE_INFO profile; stbir_resize_extended_profile_info( &profile, &resize ); do_acc_zones( &profile ); vert = profile.clocks[1]; }
|
|
#endif
|
|
LEAVE();
|
|
|
|
#else
|
|
{
|
|
int c, cnt;
|
|
|
|
ENTER( "Filters" );
|
|
cnt = stbir_build_samplers_with_splits( &resize, 4 );
|
|
#ifdef STBIR_PROFILE
|
|
{ STBIR_PROFILE_INFO profile; stbir_resize_build_profile_info( &profile, &resize ); do_acc_zones( &profile ); }
|
|
#endif
|
|
LEAVE();
|
|
|
|
ENTER( "Thread start" );
|
|
if ( threads_started == 0 )
|
|
{
|
|
starts = CreateSemaphore( 0, 0, 32, 0 );
|
|
stops = CreateSemaphore( 0, 0, 32, 0 );
|
|
}
|
|
for( c = threads_started ; c < cnt ; c++ )
|
|
threads[ c ] = CreateThread( 0, 2048*1024, resize_shim, 0, 0, 0 );
|
|
|
|
threads_started = cnt;
|
|
thread_resize = &resize;
|
|
which = 0;
|
|
LEAVE();
|
|
|
|
// starts the threads
|
|
ReleaseSemaphore( starts, cnt, 0 );
|
|
|
|
ENTER( "Wait" );
|
|
for( c = 0 ; c < cnt; c++ )
|
|
WaitForSingleObject( stops, INFINITE );
|
|
LEAVE();
|
|
}
|
|
#endif
|
|
|
|
ENTER( "Free" );
|
|
stbir_free_samplers( &resize );
|
|
LEAVE();
|
|
LEAVE();
|
|
}
|
|
|
|
|
|
#define STB_IMAGE_IMPLEMENTATION
|
|
#include "stb_image.h"
|
|
|
|
extern void oresize( void * o, int ox, int oy, int op, void * i, int ix, int iy, int ip, int buf, int type, int edg, int flt );
|
|
|
|
|
|
|
|
#define TYPESTART 0
|
|
#define TYPEEND 4
|
|
|
|
#define LAYOUTSTART 0
|
|
#define LAYOUTEND 19
|
|
|
|
#define SIZEWSTART 0
|
|
#define SIZEWEND 2
|
|
|
|
#define SIZEHSTART 0
|
|
#define SIZEHEND 2
|
|
|
|
#define EDGESTART 0
|
|
#define EDGEEND 3
|
|
|
|
#define FILTERSTART 0
|
|
#define FILTEREND 4
|
|
|
|
#define HEIGHTSTART 0
|
|
#define HEIGHTEND 2
|
|
|
|
#define WIDTHSTART 0
|
|
#define WIDTHEND 2
|
|
|
|
|
|
|
|
|
|
static void * convert8to16( unsigned char * i, int w, int h, int c )
|
|
{
|
|
unsigned short * ret;
|
|
int p;
|
|
|
|
ret = malloc( w*h*c*sizeof(short) );
|
|
for(p = 0 ; p < (w*h*c) ; p++ )
|
|
{
|
|
ret[p]=(short)((((int)i[p])<<8)+i[p]);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void * convert8tof( unsigned char * i, int w, int h, int c )
|
|
{
|
|
float * ret;
|
|
int p;
|
|
|
|
ret = malloc( w*h*c*sizeof(float) );
|
|
for(p = 0 ; p < (w*h*c) ; p++ )
|
|
{
|
|
ret[p]=((float)i[p])*(1.0f/255.0f);
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void * convert8tohf( unsigned char * i, int w, int h, int c )
|
|
{
|
|
stbir__FP16 * ret;
|
|
int p;
|
|
|
|
ret = malloc( w*h*c*sizeof(stbir__FP16) );
|
|
for(p = 0 ; p < (w*h*c) ; p++ )
|
|
{
|
|
ret[p]=stbir__float_to_half(((float)i[p])*(1.0f/255.0f));
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static void * convert8tohff( unsigned char * i, int w, int h, int c )
|
|
{
|
|
float * ret;
|
|
int p;
|
|
|
|
ret = malloc( w*h*c*sizeof(float) );
|
|
for(p = 0 ; p < (w*h*c) ; p++ )
|
|
{
|
|
ret[p]=stbir__half_to_float(stbir__float_to_half(((float)i[p])*(1.0f/255.0f)));
|
|
}
|
|
|
|
return ret;
|
|
}
|
|
|
|
static int isprime( int v )
|
|
{
|
|
int i;
|
|
|
|
if ( v <= 3 )
|
|
return ( v > 1 );
|
|
if ( ( v & 1 ) == 0 )
|
|
return 0;
|
|
if ( ( v % 3 ) == 0 )
|
|
return 0;
|
|
i = 5;
|
|
while ( (i*i) <= v )
|
|
{
|
|
if ( ( v % i ) == 0 )
|
|
return 0;
|
|
if ( ( v % ( i + 2 ) ) == 0 )
|
|
return 0;
|
|
i += 6;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static int getprime( int v )
|
|
{
|
|
int i;
|
|
i = 0;
|
|
for(;;)
|
|
{
|
|
if ( i >= v )
|
|
return v; // can't find any, just return orig
|
|
if (isprime(v - i))
|
|
return v - i;
|
|
if (isprime(v + i))
|
|
return v + i;
|
|
++i;
|
|
}
|
|
}
|
|
|
|
|
|
int main( int argc, char ** argv )
|
|
{
|
|
int ix, iy, ic;
|
|
unsigned char * input[6];
|
|
char * ir1;
|
|
char * ir2;
|
|
int szhs[3];
|
|
int szws[3];
|
|
int aw, ah, ac;
|
|
unsigned char * correctalpha;
|
|
int layouts, types, heights, widths, edges, filters;
|
|
|
|
if ( argc != 2 )
|
|
{
|
|
printf("command: stbirtest [imagefile]\n");
|
|
exit(1);
|
|
}
|
|
|
|
SetupTM( "127.0.0.1" );
|
|
|
|
correctalpha = stbi_load( "correctalpha.png", &aw, &ah, &ac, 0 );
|
|
|
|
input[0] = stbi_load( argv[1], &ix, &iy, &ic, 0 );
|
|
input[1] = input[0];
|
|
input[2] = convert8to16( input[0], ix, iy, ic );
|
|
input[3] = convert8tof( input[0], ix, iy, ic );
|
|
input[4] = convert8tohf( input[0], ix, iy, ic );
|
|
input[5] = convert8tohff( input[0], ix, iy, ic );
|
|
|
|
printf("Input %dx%d (%d channels)\n",ix,iy,ic);
|
|
|
|
ir1 = malloc( 4 * 4 * 3000 * 3000ULL );
|
|
ir2 = malloc( 4 * 4 * 3000 * 3000ULL );
|
|
|
|
szhs[0] = getprime( iy/SHRINKBYH );
|
|
szhs[1] = iy;
|
|
szhs[2] = getprime( iy*ZOOMBYH );
|
|
|
|
szws[0] = getprime( ix/SHRINKBYW );
|
|
szws[1] = ix;
|
|
szws[2] = getprime( ix*ZOOMBYW );
|
|
|
|
#if 1
|
|
for( types = TYPESTART ; types <= TYPEEND ; types++ )
|
|
#else
|
|
for( types = 1 ; types <= 1 ; types++ )
|
|
#endif
|
|
{
|
|
ENTER( "Test type: %s",typestrs[types]);
|
|
#if 1
|
|
for( layouts = LAYOUTSTART ; layouts <= LAYOUTEND ; layouts++ )
|
|
#else
|
|
for( layouts = 16; layouts <= 16 ; layouts++ )
|
|
#endif
|
|
{
|
|
ENTER( "Test layout: %s",buffstrs[layouts]);
|
|
|
|
#if 0
|
|
for( heights = HEIGHTSTART ; heights <= HEIGHTEND ; heights++ )
|
|
{
|
|
int w, h = szhs[heights];
|
|
#else
|
|
for( heights = 0 ; heights <= 11 ; heights++ )
|
|
{
|
|
static int szhsz[12]={32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
|
|
int w, h = szhsz[heights];
|
|
#endif
|
|
|
|
ENTER( "Test height: %d %s %d",iy,(h<iy)?"Down":((h>iy)?"Up":"Same"),h);
|
|
|
|
#if 0
|
|
for( widths = WIDTHSTART ; widths <= WIDTHEND ; widths++ )
|
|
{
|
|
w = szws[widths];
|
|
#else
|
|
for( widths = 0 ; widths <= 12 ; widths++ )
|
|
{
|
|
static int szwsz[13]={2, 32, 200, 350, 400, 450, 509, 532, 624, 700, 824, 1023, 2053 };
|
|
w = szwsz[widths];
|
|
#endif
|
|
|
|
ENTER( "Test width: %d %s %d",ix, (w<ix)?"Down":((w>ix)?"Up":"Same"), w);
|
|
|
|
#if 0
|
|
for( edges = EDGESTART ; edges <= EDGEEND ; edges++ )
|
|
#else
|
|
for( edges = 0 ; edges <= 0 ; edges++ )
|
|
#endif
|
|
{
|
|
ENTER( "Test edge: %s",edgestrs[edges]);
|
|
#if 0
|
|
for( filters = FILTERSTART ; filters <= FILTEREND ; filters++ )
|
|
#else
|
|
for( filters = 3 ; filters <= 3 ; filters++ )
|
|
#endif
|
|
{
|
|
int op, opw, np,npw, c, a;
|
|
#ifdef COMPARE_SAME
|
|
int oldtypes = types;
|
|
#else
|
|
int oldtypes = (types==4)?3:types;
|
|
#endif
|
|
|
|
ENTER( "Test filter: %s",fltstrs[filters]);
|
|
{
|
|
c = bchannels[layouts];
|
|
a = alphapos[layouts];
|
|
|
|
op = w*tsizes[oldtypes]*c + 60;
|
|
opw = w*tsizes[oldtypes]*c;
|
|
|
|
np = w*tsizes[types]*c + 60;
|
|
npw = w*tsizes[types]*c;
|
|
|
|
printf( "%s:layout: %s w: %d h: %d edge: %s filt: %s\n", typestrs[types],buffstrs[layouts], w, h, edgestrs[edges], fltstrs[filters] );
|
|
|
|
|
|
// clear pixel area to different, right edge to zero
|
|
#ifndef NOCLEAR
|
|
ENTER( "Test clear padding" );
|
|
{
|
|
int d;
|
|
for( d = 0 ; d < h ; d++ )
|
|
{
|
|
int oofs = d * op;
|
|
int nofs = d * np;
|
|
memset( ir1 + oofs, 192, opw );
|
|
memset( ir1 + oofs+opw, 79, op-opw );
|
|
memset( ir2 + nofs, 255, npw );
|
|
memset( ir2 + nofs+npw, 79, np-npw );
|
|
}
|
|
}
|
|
LEAVE();
|
|
|
|
#endif
|
|
|
|
#ifdef COMPARE_SAME
|
|
#define TIMINGS 1
|
|
#else
|
|
#define TIMINGS 1
|
|
#endif
|
|
ENTER( "Test both" );
|
|
{
|
|
#ifndef PROFILE_NEW_ONLY
|
|
{
|
|
int ttt, max = 0x7fffffff;
|
|
ENTER( "Test old" );
|
|
for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
|
|
{
|
|
int64 m = __cycles();
|
|
|
|
oresize( ir1, w, h, op,
|
|
#ifdef COMPARE_SAME
|
|
input[types],
|
|
#else
|
|
input[(types==4)?5:types],
|
|
#endif
|
|
ix, iy, ix*ic*tsizes[oldtypes], layouts, oldtypes, edges, filters );
|
|
|
|
m = __cycles() - m;
|
|
if ( ( (int)m ) < max )
|
|
max = (int) m;
|
|
}
|
|
LEAVE();
|
|
printf("old: %d\n", max );
|
|
}
|
|
#endif
|
|
|
|
{
|
|
int ttt, max = 0x7fffffff, maxv = 0x7fffffff;
|
|
ENTER( "Test new" );
|
|
for( ttt = 0 ; ttt < TIMINGS ; ttt++ )
|
|
{
|
|
int64 m = __cycles();
|
|
|
|
nresize( ir2, w, h, np, input[types], ix, iy, ix*ic*tsizes[types], layouts, types, edges, filters );
|
|
|
|
m = __cycles() - m;
|
|
if ( ( (int)m ) < max )
|
|
max = (int) m;
|
|
if ( ( (int)vert ) < maxv )
|
|
maxv = (int) vert;
|
|
}
|
|
LEAVE(); // test new
|
|
printf("new: %d (v: %d)\n", max, maxv );
|
|
}
|
|
}
|
|
LEAVE(); // test both
|
|
|
|
if ( mem_count!= 0 )
|
|
stop();
|
|
|
|
#ifndef NOCOMP
|
|
ENTER( "Test compare" );
|
|
{
|
|
int x,y,ch;
|
|
int nums = 0;
|
|
for( y = 0 ; y < h ; y++ )
|
|
{
|
|
for( x = 0 ; x < w ; x++ )
|
|
{
|
|
switch(types)
|
|
{
|
|
case 0:
|
|
case 1: //SRGB
|
|
{
|
|
unsigned char * p1 = (unsigned char *)&ir1[y*op+x*c];
|
|
unsigned char * p2 = (unsigned char *)&ir2[y*np+x*c];
|
|
for( ch = 0 ; ch < c ; ch++ )
|
|
{
|
|
float pp1,pp2,d;
|
|
float av = (a==-1)?1.0f:((float)p1[a]/255.0f);
|
|
|
|
pp1 = p1[ch];
|
|
pp2 = p2[ch];
|
|
|
|
// compare in premult space
|
|
#ifndef COMPARE_SAME
|
|
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >=16 ) && ( layouts <= 19 ) ) )
|
|
{
|
|
pp1 *= av;
|
|
pp2 *= av;
|
|
}
|
|
#endif
|
|
|
|
d = pp1 - pp2;
|
|
if ( d < 0 ) d = -d;
|
|
|
|
#ifdef COMPARE_SAME
|
|
if ( d > 0 )
|
|
#else
|
|
if ( d > 1 )
|
|
#endif
|
|
{
|
|
printf("Error at %d x %d (chan %d) (d: %g a: %g) [%d %d %d %d] [%d %d %d %d]\n",x,y,ch, d,av, p1[0],p1[1],p1[2],p1[3], p2[0],p2[1],p2[2],p2[3]);
|
|
++nums;
|
|
if ( nums > 16 ) goto ex;
|
|
//if (d) exit(1);
|
|
//goto ex;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 2:
|
|
{
|
|
unsigned short * p1 = (unsigned short *)&ir1[y*op+x*c*sizeof(short)];
|
|
unsigned short * p2 = (unsigned short *)&ir2[y*np+x*c*sizeof(short)];
|
|
for( ch = 0 ; ch < c ; ch++ )
|
|
{
|
|
float thres,pp1,pp2,d;
|
|
float av = (a==-1)?1.0f:((float)p1[a]/65535.0f);
|
|
|
|
pp1 = p1[ch];
|
|
pp2 = p2[ch];
|
|
|
|
// compare in premult space
|
|
#ifndef COMPARE_SAME
|
|
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
|
|
{
|
|
pp1 *= av;
|
|
pp2 *= av;
|
|
}
|
|
#endif
|
|
|
|
d = pp1 - pp2;
|
|
if ( d < 0 ) d = -d;
|
|
|
|
thres=((float)p1[ch]*0.007f)+2.0f;
|
|
if (thres<4) thres = 4;
|
|
|
|
#ifdef COMPARE_SAME
|
|
if ( d > 0 )
|
|
#else
|
|
if ( d > thres)
|
|
#endif
|
|
{
|
|
printf("Error at %d x %d (chan %d) %d %d [df: %g th: %g al: %g] (%d %d %d %d) (%d %d %d %d)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
|
|
++nums;
|
|
if ( nums > 16 ) goto ex;
|
|
//if (d) exit(1);
|
|
//goto ex;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 3:
|
|
{
|
|
float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
|
|
float * p2 = (float *)&ir2[y*np+x*c*sizeof(float)];
|
|
for( ch = 0 ; ch < c ; ch++ )
|
|
{
|
|
float pp1 = p1[ch], pp2 = p2[ch];
|
|
float av = (a==-1)?1.0f:p1[a];
|
|
float thres, d;
|
|
|
|
// clamp
|
|
if (pp1<=0.0f) pp1 = 0;
|
|
if (pp2<=0.0f) pp2 = 0;
|
|
if (av<=0.0f) av = 0;
|
|
if (pp1>1.0f) pp1 = 1.0f;
|
|
if (pp2>1.0f) pp2 = 1.0f;
|
|
if (av>1.0f) av = 1.0f;
|
|
|
|
// compare in premult space
|
|
#ifndef COMPARE_SAME
|
|
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
|
|
{
|
|
pp1 *= av;
|
|
pp2 *= av;
|
|
}
|
|
#endif
|
|
|
|
d = pp1 - pp2;
|
|
if ( d < 0 ) d = -d;
|
|
|
|
thres=(p1[ch]*0.002f)+0.0002f;
|
|
if ( thres < 0 ) thres = -thres;
|
|
|
|
#ifdef COMPARE_SAME
|
|
if ( d != 0.0f )
|
|
#else
|
|
if ( d > thres )
|
|
#endif
|
|
{
|
|
printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch, p1[ch],p2[ch],d,thres,av,p1[0],p1[1],p1[2],p1[3],p2[0],p2[1],p2[2],p2[3]);
|
|
++nums;
|
|
if ( nums > 16 ) goto ex;
|
|
//if (d) exit(1);
|
|
//goto ex;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
|
|
case 4:
|
|
{
|
|
#ifdef COMPARE_SAME
|
|
stbir__FP16 * p1 = (stbir__FP16 *)&ir1[y*op+x*c*sizeof(stbir__FP16)];
|
|
#else
|
|
float * p1 = (float *)&ir1[y*op+x*c*sizeof(float)];
|
|
#endif
|
|
stbir__FP16 * p2 = (stbir__FP16 *)&ir2[y*np+x*c*sizeof(stbir__FP16)];
|
|
for( ch = 0 ; ch < c ; ch++ )
|
|
{
|
|
#ifdef COMPARE_SAME
|
|
float pp1 = stbir__half_to_float(p1[ch]);
|
|
float av = (a==-1)?1.0f:stbir__half_to_float(p1[a]);
|
|
#else
|
|
float pp1 = stbir__half_to_float(stbir__float_to_half(p1[ch]));
|
|
float av = (a==-1)?1.0f:stbir__half_to_float(stbir__float_to_half(p1[a]));
|
|
#endif
|
|
float pp2 = stbir__half_to_float(p2[ch]);
|
|
float d, thres;
|
|
|
|
// clamp
|
|
if (pp1<=0.0f) pp1 = 0;
|
|
if (pp2<=0.0f) pp2 = 0;
|
|
if (av<=0.0f) av = 0;
|
|
if (pp1>1.0f) pp1 = 1.0f;
|
|
if (pp2>1.0f) pp2 = 1.0f;
|
|
if (av>1.0f) av = 1.0f;
|
|
|
|
thres=(pp1*0.002f)+0.0002f;
|
|
|
|
// compare in premult space
|
|
#ifndef COMPARE_SAME
|
|
if ( ( ( layouts >=4 ) && ( layouts <= 7 ) ) || ( ( layouts >= 16 ) && ( layouts <= 19 ) ) )
|
|
{
|
|
pp1 *= av;
|
|
pp2 *= av;
|
|
}
|
|
#endif
|
|
|
|
d = pp1 - pp2;
|
|
if ( d < 0 ) d = -d;
|
|
|
|
|
|
#ifdef COMPARE_SAME
|
|
if ( d != 0.0f )
|
|
#else
|
|
if ( d > thres )
|
|
#endif
|
|
{
|
|
printf("Error at %d x %d (chan %d) %g %g [df: %g th: %g al: %g] (%g %g %g %g) (%g %g %g %g)\n",x,y,ch,
|
|
#ifdef COMPARE_SAME
|
|
stbir__half_to_float(p1[ch]),
|
|
#else
|
|
p1[ch],
|
|
#endif
|
|
stbir__half_to_float(p2[ch]),
|
|
d,thres,av,
|
|
#ifdef COMPARE_SAME
|
|
stbir__half_to_float(p1[0]),stbir__half_to_float(p1[1]),stbir__half_to_float(p1[2]),stbir__half_to_float(p1[3]),
|
|
#else
|
|
p1[0],p1[1],p1[2],p1[3],
|
|
#endif
|
|
stbir__half_to_float(p2[0]),stbir__half_to_float(p2[1]),stbir__half_to_float(p2[2]),stbir__half_to_float(p2[3]) );
|
|
++nums;
|
|
if ( nums > 16 ) goto ex;
|
|
//if (d) exit(1);
|
|
//goto ex;
|
|
}
|
|
}
|
|
}
|
|
break;
|
|
}
|
|
}
|
|
|
|
for( x = (w*c)*tsizes[oldtypes]; x < op; x++ )
|
|
{
|
|
if ( ir1[y*op+x] != 79 )
|
|
{
|
|
printf("Margin error at %d x %d %d (should be 79) OLD!\n",x,y,(unsigned char)ir1[y*op+x]);
|
|
goto ex;
|
|
}
|
|
}
|
|
|
|
for( x = (w*c)*tsizes[types]; x < np; x++ )
|
|
{
|
|
if ( ir2[y*np+x] != 79 )
|
|
{
|
|
printf("Margin error at %d x %d %d (should be 79) NEW\n",x,y,(unsigned char)ir2[y*np+x]);
|
|
goto ex;
|
|
}
|
|
}
|
|
}
|
|
|
|
ex:
|
|
ENTER( "OUTPUT IMAGES" );
|
|
printf(" tot pix: %d, errs: %d\n", w*h*c,nums );
|
|
|
|
if (nums)
|
|
{
|
|
stbi_write_png("old.png", w, h, c, ir1, op);
|
|
stbi_write_png("new.png", w, h, c, ir2, np);
|
|
exit(1);
|
|
}
|
|
|
|
LEAVE(); // output images
|
|
}
|
|
LEAVE(); //test compare
|
|
#endif
|
|
|
|
|
|
|
|
}
|
|
LEAVE(); // test filter
|
|
}
|
|
LEAVE(); // test edge
|
|
}
|
|
LEAVE(); // test width
|
|
}
|
|
LEAVE(); // test height
|
|
}
|
|
LEAVE(); // test type
|
|
}
|
|
LEAVE(); // test layout
|
|
}
|
|
|
|
CloseTM();
|
|
return 0;
|
|
}
|