From 2479bc6c863272d50ff0d8e6cfd87326e4605e1d Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 12 Oct 2024 16:06:31 +0100 Subject: [PATCH 1/3] Reduce the size of the SDL_blit_0 alpha code --- src/video/SDL_blit_0.c | 67 ++++++++++++------------------------------ 1 file changed, 19 insertions(+), 48 deletions(-) diff --git a/src/video/SDL_blit_0.c b/src/video/SDL_blit_0.c index e7750ddb3d491..3104f4ed6fbc4 100644 --- a/src/video/SDL_blit_0.c +++ b/src/video/SDL_blit_0.c @@ -605,11 +605,8 @@ SDL_FORCE_INLINE void BlitBto4Key(SDL_BlitInfo *info, const Uint32 srcbpp) } } -SDL_FORCE_INLINE void BlitBtoNAlpha(SDL_BlitInfo *info, const Uint32 srcbpp) +static void BlitBtoNAlpha(SDL_BlitInfo *info) { - const Uint32 mask = (1 << srcbpp) - 1; - const Uint32 align = (8 / srcbpp) - 1; - int width = info->dst_w; int height = info->dst_h; Uint8 *src = info->src; @@ -617,15 +614,17 @@ SDL_FORCE_INLINE void BlitBtoNAlpha(SDL_BlitInfo *info, const Uint32 srcbpp) int srcskip = info->src_skip; int dstskip = info->dst_skip; const SDL_Color *srcpal = info->src_fmt->palette->colors; + SDL_PixelFormat *srcfmt = info->src_fmt; SDL_PixelFormat *dstfmt = info->dst_fmt; - int dstbpp; + int srcbpp, dstbpp; int c; - Uint32 pixel; + Uint32 pixel, mask, align; unsigned sR, sG, sB; unsigned dR, dG, dB, dA; const unsigned A = info->a; /* Set up some basic variables */ + srcbpp = srcfmt->BitsPerPixel; /* bit depth, not byte size: sizes mask/align and picks the srcskip adjustment */ dstbpp = dstfmt->BytesPerPixel; if (srcbpp == 4) srcskip += width - (width + 1) / 2; @@ -633,6 +632,8 @@ SDL_FORCE_INLINE void BlitBtoNAlpha(SDL_BlitInfo *info, const Uint32 srcbpp) srcskip += width - (width + 3) / 4; else if (srcbpp == 1) srcskip += width - (width + 7) / 8; + mask = (1 << srcbpp) - 1; + align = (8 / srcbpp) - 1; if (SDL_PIXELORDER(info->src_fmt->format) == SDL_BITMAPORDER_4321) { while (height--) { @@ -681,11 +682,8 @@ SDL_FORCE_INLINE void BlitBtoNAlpha(SDL_BlitInfo *info, const Uint32 srcbpp) } } -SDL_FORCE_INLINE void BlitBtoNAlphaKey(SDL_BlitInfo *info, const Uint32 
srcbpp) +static void BlitBtoNAlphaKey(SDL_BlitInfo *info) { - const Uint32 mask = (1 << srcbpp) - 1; - const Uint32 align = (8 / srcbpp) - 1; - int width = info->dst_w; int height = info->dst_h; Uint8 *src = info->src; @@ -695,15 +693,16 @@ SDL_FORCE_INLINE void BlitBtoNAlphaKey(SDL_BlitInfo *info, const Uint32 srcbpp) SDL_PixelFormat *srcfmt = info->src_fmt; SDL_PixelFormat *dstfmt = info->dst_fmt; const SDL_Color *srcpal = srcfmt->palette->colors; - int dstbpp; + int srcbpp, dstbpp; int c; - Uint32 pixel; + Uint32 pixel, mask, align; unsigned sR, sG, sB; unsigned dR, dG, dB, dA; const unsigned A = info->a; Uint32 ckey = info->colorkey; /* Set up some basic variables */ + srcbpp = srcfmt->BitsPerPixel; /* bit depth (the removed wrappers passed 1, 2 or 4), not byte size */ dstbpp = dstfmt->BytesPerPixel; if (srcbpp == 4) srcskip += width - (width + 1) / 2; @@ -711,6 +710,8 @@ SDL_FORCE_INLINE void BlitBtoNAlphaKey(SDL_BlitInfo *info, const Uint32 srcbpp) srcskip += width - (width + 3) / 4; else if (srcbpp == 1) srcskip += width - (width + 7) / 8; + mask = (1 << srcbpp) - 1; + align = (8 / srcbpp) - 1; if (SDL_PIXELORDER(info->src_fmt->format) == SDL_BITMAPORDER_4321) { while (height--) { @@ -801,16 +802,6 @@ static const SDL_BlitFunc colorkey_blit_1b[] = { (SDL_BlitFunc)NULL, Blit1bto1Key, Blit1bto2Key, Blit1bto3Key, Blit1bto4Key }; -static void Blit1btoNAlpha(SDL_BlitInfo *info) -{ - BlitBtoNAlpha(info, 1); -} - -static void Blit1btoNAlphaKey(SDL_BlitInfo *info) -{ - BlitBtoNAlphaKey(info, 1); -} - static void Blit2bto1(SDL_BlitInfo *info) { @@ -853,16 +844,6 @@ static const SDL_BlitFunc colorkey_blit_2b[] = { (SDL_BlitFunc)NULL, Blit2bto1Key, Blit2bto2Key, Blit2bto3Key, Blit2bto4Key }; -static void Blit2btoNAlpha(SDL_BlitInfo *info) -{ - BlitBtoNAlpha(info, 2); -} - -static void Blit2btoNAlphaKey(SDL_BlitInfo *info) -{ - BlitBtoNAlphaKey(info, 2); -} - static void Blit4bto1(SDL_BlitInfo *info) { @@ -905,16 +886,6 @@ static const SDL_BlitFunc colorkey_blit_4b[] = { (SDL_BlitFunc)NULL, Blit4bto1Key, Blit4bto2Key, Blit4bto3Key, 
Blit4bto4Key }; -static void Blit4btoNAlpha(SDL_BlitInfo *info) -{ - BlitBtoNAlpha(info, 4); -} - -static void Blit4btoNAlphaKey(SDL_BlitInfo *info) -{ - BlitBtoNAlphaKey(info, 4); -} - SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface) @@ -936,10 +907,10 @@ SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface) return colorkey_blit_1b[which]; case SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit1btoNAlpha : (SDL_BlitFunc)NULL; + return which >= 2 ? BlitBtoNAlpha : (SDL_BlitFunc)NULL; case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit1btoNAlphaKey : (SDL_BlitFunc)NULL; + return which >= 2 ? BlitBtoNAlphaKey : (SDL_BlitFunc)NULL; } return NULL; } @@ -953,10 +924,10 @@ SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface) return colorkey_blit_2b[which]; case SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit2btoNAlpha : (SDL_BlitFunc)NULL; + return which >= 2 ? BlitBtoNAlpha : (SDL_BlitFunc)NULL; case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit2btoNAlphaKey : (SDL_BlitFunc)NULL; + return which >= 2 ? BlitBtoNAlphaKey : (SDL_BlitFunc)NULL; } return NULL; } @@ -970,10 +941,10 @@ SDL_BlitFunc SDL_CalculateBlit0(SDL_Surface *surface) return colorkey_blit_4b[which]; case SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit4btoNAlpha : (SDL_BlitFunc)NULL; + return which >= 2 ? BlitBtoNAlpha : (SDL_BlitFunc)NULL; case SDL_COPY_COLORKEY | SDL_COPY_MODULATE_ALPHA | SDL_COPY_BLEND: - return which >= 2 ? Blit4btoNAlphaKey : (SDL_BlitFunc)NULL; + return which >= 2 ? 
BlitBtoNAlphaKey : (SDL_BlitFunc)NULL; } return NULL; } From 6f836f90736c1e5f4f011c5fed05df263962c77f Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 12 Oct 2024 17:20:32 +0100 Subject: [PATCH 2/3] Allow for more fine tuning of Duff's device routines --- src/video/SDL_blit.h | 46 +++++++++++++++++++++++++++++++++--------- src/video/SDL_blit_1.c | 16 +++++++-------- src/video/SDL_blit_A.c | 22 ++++++++++---------- src/video/SDL_blit_N.c | 14 ++++++------- 4 files changed, 62 insertions(+), 36 deletions(-) diff --git a/src/video/SDL_blit.h b/src/video/SDL_blit.h index 731ea63753f2e..00c6dd6c345b5 100644 --- a/src/video/SDL_blit.h +++ b/src/video/SDL_blit.h @@ -471,6 +471,15 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); #else #define USE_DUFFS_LOOP #endif + +#define DUFFS_LOOP1(pixel_copy_increment, width) \ + { \ + int n; \ + for (n = width; n > 0; --n) { \ + pixel_copy_increment; \ + } \ + } + #ifdef USE_DUFFS_LOOP /* 8-times unrolled loop */ @@ -527,8 +536,26 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); } \ } -/* Use the 8-times version of the loop by default */ +/* 2-times unrolled loop */ +#define DUFFS_LOOP2(pixel_copy_increment, width) \ + { \ + int n = (width + 1) / 2; \ + switch (width & 1) { \ + case 0: \ + do { \ + pixel_copy_increment; \ + SDL_FALLTHROUGH; \ + case 1: \ + pixel_copy_increment; \ + } while (--n > 0); \ + } \ + } + +/* Use the 4-times version of the loop by default */ #define DUFFS_LOOP(pixel_copy_increment, width) \ + DUFFS_LOOP4(pixel_copy_increment, width) +/* Use the 8-times version of the loop for simple routines */ +#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \ DUFFS_LOOP8(pixel_copy_increment, width) /* Special version of Duff's device for even more optimization */ @@ -562,20 +589,19 @@ extern SDL_BlitFunc SDL_CalculateBlitA(SDL_Surface *surface); /* Don't use Duff's device to unroll loops */ #define DUFFS_LOOP(pixel_copy_increment, width) \ - { \ - int n; \ - for (n = width; 
n > 0; --n) { \ - pixel_copy_increment; \ - } \ - } + DUFFS_LOOP1(pixel_copy_increment, width) +#define DUFFS_LOOP_TRIVIAL(pixel_copy_increment, width) \ + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP8(pixel_copy_increment, width) \ - DUFFS_LOOP(pixel_copy_increment, width) + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP4(pixel_copy_increment, width) \ - DUFFS_LOOP(pixel_copy_increment, width) + DUFFS_LOOP1(pixel_copy_increment, width) +#define DUFFS_LOOP2(pixel_copy_increment, width) \ + DUFFS_LOOP1(pixel_copy_increment, width) #define DUFFS_LOOP_124(pixel_copy_increment1, \ pixel_copy_increment2, \ pixel_copy_increment4, width) \ - DUFFS_LOOP(pixel_copy_increment1, width) + DUFFS_LOOP1(pixel_copy_increment1, width) #endif /* USE_DUFFS_LOOP */ diff --git a/src/video/SDL_blit_1.c b/src/video/SDL_blit_1.c index 93fdb3ec1bc34..6ec6677ecd346 100644 --- a/src/video/SDL_blit_1.c +++ b/src/video/SDL_blit_1.c @@ -50,7 +50,7 @@ static void Blit1to1(SDL_BlitInfo *info) while (height--) { #ifdef USE_DUFFS_LOOP /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = map[*src]; } @@ -102,7 +102,7 @@ static void Blit1to2(SDL_BlitInfo *info) #ifdef USE_DUFFS_LOOP while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *(Uint16 *)dst = map[*src++]; dst += 2; @@ -258,7 +258,7 @@ static void Blit1to4(SDL_BlitInfo *info) while (height--) { #ifdef USE_DUFFS_LOOP /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( *dst++ = map[*src++]; , width); /* *INDENT-ON* */ /* clang-format on */ @@ -299,7 +299,7 @@ static void Blit1to1Key(SDL_BlitInfo *info) if (palmap) { while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dst = palmap[*src]; @@ -315,7 +315,7 @@ static void Blit1to1Key(SDL_BlitInfo *info) } else { while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + 
DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dst = *src; @@ -347,7 +347,7 @@ static void Blit1to2Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dstp=palmap[*src]; @@ -410,7 +410,7 @@ static void Blit1to4Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( *src != ckey ) { *dstp = palmap[*src]; @@ -446,7 +446,7 @@ static void Blit1toNAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { sR = srcpal[*src].r; sG = srcpal[*src].g; diff --git a/src/video/SDL_blit_A.c b/src/video/SDL_blit_A.c index 96fec1b478fc8..4e3d9ed2c5d4b 100644 --- a/src/video/SDL_blit_A.c +++ b/src/video/SDL_blit_A.c @@ -47,7 +47,7 @@ static void BlitNto1SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); dR = dstfmt->palette->colors[*dst].r; @@ -92,7 +92,7 @@ static void BlitNto1PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGBA(src,srcbpp,srcfmt,Pixel,sR,sG,sB,sA); dR = dstfmt->palette->colors[*dst].r; @@ -484,7 +484,7 @@ static void BlitRGBtoRGBSurfaceAlpha128(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp++; Uint32 d = *dstp; *dstp++ = ((((s & 0x00fefefe) + (d & 0x00fefefe)) >> 1) @@ -516,7 +516,7 @@ static void BlitRGBtoRGBSurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ s = *srcp; d = *dstp; s1 = s & 0xff00ff; @@ -1148,7 +1148,7 @@ static void Blit565to565SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = 
*srcp++; Uint32 d = *dstp; /* @@ -1186,7 +1186,7 @@ static void Blit555to555SurfaceAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp++; Uint32 d = *dstp; /* @@ -1219,7 +1219,7 @@ static void BlitARGBto565PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ Uint32 s = *srcp; unsigned alpha = s >> 27; /* downscale alpha to 5 bits */ /* Here we special-case opaque alpha since the @@ -1262,7 +1262,7 @@ static void BlitARGBto555PixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4({ + DUFFS_LOOP({ unsigned alpha; Uint32 s = *srcp; alpha = s >> 27; /* downscale alpha to 5 bits */ @@ -1315,7 +1315,7 @@ static void BlitNtoNSurfaceAlpha(SDL_BlitInfo *info) if (sA) { while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGB(src, srcbpp, srcfmt, Pixel, sR, sG, sB); DISEMBLE_RGBA(dst, dstbpp, dstfmt, Pixel, dR, dG, dB, dA); @@ -1353,7 +1353,7 @@ static void BlitNtoNSurfaceAlphaKey(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { RETRIEVE_RGB_PIXEL(src, srcbpp, Pixel); if (sA && Pixel != ckey) { @@ -1395,7 +1395,7 @@ static void BlitNtoNPixelAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP4( + DUFFS_LOOP( { DISEMBLE_RGBA(src, srcbpp, srcfmt, Pixel, sR, sG, sB, sA); if (sA) { diff --git a/src/video/SDL_blit_N.c b/src/video/SDL_blit_N.c index 123d0aad0395f..1636c17e3e1c3 100644 --- a/src/video/SDL_blit_N.c +++ b/src/video/SDL_blit_N.c @@ -2076,7 +2076,7 @@ static void Blit_RGB555_ARGB1555(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src | mask; ++dst; @@ -2200,7 +2200,7 @@ static void Blit4to4MaskAlpha(SDL_BlitInfo *info) while (height--) 
{ /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src | mask; ++dst; @@ -2217,7 +2217,7 @@ static void Blit4to4MaskAlpha(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { *dst = *src & mask; ++dst; @@ -2576,7 +2576,7 @@ static void Blit2to2Key(SDL_BlitInfo *info) while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ( (*srcp & rgbmask) != ckey ) { *dstp = *srcp; @@ -2622,7 +2622,7 @@ static void BlitNtoNKey(SDL_BlitInfo *info) Uint32 mask = ((Uint32)info->a) << dstfmt->Ashift; while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32 | mask; @@ -2640,7 +2640,7 @@ static void BlitNtoNKey(SDL_BlitInfo *info) Uint32 mask = srcfmt->Rmask | srcfmt->Gmask | srcfmt->Bmask; while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32 & mask; @@ -2897,7 +2897,7 @@ static void BlitNtoNKeyCopyAlpha(SDL_BlitInfo *info) Uint32 *dst32 = (Uint32 *)dst; while (height--) { /* *INDENT-OFF* */ /* clang-format off */ - DUFFS_LOOP( + DUFFS_LOOP_TRIVIAL( { if ((*src32 & rgbmask) != ckey) { *dst32 = *src32; From 1ce5839cbc424c444174f173d13782c47541294e Mon Sep 17 00:00:00 2001 From: Cameron Cawley Date: Sat, 12 Oct 2024 19:01:24 +0100 Subject: [PATCH 3/3] 3DS: Only bundle resources with tests that need them --- test/CMakeLists.txt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index 8d413c85d571b..d11a6d2b2cec3 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -399,12 +399,11 @@ if(PSP) endif() if(N3DS) - set(ROMFS_DIR "${CMAKE_CURRENT_BINARY_DIR}/romfs") - file(COPY ${RESOURCE_FILES} DESTINATION "${ROMFS_DIR}") - foreach(APP IN LISTS SDL_TEST_EXECUTABLES) 
get_target_property(TARGET_BINARY_DIR ${APP} BINARY_DIR) + set(ROMFS_DIR "${TARGET_BINARY_DIR}/sdl-${APP}") set(SMDH_FILE "${TARGET_BINARY_DIR}/${APP}.smdh") + file(MAKE_DIRECTORY ${ROMFS_DIR}) ctr_generate_smdh("${SMDH_FILE}" NAME "SDL-${APP}" DESCRIPTION "SDL2 Test suite" @@ -461,7 +460,7 @@ add_custom_target(copy-sdl-test-resources ) foreach(APP IN LISTS SDL_TESTS_NEEDS_RESOURCES) - if(PSP OR PS2) + if(PSP OR PS2 OR N3DS) foreach(RESOURCE_FILE ${RESOURCE_FILES}) add_custom_command(TARGET ${APP} POST_BUILD COMMAND ${CMAKE_COMMAND} ARGS -E copy_if_different ${RESOURCE_FILE} $<TARGET_FILE_DIR:${APP}>/sdl-${APP}) endforeach()