Skip to content

Commit

Permalink
[Peanut-GB] Greatly improve performance
Browse files Browse the repository at this point in the history
  • Loading branch information
Yaya-Cout committed Jan 12, 2025
1 parent 1c2fe21 commit e2f0957
Show file tree
Hide file tree
Showing 4 changed files with 133 additions and 57 deletions.
2 changes: 1 addition & 1 deletion apps/Peanut-GB/Makefile
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
API=../../api
CC=arm-none-eabi-gcc
OBJCOPY=arm-none-eabi-objcopy
CFLAGS=-DNDEBUG -ggdb3 -I$(API) -Os -mcpu=cortex-m7 -mthumb -mfpu=fpv5-sp-d16 -mfloat-abi=hard -fno-common -fdata-sections -ffunction-sections -fno-exceptions
CFLAGS=-DNDEBUG -ggdb3 -I$(API) -O3 -mcpu=cortex-m7 -mthumb -mfpu=fpv5-sp-d16 -mfloat-abi=hard -fno-common -fdata-sections -ffunction-sections -fno-exceptions
LDFLAGS=-Wl,-L$(API) -Wl,--gc-sections -Wl,--entry=entrypoint --specs=nosys.specs -nostartfiles -Wl,-Ur -lapi

NES_ROM = epsilon/2048.nes
Expand Down
Binary file modified apps/Peanut-GB/app.elf
Binary file not shown.
180 changes: 128 additions & 52 deletions apps/Peanut-GB/main.c
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,12 @@
#define DUMMY_ROM 0
#define DUMMY_ROM_NAME Tetris

#define ENABLE_FRAME_LIMITER 1
#define TARGET_FRAME_DURATION 16
#define AUTOMATIC_FRAME_SKIPPING 1
// Useful when AUTOMATIC_FRAME_SKIPPING is disabled
#define FRAME_SKIPPING_DEFAULT_STATE false


#ifndef DUMMY_ROM
#define DUMMY_ROM 0
Expand Down Expand Up @@ -70,15 +76,15 @@ void gb_error(struct gb_s *gb, const enum gb_error_e gb_err, const uint16_t val)
"INVALID READ",
"INVALID WRITE"
};

switch (gb_err) {
case GB_INVALID_WRITE:
case GB_INVALID_READ:
return;
default:
running = false;
}

// TODO: Handle errors.
}
const uint16_t palette_peanut_GB[4] = {0x9DE1, 0x8D61, 0x3306, 0x09C1};
Expand All @@ -87,48 +93,37 @@ const uint16_t palette_gray[4] = {0xFFFF, 0xAD55, 0x52AA, 0x0000};
const uint16_t palette_gray_negative[4] = {0x0000, 0x52AA, 0xAD55, 0xFFFF};
const uint16_t * palette = palette_peanut_GB;

uint16_t color_from_gb_pixel(uint8_t gb_pixel) {
/**
 * Map an emulator pixel value to a color of the currently selected palette.
 *
 * Only the two low bits of gb_pixel are used as the index into the global
 * `palette` table; the remaining bits are ignored.
 *
 * Declared `static inline`: a plain C99 `inline` definition provides no
 * external definition, so any call the compiler chooses not to inline would
 * fail at link time.
 *
 * @param gb_pixel Pixel value; only bits 0-1 are used.
 * @return The palette entry selected by those two bits.
 */
static inline uint16_t color_from_gb_pixel(uint8_t gb_pixel) {
    return palette[gb_pixel & 0x3];
}

/**
 * LCD line callback: draw one line at native size, centered on the screen.
 *
 * Converts each of the LCD_WIDTH pixels of the line to a palette color in
 * priv->line_buffer, then pushes that buffer as a one-pixel-high rectangle,
 * offset so that an LCD_WIDTH x LCD_HEIGHT image is centered within
 * NW_LCD_WIDTH x NW_LCD_HEIGHT.
 *
 * @param gb     Emulator context; gb->direct.priv must point to the priv_t
 *               that owns line_buffer.
 * @param pixels The LCD_WIDTH pixel values of the line.
 * @param line   Line index; added to the vertical centering offset.
 */
void lcd_draw_line_centered(struct gb_s *gb, const uint8_t pixels[LCD_WIDTH], const uint_fast8_t line) {
    struct priv_t *priv = gb->direct.priv;

    // GCC only honors the `GCC`-prefixed form of this pragma; a bare
    // `#pragma unroll` is ignored as an unknown pragma by arm-none-eabi-gcc.
    #pragma GCC unroll 40
    for (unsigned int x = 0; x < LCD_WIDTH; x++) {
        priv->line_buffer[x] = color_from_gb_pixel(pixels[x]);
    }

    extapp_pushRect((NW_LCD_WIDTH - LCD_WIDTH) / 2, (NW_LCD_HEIGHT - LCD_HEIGHT) / 2 + line, LCD_WIDTH, 1, priv->line_buffer);
}

/**
 * LCD line callback: stretch one line to fill the whole screen using
 * nearest-neighbor scaling (160x144 source to 320x240 output, per the
 * original note).
 *
 * @param gb           Emulator context (not used by this callback).
 * @param input_pixels The LCD_WIDTH pixel values of the line.
 * @param line         Index of the source line being drawn.
 */
static void lcd_draw_line_maximized(struct gb_s * gb, const uint8_t * input_pixels, const uint_fast8_t line) {
    // Horizontal pass: each source pixel is emitted twice in a row.
    uint16_t scaled_row[2*LCD_WIDTH];
    uint16_t *dst = scaled_row;
    for (int src = 0; src < LCD_WIDTH; src++) {
        const uint16_t shade = color_from_gb_pixel(input_pixels[src]);
        *dst++ = shade;
        *dst++ = shade;
    }

    // Vertical pass: the 5/3 ratio turns three source lines into five output
    // lines, so two source lines out of every three are pushed twice.
    const uint16_t top = (5*line)/3;
    const int copies = (line%3 != 0) ? 2 : 1;
    for (int row = 0; row < copies; row++) {
        extapp_pushRect(0, top + row, 2*LCD_WIDTH, 1, scaled_row);
    }
}
/**
 * LCD line callback that draws nothing.
 *
 * The main loop installs it as gb.display.lcd_draw_line on frames that are
 * being skipped, so no line is pushed to the screen for those frames.
 * All parameters are ignored.
 */
void lcd_draw_line_dummy(struct gb_s *gb, const uint8_t pixels[LCD_WIDTH], const uint_fast8_t line) {
    (void)gb;
    (void)pixels;
    (void)line;
}

static void lcd_draw_line_maximized_ratio(struct gb_s * gb, const uint8_t * input_pixels, const uint_fast8_t line) {
// Nearest neighbor scaling of a 160x144 texture to a 266x240 resolution (to keep the ratio)
// Horizontally, we multiply by 1.66 (160*1.66 = 266)
uint16_t output_pixels[266];

#pragma unroll 40
for (int i=0; i<LCD_WIDTH; i++) {
uint16_t color = color_from_gb_pixel(input_pixels[i]);
// We can't use floats, so we use a fixed point representation
// We can't use floats for performance reason, so we use a fixed point
// representation
output_pixels[166*i/100] = color;
// This write is redundant one time out of three, but guarding it with an if is slower
output_pixels[166*i/100+1] = color;
output_pixels[166*i/100+2] = color;
}

// Vertically, we want to scale by a 5/3 ratio. So we need to make 5 lines out of three: we double two lines out of three.
Expand All @@ -153,14 +148,14 @@ char* read_save_file(const char* name, size_t size) {
char* save_name = malloc(strlen(name) + 3);
strcpy(save_name, name);
osd_newextension(save_name, ".gbs");

char* output = malloc(size);

if (extapp_fileExists(save_name, EXTAPP_RAM_FILE_SYSTEM)) {
size_t file_len = 0;
const char* save_content = extapp_fileRead(save_name, &file_len, EXTAPP_RAM_FILE_SYSTEM);
int error = LZ4_decompress_safe(save_content, output, file_len, size);

// Handling corrupted save.
if (error <= 0) {
memset(output, 0xFF, size);
Expand All @@ -174,19 +169,19 @@ char* read_save_file(const char* name, size_t size) {
}

free(save_name);

return output;
}

void write_save_file(const char* name, char* data, size_t size) {
char* save_name = malloc(strlen(name) + 3);
strcpy(save_name, name);
osd_newextension(save_name, ".gbs");

char* output = malloc((size_t) MAX_SCRIPTSTORE_SIZE);

int compressed_size = LZ4_compress_default(data, output, size, MAX_SCRIPTSTORE_SIZE);

if (compressed_size > 0) {
if (extapp_fileWrite(save_name, output, compressed_size, EXTAPP_RAM_FILE_SYSTEM)) {
saveMessage = SAVE_WRITE_OK;
Expand All @@ -196,7 +191,7 @@ void write_save_file(const char* name, char* data, size_t size) {
} else {
saveMessage = SAVE_COMPRESS_ERR;
}

free(save_name);
free(output);
}
Expand All @@ -222,27 +217,27 @@ void extapp_main() {
const char * file_name = select_rom();
if (!file_name)
return;

size_t file_len = 0;
priv.rom = (const uint8_t*) extapp_fileRead(file_name, &file_len, EXTAPP_FLASH_FILE_SYSTEM);
#endif

// Alloc internal RAM.
gb.wram = malloc(WRAM_SIZE);
gb.vram = malloc(VRAM_SIZE);
gb.hram = malloc(HRAM_SIZE);
gb.oam = malloc(OAM_SIZE);

gb_ret = gb_init(&gb, &gb_rom_read, &gb_cart_ram_read, &gb_cart_ram_write, &gb_error, &priv);

// TODO: Handle init errors.
switch(gb_ret) {
case GB_INIT_NO_ERROR:
break;
default:
return;
}

// Alloc and init save RAM.
size_t save_size = gb_get_save_size(&gb);
priv.cart_ram = read_save_file(file_name, save_size);
Expand All @@ -252,12 +247,28 @@ void extapp_main() {
gb_init_lcd(&gb, &lcd_draw_line_centered);

extapp_pushRectUniform(0, 0, NW_LCD_WIDTH, NW_LCD_HEIGHT, 0);


uint32_t lastMSpF = 0;

#if ENABLE_FRAME_LIMITER
// We use a "smart" frame limiter: for each frame slower than the target, we
// add `frame duration - target frame time` to our time budget. If the frame
// was faster than the target, we sleep for (simplified version, ignoring the
// case where time budget > target frame time - last frame duration):
// target frame time - last frame duration - time budget
// This way, we keep the average frame duration consistent.
uint32_t timeBudget = 0;
#endif

// Skip every other frame; saves about 3 ms/f on my N0110
bool frameSkipping = FRAME_SKIPPING_DEFAULT_STATE;
void * drawLineMode = lcd_draw_line_centered;

running = true;
while(running) {
uint64_t start = extapp_millis();
uint64_t kb = extapp_scanKeyboard();

gb.direct.joypad_bits.a = (kb & SCANCODE_Back) ? 0 : 1;
gb.direct.joypad_bits.b = (kb & SCANCODE_OK) ? 0 : 1;
gb.direct.joypad_bits.select = (kb & ((uint64_t)1 << 8)) ? 0 : 1;
Expand All @@ -266,7 +277,7 @@ void extapp_main() {
gb.direct.joypad_bits.right = (kb & SCANCODE_Right) ? 0 : 1;
gb.direct.joypad_bits.left = (kb & SCANCODE_Left) ? 0 : 1;
gb.direct.joypad_bits.down = (kb & SCANCODE_Down) ? 0 : 1;

if (kb & SCANCODE_Backspace)
gb_reset(&gb);
if (kb & SCANCODE_Toolbox) {
Expand All @@ -278,7 +289,7 @@ void extapp_main() {
} else if (wasSavePressed) {
wasSavePressed = false;
}

if (kb & SCANCODE_Alpha) {
if (!wasMSpFPressed) {
MSpFfCounter = !MSpFfCounter;
Expand All @@ -288,23 +299,26 @@ void extapp_main() {
} else if (wasMSpFPressed) {
wasMSpFPressed = false;
}

if (kb & SCANCODE_Zero) {
running = false;
break;
}

if (kb & SCANCODE_Plus) {
gb.display.lcd_draw_line = lcd_draw_line_maximized;
gb.display.lcd_draw_line = lcd_draw_line_maximized_ratio;
drawLineMode = lcd_draw_line_maximized_ratio;
}
if (kb & SCANCODE_Minus) {
gb.display.lcd_draw_line = lcd_draw_line_centered;
drawLineMode = lcd_draw_line_centered;
extapp_pushRectUniform(0, 0, NW_LCD_WIDTH, NW_LCD_HEIGHT, 0);
}
if (kb & SCANCODE_Multiplication) {
gb.display.lcd_draw_line = lcd_draw_line_maximized_ratio;
extapp_pushRectUniform(0, 0, NW_LCD_WIDTH, NW_LCD_HEIGHT, 0);
}
// if (kb & SCANCODE_Division) {
// gb.display.lcd_draw_line = lcd_draw_line_dummy;
// drawLineMode = lcd_draw_line_centered;
// extapp_pushRectUniform(0, 0, NW_LCD_WIDTH, NW_LCD_HEIGHT, 0);
// }

if (kb & SCANCODE_One) {
palette = palette_peanut_GB;
Expand All @@ -323,7 +337,7 @@ void extapp_main() {
int i = 0;
for(i = 0; !gb.gb_frame && i < 32000; i++)
__gb_step_cpu(&gb);

if (saveCooldown > 1) {
saveCooldown--;
switch(saveMessage) {
Expand All @@ -350,21 +364,83 @@ void extapp_main() {
extapp_pushRectUniform(0, NW_LCD_HEIGHT / 2 + LCD_HEIGHT / 2, NW_LCD_WIDTH, NW_LCD_HEIGHT - (NW_LCD_HEIGHT / 2 + LCD_HEIGHT / 2), 0);
}
uint64_t end = extapp_millis();
if (MSpFfCounter) {

if (gb.gb_frame) {
uint16_t MSpF = (uint16_t)(end - start);
char buffer[30];
sprintf(buffer, "%d ms/f", MSpF);
extapp_drawTextSmall(buffer, 2, NW_LCD_HEIGHT - 10, 65535, 0, false);
}

if (MSpFfCounter) {
// We need to average the MSpF as skipped frames are faster
uint16_t MSpFAverage = (MSpF + lastMSpF) / 2;
char buffer[100];
sprintf(buffer, "%d ms/f", MSpFAverage);
// sprintf(buffer, "%d ms/f, %d ", MSpFAverage, timeBudget);
extapp_drawTextSmall(buffer, 2, NW_LCD_HEIGHT - 10, 65535, 0, false);
}

if (frameSkipping) {
if (gb.display.lcd_draw_line != lcd_draw_line_dummy) {
drawLineMode = gb.display.lcd_draw_line;
gb.display.lcd_draw_line = lcd_draw_line_dummy;
} else {
gb.display.lcd_draw_line = drawLineMode;
}
}

#if ENABLE_FRAME_LIMITER
uint32_t differenceToTarget = abs(TARGET_FRAME_DURATION - MSpF);

if (TARGET_FRAME_DURATION - MSpF > 0) {
// Frame was faster than target, so let's slow down if we have time to
// catch up

// If we accumulated lateness on previous frames, spend this frame's spare time paying it back
if (timeBudget >= differenceToTarget) {
// We were too slow at previous frames so we have to catch up
timeBudget -= differenceToTarget;
} else if (timeBudget > 0) {
// We can catch up everything on one frame, so let's sleep a bit less
// than what we would have done if we weren't late
uint32_t time_to_sleep = differenceToTarget - timeBudget;
extapp_msleep(time_to_sleep);
timeBudget = 0;
} else {
// We have no lateness to catch up, so we just sleep until the frame reaches the target duration (16 ms/f)
extapp_msleep(differenceToTarget);

#if AUTOMATIC_FRAME_SKIPPING
// Disable frame skipping as we are running faster than required
frameSkipping = false;
gb.display.lcd_draw_line = drawLineMode;
#endif
}
} else {
// The comparison is technically not required, but we do it to avoid the
// performance cost of a redundant assignment when we are already at the
// maximum time budget, which is often the case when lagging
if (timeBudget < TARGET_FRAME_DURATION) {
// Frame was slower than target, so we need to catch up.
timeBudget += differenceToTarget;

if (timeBudget >= TARGET_FRAME_DURATION) {
timeBudget = TARGET_FRAME_DURATION;

#if AUTOMATIC_FRAME_SKIPPING
// Enable frame skipping in an attempt to speed up emulation
frameSkipping = true;
#endif
}
}
}
#endif
lastMSpF = MSpF;
}
}

free(gb.wram);
free(gb.vram);
free(gb.hram);
free(gb.oam);

write_save_file(file_name, priv.cart_ram, save_size);
free(priv.cart_ram);
}
8 changes: 4 additions & 4 deletions apps/Peanut-GB/selector.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ const char * select_rom() {
extapp_pushRectUniform(0, 0, LCD_WIDTH, LCD_HEIGHT, SELECTOR_COLOR_BG);
extapp_drawTextLarge(" Select a ROM ", 0, 0, SELECTOR_COLOR_HEAD_FG, SELECTOR_COLOR_HEAD_BG, false);

int nb = extapp_fileListWithExtension(filenames, max_roms, "", EXTAPP_FLASH_FILE_SYSTEM);
int nb = extapp_fileListWithExtension((const char **)filenames, max_roms, "", EXTAPP_FLASH_FILE_SYSTEM);

size_t len;
for(int i = 0; i < nb; i++) {
Expand All @@ -87,13 +87,13 @@ const char * select_rom() {
for(uint16_t i = 0x0134; i < 0x014D; i++) {
checksum += ~data[i];
}

if (checksum != data[0x014D]) {
filenames[i] = NULL;
}
}


}
nb = remove(filenames, filenames + nb) - filenames;

Expand Down

0 comments on commit e2f0957

Please sign in to comment.