From 8ad0d64b157f2a609434aba68fffc6a1b98bf2f0 Mon Sep 17 00:00:00 2001 From: ZERICO2005 <71151164+ZERICO2005@users.noreply.github.com> Date: Tue, 15 Jul 2025 22:37:56 -0600 Subject: [PATCH] added imulu_b lmulu_b and smulu_b --- src/crt/imulu_b.src | 66 ++++++ src/crt/imulu_b_fast.src | 38 ++++ src/crt/lmulu_b.src | 61 +++++ src/crt/lmulu_b_fast.src | 53 +++++ src/crt/os.src | 2 - src/crt/smulu_b.src | 25 ++ src/crt/smulu_b_fast.src | 26 +++ test/standalone/mulu_b/autotest.json | 40 ++++ test/standalone/mulu_b/makefile | 19 ++ test/standalone/mulu_b/src/crt_wrap.asm | 100 ++++++++ test/standalone/mulu_b/src/main.c | 288 ++++++++++++++++++++++++ 11 files changed, 716 insertions(+), 2 deletions(-) create mode 100644 src/crt/imulu_b.src create mode 100644 src/crt/imulu_b_fast.src create mode 100644 src/crt/lmulu_b.src create mode 100644 src/crt/lmulu_b_fast.src create mode 100644 src/crt/smulu_b.src create mode 100644 src/crt/smulu_b_fast.src create mode 100644 test/standalone/mulu_b/autotest.json create mode 100644 test/standalone/mulu_b/makefile create mode 100644 test/standalone/mulu_b/src/crt_wrap.asm create mode 100644 test/standalone/mulu_b/src/main.c diff --git a/src/crt/imulu_b.src b/src/crt/imulu_b.src new file mode 100644 index 000000000..47d232c7e --- /dev/null +++ b/src/crt/imulu_b.src @@ -0,0 +1,66 @@ + assume adl=1 + + section .text + + public __imul_b + public __imulu_b + +if PREFER_OS_CRT + +__imul_b := 000150h +__imulu_b := __imul_b + +else + +__imul_b: +__imulu_b: + +; Multiplies UHL by A (unsigned) and returns the 24-bit product uhl. +; I: A=multiplier, UHL=multiplicand, ADL=1 +; O: uhl=UHL*A +; CC: 32*r(PC)+12*r(SPL)+9*w(SPL)+13 +; CC: 31 bytes | 32F + 12R + 9W + 13 +Mul_UHL_A_UHL: + push de + push af ; preserve A (F is pushed along with it) + + dec sp + push hl + inc sp + ; (SP + 3) = preserved F (low byte of the AF pushed just above) + ; (SP + 2) = ? 
--> UDE + ; (SP + 1) = UHL --> D + ; (SP + 0) = H --> E + ; (SP - 1) = L + pop de ; D = UHL, E = H + + ld e, a + mlt de ; DE = UHL * A + + ld d, l + + ld l, a + mlt hl ; HL = H * A + + ld a, h + add a, e + ld h, a + + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + + pop af ; restore A + ld e, a + mlt de ; DE = L * A + add hl, de + + pop de + ret + +end if diff --git a/src/crt/imulu_b_fast.src b/src/crt/imulu_b_fast.src new file mode 100644 index 000000000..36572b284 --- /dev/null +++ b/src/crt/imulu_b_fast.src @@ -0,0 +1,38 @@ + assume adl=1 + + section .text + + public __imulu_b_fast + +__imulu_b_fast: + +; Multiplies UHL by A (unsigned) and returns the 24-bit product uhl. +; I: A=multiplier, UHL=multiplicand, ADL=1 +; O: uhl=UHL*A (A, BC, and DE are not preserved) +; CC: 28*r(PC)+6*r(SPL)+3*w(SPL)+13 +; CC: 27 bytes | 28F + 6R + 3W + 13 +Mul_UHL_A_UHL_Fast: + dec sp + push hl + inc sp + pop de ; D = UHL, E = H + ld e, a + mlt de ; DE = UHL * A + ld b, l + ld c, a + mlt bc ; BC = L * A + ld l, a + mlt hl ; HL = H * A + ld a, h + add a, e + ld h, a + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, bc + ret diff --git a/src/crt/lmulu_b.src b/src/crt/lmulu_b.src new file mode 100644 index 000000000..3533f96ac --- /dev/null +++ b/src/crt/lmulu_b.src @@ -0,0 +1,61 @@ + assume adl=1 + + section .text + + public __lmulu_b + +__lmulu_b: + +; Multiplies EUHL by A and returns the 32-bit product euhl. 
+; I: A=multiplier, EUHL=multiplicand, ADL=1 +; O: euhl=EUHL*A +; CC: 43*r(PC)+15*r(SPL)+12*w(SPL)+17 +; CC: 42 bytes | 43F + 15R + 12W + 17 +Mul_EUHL_A_EUHL: + push bc + push de + + dec sp + push hl + inc sp + pop bc ; B = UHL, C = H + + ld c, a + mlt bc ; BC = A * U + + ld d, a ; preserve A + push de ; save D = A and E for the later A * E + ld d, l + ld e, a + + ld l, a + mlt hl ; HL = A * H + + ld a, h + add a, c + ld h, a ; A = H + adc a, b ; A = H + AU.hi + carry + sub a, h ; A = AU.hi + carry + + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + + mlt de ; DE = A * L + add hl, de ; UHL = AH.hi + AU.lo, AH.lo + AL.hi, AL.lo + + pop de + ld b, d ; retrieve A + mlt de ; DE = A * E + adc a, e ; AU.hi + AE.lo + Carry + pop de + ld e, a + ld a, b ; restore A + + pop bc + ret diff --git a/src/crt/lmulu_b_fast.src b/src/crt/lmulu_b_fast.src new file mode 100644 index 000000000..eb93969a0 --- /dev/null +++ b/src/crt/lmulu_b_fast.src @@ -0,0 +1,53 @@ + assume adl=1 + + section .text + + public __lmulu_b_fast + +__lmulu_b_fast: + +; Multiplies EUHL by A and returns the 32-bit product euhl. 
+; I: A=multiplier, EUHL=multiplicand, ADL=1 +; O: euhl=EUHL*A +; CC: 37*r(PC)+9*r(SPL)+6*w(SPL)+17 +; CC: 36 bytes | 37F + 9R + 6W + 17 +Mul_EUHL_A_EUHL_Fast: + dec sp + push hl + inc sp + pop bc ; B = UHL, C = H + + ld c, a + mlt bc ; BC = A * U + + ld d, a + push de ; save D = A and E for the later A * E + ld d, l + ld e, a + + ld l, a + mlt hl ; HL = A * H + + ld a, h + add a, c + ld h, a ; A = H + adc a, b ; A = H + AU.hi + carry + sub a, h ; A = AU.hi + carry + + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + add hl, hl + + mlt de ; DE = A * L + add hl, de ; UHL = AH.hi + AU.lo, AH.lo + AL.hi, AL.lo + + pop de + mlt de ; DE = A * E + adc a, e ; AU.hi + AE.lo + Carry + ld e, a + ret diff --git a/src/crt/os.src b/src/crt/os.src index dcf79294d..69e445aa6 100644 --- a/src/crt/os.src +++ b/src/crt/os.src @@ -16,8 +16,6 @@ __fsub := 000290h __ftol := 00027Ch public __ftoul __ftoul := __ftol - public __imul_b -__imul_b := 000150h public __indcall __indcall := 00015Ch public __ishl_b diff --git a/src/crt/smulu_b.src b/src/crt/smulu_b.src new file mode 100644 index 000000000..ef083bb41 --- /dev/null +++ b/src/crt/smulu_b.src @@ -0,0 +1,25 @@ + assume adl=1 + + section .text + + public __smulu_b + +__smulu_b: + +; Multiplies HL by A (unsigned) and returns the 16-bit product hl. +; I: A=multiplier, HL=multiplicand, ADL=1 +; O: hl=HL*A +; CC: 15*r(PC)+6*r(SPL)+3*w(SPL)+9 +; CC: 14 bytes | 15F + 6R + 3W + 9 +Mul_HL_A_HL: + push de + ld e, a + ld d, h + ld h, e + mlt de ; DE = H * A + mlt hl ; HL = A * L + ld d, e + ld e, 0 + add hl, de + pop de + ret diff --git a/src/crt/smulu_b_fast.src b/src/crt/smulu_b_fast.src new file mode 100644 index 000000000..82cb91739 --- /dev/null +++ b/src/crt/smulu_b_fast.src @@ -0,0 +1,26 @@ + + + assume adl=1 + + section .text + + public __smulu_b_fast + +__smulu_b_fast: + +; Multiplies HL by A (unsigned) and returns the 16-bit product hl. 
+; I: A=multiplier, HL=multiplicand, ADL=1 +; O: hl=HL*A +; CC: 12*r(PC)+3*r(SPL)+9 +; CC: 11 bytes | 12F + 3R + 9 +Mul_HL_A_HL_Fast: +; destroys DE and A. You can swap DE with BC to destroy BC instead + ld e, a + ld d, h + ld h, e + mlt de ; DE = H * A + mlt hl ; HL = A * L + ld a, e ; A = low byte of H * A + add a, h ; plus the high byte of A * L + ld h, a ; H = high byte of the 16-bit product + ret diff --git a/test/standalone/mulu_b/autotest.json b/test/standalone/mulu_b/autotest.json new file mode 100644 index 000000000..be5eeed3c --- /dev/null +++ b/test/standalone/mulu_b/autotest.json @@ -0,0 +1,40 @@ +{ + "transfer_files": [ + "bin/DEMO.8xp" + ], + "target": { + "name": "DEMO", + "isASM": true + }, + "sequence": [ + "action|launch", + "delay|1000", + "hashWait|1", + "key|enter", + "delay|300", + "hashWait|2" + ], + "hashes": { + "1": { + "description": "All tests passed", + "timeout": 5000, + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "38E2AD5A" + ] + }, + "2": { + "description": "Exit", + "start": "vram_start", + "size": "vram_16_size", + "expected_CRCs": [ + "FFAF89BA", + "101734A5", + "9DA19F44", + "A32840C8", + "349F4775" + ] + } + } +} diff --git a/test/standalone/mulu_b/makefile b/test/standalone/mulu_b/makefile new file mode 100644 index 000000000..41673220d --- /dev/null +++ b/test/standalone/mulu_b/makefile @@ -0,0 +1,19 @@ +# ---------------------------- +# Makefile Options +# ---------------------------- + +NAME = DEMO +ICON = icon.png +DESCRIPTION = "CE C Toolchain Demo" +COMPRESSED = NO +ARCHIVED = NO + +CFLAGS = -Wall -Wextra -Wshadow -Wconversion -Wformat=2 -Wno-sign-conversion -Oz +CXXFLAGS = -Wall -Wextra -Wshadow -Wconversion -Wformat=2 -Wno-sign-conversion -Oz + +PREFER_OS_LIBC = NO +PREFER_OS_CRT = NO + +# ---------------------------- + +include $(shell cedev-config --makefile) diff --git a/test/standalone/mulu_b/src/crt_wrap.asm b/test/standalone/mulu_b/src/crt_wrap.asm new file mode 100644 index 000000000..9b1bcad1d --- /dev/null +++ b/test/standalone/mulu_b/src/crt_wrap.asm @@ -0,0 +1,100 @@ + 
assume adl=1 + + section .data + + public _prev_reg +_prev_reg: + ; L H U E D U C B U A X I U Y I U + db 0,0,0, 0,0,0, 0,0,0, 0, 0,0,0, 0,0,0 + + public _next_reg +_next_reg: + ; L H U E D U C B U A X I U Y I U + db 0,0,0, 0,0,0, 0,0,0, 0, 0,0,0, 0,0,0 + + section .text + + private _set_prev_reg +_set_prev_reg: + ld (_prev_reg + 0), hl + ld (_prev_reg + 3), de + ld (_prev_reg + 6), bc + ld (_prev_reg + 9), a + ; ld (_prev_reg + 10), ix + ; ld (_prev_reg + 13), iy + ret + + private _set_next_reg +_set_next_reg: + ld (_next_reg + 0), hl + ld (_next_reg + 3), de + ld (_next_reg + 6), bc + ld (_next_reg + 9), a + ; ld (_next_reg + 10), ix + ; ld (_next_reg + 13), iy + ret + + public _CRT_smulu_b +_CRT_smulu_b: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld a, (iy + 6) + call _set_prev_reg ; snapshot HL/DE/BC/A before the routine runs + call __smulu_b + jq _set_next_reg ; snapshot HL/DE/BC/A afterwards; its ret returns to our caller + + public _CRT_smulu_b_fast +_CRT_smulu_b_fast: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld a, (iy + 6) + jp __smulu_b_fast + + public _CRT_imulu_b +_CRT_imulu_b: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld a, (iy + 6) + call _set_prev_reg ; snapshot HL/DE/BC/A before the routine runs + call __imulu_b + jq _set_next_reg ; snapshot HL/DE/BC/A afterwards; its ret returns to our caller + + public _CRT_imulu_b_fast +_CRT_imulu_b_fast: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld a, (iy + 6) + jp __imulu_b_fast + + public _CRT_lmulu_b +_CRT_lmulu_b: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld e, (iy + 6) + ld a, (iy + 9) + call _set_prev_reg ; snapshot HL/DE/BC/A before the routine runs + call __lmulu_b + jq _set_next_reg ; snapshot HL/DE/BC/A afterwards; its ret returns to our caller + + public _CRT_lmulu_b_fast +_CRT_lmulu_b_fast: + ld iy, 0 + add iy, sp + ld hl, (iy + 3) + ld e, (iy + 6) + ld a, (iy + 9) + jp __lmulu_b_fast + + extern __smulu_b + extern __smulu_b_fast + + extern __imulu_b + extern __imulu_b_fast + + extern __lmulu_b + extern __lmulu_b_fast diff --git a/test/standalone/mulu_b/src/main.c b/test/standalone/mulu_b/src/main.c new file mode 100644 index 000000000..e120881d6 --- /dev/null +++ b/test/standalone/mulu_b/src/main.c @@ -0,0 +1,288 @@ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include 
+#include +#include +#include + +//------------------------------------------------------------------------------ +// Config +//------------------------------------------------------------------------------ + +#define RANDOM_TEST_COUNT 256 + +// define to 0 or 1 +#define DEBUG_DIAGNOSTICS 0 + +#define AUTOTEST_SEED 0x7184CE + +//------------------------------------------------------------------------------ +// Tests +//------------------------------------------------------------------------------ + +#define C(expr) do { if (!(expr)) { return __LINE__; } } while (0) // fail the enclosing test with the current line number + +#define TEST(test) do { ret = (test); if (ret != 0) { return ret; } } while (0) // stop at the first failing test, propagating its line number + +#ifndef DEBUG_DIAGNOSTICS +#error "DEBUG_DIAGNOSTICS needs to be defined to 0 or 1" +#endif + +#if RANDOM_TEST_COUNT < 4 +#error "RANDOM_TEST_COUNT is out of range" +#endif + +#if DEBUG_DIAGNOSTICS +#define test_printf printf +#else +#define test_printf(...) +#endif + +#define CMP(format, x, y, truth, guess) do { \ + if ((truth) != (guess)) { \ + test_printf("I: " format " * %02X\nT: " format "\nG: " format "\n", x, y, truth, guess); \ + return __LINE__; \ + } \ +} while(0) + +static_assert(RAND_MAX == INT_MAX, "RAND_MAX has changed"); + +#define rand8() ((uint8_t)rand()) + +#define rand16() ((uint16_t)rand()) + +__attribute__((__unused__)) static uint24_t rand24(void) { + union { + uint24_t u24; + struct { + uint16_t lo16; + uint8_t hi8; + } part; + } split; + split.part.lo16 = (uint16_t)rand(); + split.part.hi8 = (uint8_t)rand(); + return split.u24; +} + +__attribute__((__unused__)) static uint32_t rand32(void) { + union { + uint32_t u32; + uint16_t u16[2]; + } split; + split.u16[0] = (uint16_t)rand(); + split.u16[1] = (uint16_t)rand(); + return split.u32; +} + +__attribute__((__unused__)) static uint48_t rand48(void) { + union { + uint48_t u48; + uint16_t u16[3]; + } split; + split.u16[0] = (uint16_t)rand(); + split.u16[1] = (uint16_t)rand(); + split.u16[2] = (uint16_t)rand(); + return split.u48; +} + +__attribute__((__unused__)) static uint64_t 
rand64(void) { + union { + uint64_t u64; + uint16_t u16[4]; + } split; + split.u16[0] = (uint16_t)rand(); + split.u16[1] = (uint16_t)rand(); + split.u16[2] = (uint16_t)rand(); + split.u16[3] = (uint16_t)rand(); + return split.u64; +} + +uint16_t CRT_smulu_b(uint16_t, uint8_t); +uint16_t CRT_smulu_b_fast(uint16_t, uint8_t); + +uint24_t CRT_imulu_b(uint24_t, uint8_t); +uint24_t CRT_imulu_b_fast(uint24_t, uint8_t); + +uint32_t CRT_lmulu_b(uint32_t, uint8_t); +uint32_t CRT_lmulu_b_fast(uint32_t, uint8_t); + +typedef struct reg_group { + union { + struct { + uint24_t HL; + uint24_t DE; + uint24_t BC; + }; + struct { + uint8_t L; + uint8_t H; + uint8_t UHL; + uint8_t E; + uint8_t D; + uint8_t UDE; + uint8_t C; + uint8_t B; + uint8_t UBC; + }; + }; + uint8_t A; + // uint24_t IX; + // uint24_t IY; +} reg_group; +extern reg_group prev_reg; +extern reg_group next_reg; + +void print_reg(void) { + test_printf( + "A: %02X -> %02X\n"\ + "HL: %06X -> %06X\n"\ + "BC: %06X -> %06X\n"\ + "DE: %06X -> %06X\n", + prev_reg.A , next_reg.A , + prev_reg.HL, next_reg.HL, + prev_reg.BC, next_reg.BC, + prev_reg.DE, next_reg.DE + ); +} + +static bool test_A_UBC_UDE(void) { + if ( + (prev_reg.A == next_reg.A ) && + (prev_reg.BC == next_reg.BC ) && + (prev_reg.DE == next_reg.DE ) + ) { + return true; + } + print_reg(); + return false; +} + +static bool test_A_UBC_UD(void) { + if ( + (prev_reg.A == next_reg.A ) && + (prev_reg.BC == next_reg.BC ) && + (prev_reg.UDE == next_reg.UDE) && + (prev_reg.D == next_reg.D ) + ) { + return true; + } + print_reg(); + return false; +} + +int test_smulu_b(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint16_t truth, guess, x; + uint8_t y; + x = rand16(); + y = rand8(); + truth = (uint16_t)((unsigned int)x * y); // multiply unsigned: uint16_t operands would promote to signed int and could overflow + guess = CRT_smulu_b(x, y); + CMP("%04X", x, y, truth, guess); + C((test_A_UBC_UDE())); + } + return 0; +} + +int test_smulu_b_fast(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint16_t truth, guess, x; + uint8_t y; + x = rand16(); + y = 
rand8(); + truth = (uint16_t)((unsigned int)x * y); // multiply unsigned: uint16_t operands would promote to signed int and could overflow + guess = CRT_smulu_b_fast(x, y); + CMP("%04X", x, y, truth, guess); + } + return 0; +} + +int test_imulu_b(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint24_t truth, guess, x; + uint8_t y; + x = rand24(); + y = rand8(); + truth = x * (uint24_t)y; + guess = CRT_imulu_b(x, y); + CMP("%06X", x, y, truth, guess); + C((test_A_UBC_UDE())); + } + return 0; +} + +int test_imulu_b_fast(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint24_t truth, guess, x; + uint8_t y; + x = rand24(); + y = rand8(); + truth = x * (uint24_t)y; + guess = CRT_imulu_b_fast(x, y); + CMP("%06X", x, y, truth, guess); + } + return 0; +} + +int test_lmulu_b(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint32_t truth, guess, x; + uint8_t y; + x = rand32(); + y = rand8(); + truth = x * (uint32_t)y; + guess = CRT_lmulu_b(x, y); + CMP("%08lX", x, y, truth, guess); + C((test_A_UBC_UD())); + } + return 0; +} + +int test_lmulu_b_fast(void) { + for (int i = 0; i < RANDOM_TEST_COUNT; i++) { + uint32_t truth, guess, x; + uint8_t y; + x = rand32(); + y = rand8(); + truth = x * (uint32_t)y; + guess = CRT_lmulu_b_fast(x, y); + CMP("%08lX", x, y, truth, guess); + } + return 0; +} + +int run_tests(void) { + srand(AUTOTEST_SEED); + int ret = 0; + TEST(test_smulu_b()); + TEST(test_smulu_b_fast()); + TEST(test_imulu_b()); + TEST(test_imulu_b_fast()); + TEST(test_lmulu_b()); + TEST(test_lmulu_b_fast()); + + return ret; +} + +int main(void) { + os_ClrHome(); + int failed_test = run_tests(); + if (failed_test != 0) { + char buf[sizeof("Failed test L-8388608\n")]; + boot_sprintf(buf, "Failed test L%d\n", failed_test); + fputs(buf, stdout); + } else { + fputs("All tests passed", stdout); + } + + while (!os_GetCSC()); + + return 0; +}