diff --git a/.github/scripts/test.sh b/.github/scripts/test.sh new file mode 100755 index 000000000..60eca5947 --- /dev/null +++ b/.github/scripts/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash +runmats() { + echo make allxhelp "$@" + make -C ${MACH}/mats allxhelp "$@" 2>&1 | tee -a Make.out | grep '^matting ' +} + +# Split these out so that we get output every 10 minutes on Windows builds. +runmats o=0 +runmats o=3 +runmats o=3 cp0=t +runmats o=3 cp0=t eval=interpret + +if [ -f ${MACH}/mats/summary ]; then + cat ${MACH}/mats/summary + diff -q .github/scripts/summary ${MACH}/mats/summary + exit $? +else + exit 1 +fi diff --git a/c/random.c b/c/random.c index d496d3a36..c7d80a718 100644 --- a/c/random.c +++ b/c/random.c @@ -22,12 +22,14 @@ /* Representation is arecord with 6 `double` fields: */ -#define RANDSTATEX10(x) (((double*)&RECORDINSTIT(x, 0))[0]) -#define RANDSTATEX11(x) (((double*)&RECORDINSTIT(x, 0))[1]) -#define RANDSTATEX12(x) (((double*)&RECORDINSTIT(x, 0))[2]) -#define RANDSTATEX20(x) (((double*)&RECORDINSTIT(x, 0))[3]) -#define RANDSTATEX21(x) (((double*)&RECORDINSTIT(x, 0))[4]) -#define RANDSTATEX22(x) (((double*)&RECORDINSTIT(x, 0))[5]) +#define RECORDINSTDBLA(x) ((double *)((uptr)&RECORDINSTIT(x, 0) + (max_float_alignment - ptr_bytes))) + +#define RANDSTATEX10(x) (RECORDINSTDBLA(x)[0]) +#define RANDSTATEX11(x) (RECORDINSTDBLA(x)[1]) +#define RANDSTATEX12(x) (RECORDINSTDBLA(x)[2]) +#define RANDSTATEX20(x) (RECORDINSTDBLA(x)[3]) +#define RANDSTATEX21(x) (RECORDINSTDBLA(x)[4]) +#define RANDSTATEX22(x) (RECORDINSTDBLA(x)[5]) /* The Generator ============= diff --git a/c/schlib.c b/c/schlib.c index 4a73c560b..772e95487 100644 --- a/c/schlib.c +++ b/c/schlib.c @@ -216,6 +216,8 @@ void S_call_help(tc_in, singlep, lock_ts) ptr tc_in; IBOOL singlep; IBOOL lock_t the C stack and we may end up in a garbage collection */ code = CP(tc); if (Sprocedurep(code)) code = CLOSCODE(code); + if (!IMMEDIATE(code) && !Scodep(code)) + S_error_abort("S_call_help: invalid code pointer"); S_immobilize_object(code); CP(tc) = AC1(tc); diff --git a/c/vfasl.c b/c/vfasl.c index a873eff68..f2c6f27cd 100644 --- a/c/vfasl.c +++ b/c/vfasl.c @@ -167,6 +167,7 @@ static ptr copy(vfasl_info *vfi, ptr pp, seginfo *si); static uptr sweep(vfasl_info *vfi, ptr p); static int is_rtd(ptr tf, vfasl_info *vfi); +static IFASLCODE abs_reloc_variant(IFASLCODE type); static ptr vfasl_encode_relocation(vfasl_info *vfi, ptr obj); static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets, IBOOL to_static); static ptr find_pointer_from_offset(uptr p_off, ptr *vspaces, uptr *vspace_offsets); @@ -1083,6 +1084,20 @@ static int is_rtd(ptr tf, vfasl_info *vfi) #define VFASL_RELOC_TAG(p) (UNFIX(p) & ((1 << VFASL_RELOC_TAG_BITS) - 1)) #define VFASL_RELOC_POS(p) (UNFIX(p) >> VFASL_RELOC_TAG_BITS) +/* Picks a relocation variant that fits into the actual relocation's + shape, but holds an absolue value */ +static IFASLCODE abs_reloc_variant(IFASLCODE type) { + if (type == reloc_abs) + return reloc_abs; +#if defined(I386) || defined(X86_64) + return reloc_abs; +#elif defined(ARMV6) + return reloc_arm32_abs; +#else + >> need to fill in for this platform << +#endif +} + static ptr vfasl_encode_relocation(vfasl_info *vfi, ptr obj) { ptr pos; int which_singleton; @@ -1142,7 +1157,7 @@ static void relink_code(ptr co, ptr sym_base, ptr *vspaces, uptr *vspace_offsets code_off = RELOC_CODE_OFFSET(entry); } a += code_off; - obj = S_get_code_obj(reloc_abs, co, a, item_off); + obj = S_get_code_obj(abs_reloc_variant(RELOC_TYPE(entry)), co, a, item_off); if (IMMEDIATE(obj)) { if (Sfixnump(obj)) { diff --git a/csug/foreign.stex b/csug/foreign.stex index 879bd12d0..606554ea9 100644 --- a/csug/foreign.stex +++ b/csug/foreign.stex @@ -220,7 +220,10 @@ Windows: \scheme{__stdcall}, \scheme{__cdecl}, and \scheme{__com} (32-bit only). Since \scheme{__cdecl} is the default, specifying \scheme{__cdecl} is equivalent to specifying \scheme{#f} or no convention. Finally, \var{conv} can be \scheme{__collect_safe} to indicate that garbage -collection is allowed concurrent to a call of the foreign procedure. +collection is allowed concurrent to a call of the foreign procedure, or it +can be \var{conv} can be \scheme{__varargs} to indicate that the procedure +uses a convention that works with a variable number of arguments (which +differs from the fixed-argument variant for some platforms and conventions). Use \scheme{__stdcall} to access most Windows API procedures. Use \scheme{__cdecl} for Windows API varargs procedures, diff --git a/mats/fl.ms b/mats/fl.ms index 2dd2a3760..cd9a8a635 100644 --- a/mats/fl.ms +++ b/mats/fl.ms @@ -26,6 +26,13 @@ (flonum->fixnum (* (+ (ash (most-positive-fixnum) -1) 1) 1.0))) (eq? (most-negative-fixnum) (flonum->fixnum (* (most-negative-fixnum) 1.0))) + (eq? (+ (ash (most-positive-fixnum) -1) 1) + (flonum->fixnum (fl+ (* (+ (ash (most-positive-fixnum) -1) 1) 1.0) 0.5))) + (or (not (fixnum? (inexact->exact (exact->inexact (most-positive-fixnum))))) + (eq? (most-positive-fixnum) + (flonum->fixnum (fl+ (* (most-positive-fixnum) 1.0) 0.5)))) + (eq? (most-negative-fixnum) + (flonum->fixnum (fl- (* (most-negative-fixnum) 1.0) 0.5))) (eq? (flonum->fixnum 0.0) 0) (eq? (flonum->fixnum 1.0) 1) (eq? (flonum->fixnum +4.5) +4) diff --git a/mats/foreign.ms b/mats/foreign.ms index 92e911396..ad5fb5170 100644 --- a/mats/foreign.ms +++ b/mats/foreign.ms @@ -1038,6 +1038,12 @@ (define call-u64 (foreign-procedure "call_u64" (ptr unsigned-64 int int) unsigned-64)) (define call-sf (foreign-procedure "call_sf" (ptr single-float int int) single-float)) (define call-df (foreign-procedure "call_df" (ptr double-float int int) double-float)) + (define call-varargs-df (foreign-procedure "call_varargs_df" (ptr double-float int int) double-float)) + (define call-varargs-i7df (foreign-procedure "call_varargs_i7df" (ptr int + double-float double-float double-float + double-float double-float double-float + double-float) + double-float)) (define ($test-call-int signed? size call-int make-fc) (define n10000 (expt 256 size)) (define nffff (- n10000 1)) @@ -1151,6 +1157,22 @@ (double-float) double-float) 73.25 7 23) 108.25) + (equal? + (call-varargs-df + (foreign-callable + __varargs + (lambda (x y) (+ x y 5)) + (double-float double-float) double-float) + 10.25 20 300) + 325.5) + (equal? + (call-varargs-i7df + (foreign-callable + __varargs + (lambda (i a b c d e f g) (+ i a b c d e f g 7)) + (int double-float double-float double-float double-float double-float double-float double-float) double-float) + 1 2.2 3.2 4.5 6.7 8.9 10.1 11.5) + 55.1) (error? (call-i8 @@ -1212,6 +1234,13 @@ (lambda (x) '(- x 7)) (double-float) double-float) 73.25 0 0)) + (error? + (call-varargs-df + (foreign-callable + __varargs + (lambda (x y) '(- x 7)) + (double-float double-float) double-float) + 73.25 0 0)) (begin (define u32xu32->u64 @@ -2540,9 +2569,33 @@ (with-object-kept-live handler (call_many_times (foreign-callable-entry-point handler))) + (unlock-object handler) v) 14995143) + (equal? + (let () + (define v 0) + (define call_many_times_bv (foreign-procedure "call_many_times_bv" (void*) void)) + (define work + (lambda (bv) + (set! v (+ v (bytevector-u8-ref bv 0))) + ;; Varying work, as above: + (let loop ([n (bitwise-and (bytevector-u8-ref bv 1) #xFFFF)]) + (unless (zero? n) + (set! v (add1 v)) + (loop (bitwise-arithmetic-shift-right n 1)))))) + (define handlers (list (foreign-callable work (u8*) void) + (foreign-callable work (u16*) void) + (foreign-callable work (u32*) void))) + (map lock-object handlers) + (for-each (lambda (handler) + (call_many_times_bv (foreign-callable-entry-point handler))) + handlers) + (map unlock-object handlers) + v) + 103500000) + ;; regression test related to saving registers that hold allocated ;; callable argument (let* ([call-with-many-args (foreign-procedure "call_with_many_args" (void*) boolean)] @@ -2687,7 +2740,7 @@ (separate-eval `(begin ,load-libc - (define f (foreign-procedure "printf" (string double) int)) + (define f (foreign-procedure __varargs "printf" (string double) int)) (f "(%g)" 3.5) (void))) read) @@ -2697,7 +2750,7 @@ (separate-eval `(begin ,load-libc - (define f (foreign-procedure "printf" (string double double double double double double) int)) + (define f (foreign-procedure __varargs "printf" (string double double double double double double) int)) (f "(%g %g %g %g %g %g)" 3.5 2.5 -1.5 6.75 8.25 -9.5) (void))) read) @@ -2707,7 +2760,7 @@ (separate-eval `(begin ,load-libc - (define f (foreign-procedure "printf" (string double double double double double double double double) int)) + (define f (foreign-procedure __varargs "printf" (string double double double double double double double double) int)) (f "(%g %g %g %g %g %g %g %g)" 3.5 2.5 -1.5 6.75 8.25 -9.5 1e32 -4.5) (void))) read) @@ -2717,11 +2770,30 @@ (separate-eval `(begin ,load-libc - (define f (foreign-procedure "printf" (string double double double double double double double double double double) int)) + (define f (foreign-procedure __varargs "printf" (string double double double double double double double double double double) int)) (f "(%g %g %g %g %g %g %g %g %g %g)" 3.5 2.5 -1.5 6.75 8.25 -9.5 1e32 -4.5 7.25 -0.5) (void))) read) '(3.5 2.5 -1.5 6.75 8.25 -9.5 1e32 -4.5 7.25 -0.5)) + + (equal? (let ([cb (foreign-callable __varargs + (lambda (x y) (+ x y 5)) + (double-float double-float) double-float)]) + (with-object-kept-live + cb + ((foreign-procedure __varargs (foreign-callable-entry-point cb) + (double-float double-float) double-float) + 3.4 5.5))) + 13.9) + (equal? (let ([cb (foreign-callable __varargs + (lambda (x y) (+ x y 5)) + (double-float double-float) single-float)]) + (with-object-kept-live + cb + ((foreign-procedure __varargs (foreign-callable-entry-point cb) + (double-float double-float) single-float) + 3.5 -5.25))) + 3.25) ) (mat structs diff --git a/mats/foreign2.c b/mats/foreign2.c index 3e12cf1ff..56b2f5b2d 100644 --- a/mats/foreign2.c +++ b/mats/foreign2.c @@ -244,6 +244,17 @@ EXPORT double_float call_df(ptr code, double_float x, int m, int k) { return (*((double_float (*) (double_float))Sforeign_callable_entry_point(code)))(x + m) + k; } +EXPORT double_float call_varargs_df(ptr code, double_float x, int m, int k) { + return (*((double_float (*) (double, ...))Sforeign_callable_entry_point(code)))(x - m, x + m) + k; +} + +EXPORT double_float call_varargs_i7df(ptr code, int i, + double_float a, double_float b, double_float c, + double_float d, double_float e, double_float f, + double_float g) { + return (*((double_float (*) (int, ...))Sforeign_callable_entry_point(code)))(i, a, b, c, d, e, f, g); +} + EXPORT u8 *u8_star_to_u8_star(u8 *s) { return s == (u8 *)0 ? (u8 *)0 : s + 1; } @@ -444,6 +455,18 @@ EXPORT void call_many_times(void (*f)(iptr)) } } +EXPORT void call_many_times_bv(void (*f)(char *s)) +{ + /* make this sensible as u8*, u16*, and u32* */ + char buf[8] = { 1, 2, 3, 4, 0, 0, 0, 0 }; + int x; + + for (x = 0; x < 1000000; x++) { + buf[0] = (x & 63) + 1; + f(buf); + } +} + typedef void (*many_arg_callback_t)(int i, const char* s1, const char* s2, const char* s3, const char* s4, int i2, const char* s6, const char* s7, int i3); EXPORT void call_with_many_args(many_arg_callback_t callback) diff --git a/mats/hash.ms b/mats/hash.ms index 92b832494..a61f7c764 100644 --- a/mats/hash.ms +++ b/mats/hash.ms @@ -1674,7 +1674,10 @@ [wht (make-weak-eq-hashtable)] [eht (make-ephemeron-eq-hashtable)]) (let ([ls2 (map (lambda (a1) (eq-hashtable-cell ht (car a1) (cdr a1))) ls1)] - [ls2-2 (map (lambda (a1) (eq-hashtable-try-atomic-cell ht (car a1) (cdr a1))) ls1)] + [ls2-2 (map (lambda (a1) (let loop () + (define c (eq-hashtable-try-atomic-cell ht (car a1) (cdr a1))) + (or c (loop)))) + ls1)] [ls3 (map (lambda (a1) (hashtable-cell wht (car a1) (cdr a1))) ls1)] [ls4 (map (lambda (a1) (hashtable-cell eht (car a1) (cdr a1))) ls1)]) (let ([ls2* (map (lambda (a1) (eq-hashtable-ref-cell ht (car a1))) ls1)] diff --git a/mats/misc.ms b/mats/misc.ms index d74521334..d5d9826d0 100644 --- a/mats/misc.ms +++ b/mats/misc.ms @@ -4694,34 +4694,35 @@ (#2%display 1)))) ) -(mat $read-time-stamp-counter +(unless (memq (machine-type) '(arm32le tarm32le)) ; timestamp counter tends to be priviledged on arm32le + (mat $read-time-stamp-counter - (let ([t (#%$read-time-stamp-counter)]) - (and (integer? t) (exact? t))) + (let ([t (#%$read-time-stamp-counter)]) + (and (integer? t) (exact? t))) - (let () - ;; NB: pulled from thread.ms, to use as a delay - (define fat+ - (lambda (x y) - (if (zero? y) - x - (fat+ (1+ x) (1- y))))) - (define fatfib - (lambda (x) - (if (< x 2) - 1 - (fat+ (fatfib (1- x)) (fatfib (1- (1- x))))))) - (let loop ([count 10] [success 0]) - (if (fx= count 0) - (>= success 9) - (let ([t0 (#%$read-time-stamp-counter)]) - (fatfib 26) - (let ([t1 (#%$read-time-stamp-counter)]) - (loop (fx- count 1) - (if (< t0 t1) - (fx+ success 1) - success))))))) -) + (let () + ;; NB: pulled from thread.ms, to use as a delay + (define fat+ + (lambda (x y) + (if (zero? y) + x + (fat+ (1+ x) (1- y))))) + (define fatfib + (lambda (x) + (if (< x 2) + 1 + (fat+ (fatfib (1- x)) (fatfib (1- (1- x))))))) + (let loop ([count 10] [success 0]) + (if (fx= count 0) + (>= success 9) + (let ([t0 (#%$read-time-stamp-counter)]) + (fatfib 26) + (let ([t1 (#%$read-time-stamp-counter)]) + (loop (fx- count 1) + (if (< t0 t1) + (fx+ success 1) + success))))))) + )) (mat procedure-arity-mask (equal? (procedure-arity-mask (lambda () #f)) 1) diff --git a/mats/root-experr-compile-0-f-f-f b/mats/root-experr-compile-0-f-f-f index 79018982c..9b3d5453b 100644 --- a/mats/root-experr-compile-0-f-f-f +++ b/mats/root-experr-compile-0-f-f-f @@ -9950,6 +9950,7 @@ foreign.mo:Expected error in mat foreign-fixed-types: "foreign-callable: invalid foreign.mo:Expected error in mat foreign-fixed-types: "foreign-callable: invalid return value (- x 7) from #". foreign.mo:Expected error in mat foreign-fixed-types: "foreign-callable: invalid return value (- x 7) from #". foreign.mo:Expected error in mat foreign-fixed-types: "foreign-callable: invalid return value (- x 7) from #". +foreign.mo:Expected error in mat foreign-fixed-types: "foreign-callable: invalid return value (- x 7) from #". foreign.mo:Expected error in mat foreign-C-types: "int-to-int: invalid foreign-procedure argument qqq". foreign.mo:Expected error in mat foreign-C-types: "unsigned-to-unsigned: invalid foreign-procedure argument qqq". foreign.mo:Expected error in mat foreign-C-types: "unsigned-int-to-unsigned-int: invalid foreign-procedure argument qqq". diff --git a/mats/thread.ms b/mats/thread.ms index 68c042682..9c361330d 100644 --- a/mats/thread.ms +++ b/mats/thread.ms @@ -1569,6 +1569,18 @@ (condition-broadcast c)) (equal? gc-ids (list (get-thread-id))))) ) - + +(mat wait-for-threads + (begin + ;; To avoid breaking later tests that use `(collect)`, + ;; wait for any threads created here to exit + (let () + (define $threads (foreign-procedure "(cs)threads" () scheme-object)) + (let loop () + (unless (= 1 (length ($threads))) + (sleep (make-time 'time-duration 10000 0)) + (loop)))) + #t) +) ) diff --git a/s/Mf-rv64le b/s/Mf-rv64le index 87777e405..edb46aff4 100644 --- a/s/Mf-rv64le +++ b/s/Mf-rv64le @@ -14,6 +14,6 @@ # limitations under the License. m = rv64le -archincludes = rv64le.ss +archincludes = rv64.ss include Mf-base diff --git a/s/arm32.ss b/s/arm32.ss index 3a2e371b8..ecc9439f3 100644 --- a/s/arm32.ss +++ b/s/arm32.ss @@ -77,11 +77,11 @@ #;[%yp] [ %r0 %Carg1 %Cretval #f 0 uptr] [ %r1 %Carg2 #f 1 uptr] - [ %r2 %Carg3 #f 2 uptr] - [ %r3 %Carg4 #f 3 uptr] + [ %r2 %Carg3 %reify1 #f 2 uptr] + [ %r3 %Carg4 %reify2 #f 3 uptr] [ %lr #f 14 uptr] ; %lr is trashed by 'c' calls including calls to hand-coded routines like get-room - [%fp1 %Cfparg5 %d4 %s8 #f 8 fp] - [%fp2 %Cfparg6 %d5 %s10 #f 10 fp] + [%fp1 %d8 %s16 #t 16 fp] ; allocable fp regs must not overlap with any half registers + [%fp2 %d9 %s18 #t 18 fp] ) (machine-dependent [%sp #t 13 uptr] @@ -94,18 +94,19 @@ [%Cfparg3b %s5 #f 5 fp] [%Cfparg4 %d3 %s6 #f 6 fp] [%Cfparg4b %s7 #f 7 fp] + [%Cfparg5 %d4 %s8 #f 8 fp] [%Cfparg5b %s9 #f 9 fp] + [%Cfparg6 %d5 %s10 #f 10 fp] [%Cfparg6b %s11 #f 11 fp] - [%Cfparg7 %fptmp1 %d6 %s12 #f 12 fp] - [%Cfparg7b %fptmp2 %s13 #f 13 fp] + [%Cfparg7 %d6 %s12 #f 12 fp] + [%Cfparg7b %s13 #f 13 fp] [%Cfparg8 %d7 %s14 #f 14 fp] [%Cfparg8b %s15 #f 15 fp] ;; etc., but other FP registers are preserved #;[ %d16 #t 32 fp] ; >= 32: high bit goes in D, N, or M bit, low bits go in Vd, Vn, Vm #;[ %d17 #t 33 fp] ; etc. - ) - (reify-support %ts %lr %r3 %r2)) + )) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -230,20 +231,18 @@ (cond [(and (eq? x1 %zero) (or (unsigned12? imm) (unsigned12? (- imm)))) (return x0 %zero imm type)] - [(funky12 imm) => + [(funky12 imm) ; NB: dubious value? check to see if it's exercised - (lambda (imm) - (let ([u (make-tmp 'u)]) - (seq - (build-set! ,u (asm ,null-info ,(asm-add #f) ,x0 (immediate ,imm))) - (return u x1 0 type))))] - [(funky12 (- imm)) => + (let ([u (make-tmp 'u)]) + (seq + (build-set! ,u (asm ,null-info ,(asm-add #f) ,x0 (immediate ,imm))) + (return u x1 0 type)))] + [(funky12 (- imm)) ; NB: dubious value? check to see if it's exercised - (lambda (imm) - (let ([u (make-tmp 'u)]) - (seq - (build-set! ,u (asm ,null-info ,(asm-sub #f) ,x0 (immediate ,imm))) - (return u x1 0 type))))] + (let ([u (make-tmp 'u)]) + (seq + (build-set! ,u (asm ,null-info ,(asm-sub #f) ,x0 (immediate ,(- imm)))) + (return u x1 0 type)))] [else (let ([u (make-tmp 'u)]) (seq @@ -264,7 +263,7 @@ (k (with-output-language (L15d Lvalue) `(mref ,u ,%zero 0 uptr)))))] [else (mref->mref a k)]))) - (define fpmem->fpmem + (define fpmem->fpmem ; allows mem argument, too (lambda (a k) (define return (lambda (x0 x1 imm) @@ -295,6 +294,10 @@ [else (return x0 %zero imm)])))))]))) + (define mem->fpmem + (lambda (a k) + (fpmem->fpmem a k))) + (define-syntax coercible? (syntax-rules () [(_ ?a ?aty*) @@ -309,7 +312,8 @@ (and (memq 'unsigned12 aty*) (imm-unsigned12? a)) (and (memq 'imm-constant aty*) (imm-constant? a)) (and (memq 'uword8 aty*) (imm-uword8? a)) - (and (memq 'mem aty*) (mem? a))))])) + (and (memq 'mem aty*) (mem? a)) + (and (memq 'fpmem aty*) (fpmem? a))))])) (define-syntax coerce-opnd ; passes k something compatible with aty* (syntax-rules () @@ -317,6 +321,7 @@ (let ([a ?a] [aty* ?aty*] [k ?k]) (cond [(and (memq 'mem aty*) (mem? a)) (mem->mem a k)] + [(and (memq 'fpmem aty*) (fpmem? a)) (fpmem->fpmem a k)] [(and (memq 'funky12 aty*) (imm-funky12? a)) (k (imm->imm a))] [(and (memq 'negate-funky12 aty*) (imm-negate-funky12? a)) (k (imm->negate-imm a))] [(and (memq 'lognot-funky12 aty*) (imm-lognot-funky12? a)) (k (imm->lognot-imm a))] @@ -387,12 +392,6 @@ (define-syntax define-instruction (lambda (x) - (define mem-type? - (lambda (t) - (syntax-case t (mem fpmem) - [mem #t] - [fpmem #t] - [else #f]))) (define make-value-clause (lambda (fmt) (syntax-case fmt (mem fpmem ur fpur) @@ -724,52 +723,33 @@ `(asm ,null-info ,(asm-store type) ,x ,y ,w ,u))) `(asm ,null-info ,(asm-store type) ,x ,y ,w ,z)))))])) - (let () - (define pick-asm-op - (lambda (op info) - (let ([flreg (info-loadfl-flreg info)]) - (case op - [(load-single->double load-double->single) (asm-fl-load/cvt op flreg)] - [(store-single->double) (asm-fl-store/cvt op flreg)] - [else (asm-fl-load/store op flreg)])))) - (define-instruction effect (load-single->double load-double->single store-single->double - store-single store-double - load-single load-double) - [(op (x ur) (y ur) (z uword8)) - (if (eq? y %zero) - `(asm ,info ,(pick-asm-op op info) ,x ,z) - (let ([u (make-tmp 'u)]) - (seq - `(set! ,(make-live-info) ,u (asm ,info ,(asm-add #f) ,x ,y)) - `(asm ,info ,(pick-asm-op op info) ,u ,z))))] - [(op (x ur) (y ur) (z ur)) - (let ([u (make-tmp 'u)]) - (seq - `(set! ,(make-live-info) ,u (asm ,info ,(asm-add #f) ,x ,z)) - (if (eq? y %zero) - `(asm ,info ,(pick-asm-op op info) ,u (immediate 0)) - (seq - `(set! ,(make-live-info) ,u (asm ,info ,(asm-add #f) ,u ,y)) - `(asm ,info ,(pick-asm-op op info) ,u (immediate 0))))))])) + (define-instruction value (load-single->double) + [(op (x fpur) (y fpmem)) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-fpmove-single ,y)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt 'single->double) ,u))))]) - (let () - ; vldr, vstr allow only word offsets, and we require byte offset due to the type tag - (module (with-flonum-data-pointers) - (define $flonum-data-pointer - (lambda (x p) - (with-output-language (L15d Effect) - (let ([u (make-tmp 'u)]) - (seq - `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-add #f) ,x (immediate ,(constant flonum-data-disp)))) - (p u)))))) - (define-syntax with-flonum-data-pointers - (syntax-rules () - [(_ () e1 e2 ...) (begin e1 e2 ...)] - [(_ (x1 x2 ...) e1 e2 ...) - ($flonum-data-pointer x1 - (lambda (x1) - (with-flonum-data-pointers (x2 ...) e1 e2 ...)))]))) + (define-instruction effect (store-double->single) + [(op (x fpmem) (y fpur)) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-fl-cvt 'double->single) ,y)) + `(asm ,info ,asm-fpmove-single ,x ,u)))]) + + (define-instruction effect (store-single) + [(op (x fpmem) (y fpur)) + `(asm ,info ,asm-fpmove-single ,x ,y)]) + + (define-instruction value (load-single) + [(op (x fpur) (y fpmem)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-fpmove-single ,y))]) + + (define-instruction value (single->double double->single) + [(op (x fpur) (y fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt op) ,y))]) + (let () (define (fpmem->mem mem dir) (with-output-language (L15d Triv) (nanopass-case (L15d Triv) mem @@ -783,45 +763,48 @@ (define-instruction value (fpt) [(op (x fpur) (y ur)) - `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))]) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y, u))))]) (define-instruction value (fpmove) [(op (x fpmem) (y fpur)) `(set! ,(make-live-info) ,x ,y)] - [(op (x fpur) (y fpur)) `(set! ,(make-live-info) ,x ,y)]) + [(op (x fpur) (y fpmem fpur)) `(set! ,(make-live-info) ,x ,y)]) (define-instruction value (fpcastto/hi) [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'hi))] [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'hi) ,y))]) - (define-instruction value (fpcastto/lo) - [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'lo))] - [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'lo) ,y))]) - - (define-instruction value (fpcastfrom) - [(op (x fpmem) (hi ur) (lo ur)) (seq - `(set! ,(make-live-info) ,(fpmem->mem x 'lo) ,lo) - `(set! ,(make-live-info) ,(fpmem->mem x 'hi) ,hi))] - [(op (x fpur) (hi ur) (lo ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,lo ,hi))]) - - - - (define-instruction value (fp+ fp- fp/ fp*) - [(op (x fpur) (y fpur) (z fpur)) - `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))]) - - (define-instruction value (fpsqrt) - [(op (x fpur) (y fpur)) - `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))]) - - (define-instruction value (trunc) - [(op (z ur) (x ur)) - (with-flonum-data-pointers (x) - `(set! ,(make-live-info) ,z (asm ,info ,asm-trunc ,x)))]) + (define-instruction value (fpcastto/lo) + [(op (x ur) (y fpmem)) `(set! ,(make-live-info) ,x ,(fpmem->mem y 'lo))] + [(op (x ur) (y fpur)) `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpcastto 'lo) ,y))]) + + (define-instruction value (fpcastfrom) + [(op (x fpmem) (hi ur) (lo ur)) (seq + `(set! ,(make-live-info) ,(fpmem->mem x 'lo) ,lo) + `(set! ,(make-live-info) ,(fpmem->mem x 'hi) ,hi))] + [(op (x fpur) (hi ur) (lo ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpcastfrom ,lo ,hi))])) + + (define-instruction value (fp+ fp- fp/ fp*) + [(op (x fpur) (y fpur) (z fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fpop-2 op) ,y ,z))]) + + (define-instruction value (fpsqrt) + [(op (x fpur) (y fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-fpsqrt ,y))]) + + (define-instruction value (fptrunc) + [(op (z ur) (x fpur)) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) + `(set! ,(make-live-info) ,z (asm ,info ,asm-fptrunc ,x ,u))))]) - (define-instruction pred (fp= fp< fp<=) - [(op (x fpur) (y fpur)) - (let ([info (make-info-condition-code op #f #f)]) - (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))])) + (define-instruction pred (fp= fp< fp<=) + [(op (x fpur) (y fpur)) + (let ([info (make-info-condition-code op #f #f)]) + (values '() `(asm ,info ,(asm-fp-relop info) ,x ,y)))]) (define-instruction effect (inc-cc-counter) [(op (x ur) (w ur funky12) (z funky12 ur)) @@ -946,14 +929,16 @@ ; NB: compiler ipmlements init-lock! and unlock! as 32-bit store of zero (define-instruction pred (lock!) [(op (x ur) (y ur) (w funky12)) - (let ([u (make-tmp 'u)]) + (let ([u (make-tmp 'u)] + [u2 (make-tmp 'u2)]) (values (lea->reg x y w (lambda (r) (with-output-language (L15d Effect) (seq `(set! ,(make-live-info) ,u (asm ,null-info ,asm-kill)) - `(asm ,null-info ,asm-lock ,r ,u))))) + `(set! ,(make-live-info) ,u2 (asm ,null-info ,asm-kill)) + `(asm ,null-info ,asm-lock ,r ,u ,u2))))) `(asm ,info-cc-eq ,asm-eq ,u (immediate 0))))]) (define-instruction effect (locked-incr! locked-decr!) [(op (x ur) (y ur) (w funky12)) @@ -994,6 +979,9 @@ (define-instruction effect (vpush-multiple) [(op) `(asm ,info ,(asm-vpush-multiple (info-vpush-reg info) (info-vpush-n info)))]) + (define-instruction effect (vpop-multiple) + [(op) `(asm ,info ,(asm-vpop-multiple (info-vpush-reg info) (info-vpush-n info)))]) + (define-instruction effect save-flrv [(op) `(asm ,info ,asm-save-flrv)]) @@ -1009,13 +997,12 @@ asm-move asm-move/extend asm-load asm-store asm-swap asm-library-call asm-library-call! asm-library-jump asm-mul asm-smull asm-cmp/shift asm-add asm-sub asm-rsb asm-logand asm-logor asm-logxor asm-bic asm-pop-multiple asm-shiftop asm-logand asm-lognot - asm-logtest asm-fp-relop asm-relop asm-push-multiple asm-vpush-multiple + asm-logtest asm-fp-relop asm-relop asm-push-multiple asm-vpush-multiple asm-vpop-multiple asm-indirect-jump asm-literal-jump asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-indirect-call asm-condition-code - asm-fl-load/store - asm-fl-load/cvt asm-fl-store/cvt asm-fpt asm-fpmove asm-fpcastto asm-fpcastfrom asm-trunc + asm-fpmove-single asm-fl-cvt asm-fpt asm-fpmove asm-fpcastto asm-fpcastfrom asm-fptrunc asm-lock asm-lock+/- asm-cas asm-fpop-2 asm-fpsqrt asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size @@ -1190,7 +1177,8 @@ (define-op popm pm-op #b10001011) (define-op pushm pm-op #b10010010) - (define-op vpushm vpushm-op) + (define-op vpushm vpm-op #b11010 #b10) + (define-op vpopm vpm-op #b11001 #b11) (define-op vldr.sgl vldr/vstr-op #b1010 #b01) (define-op vldr.dbl vldr/vstr-op #b1011 #b01) @@ -1221,6 +1209,7 @@ (define-op mrc mrc/mcr-op #b1) (define-op vadd vadd-op #b11 #b0 #b11100) + (define-op vsub vadd-op #b11 #b1 #b11100) (define-op vmul vadd-op #b10 #b0 #b11100) (define-op vdiv vadd-op #b00 #b0 #b11101) @@ -1485,7 +1474,7 @@ (define vmov.gpr-op (lambda (op dir flreg flreg-delta gpreg code*) (let-values ([(n vn) (ax-flreg->bits flreg flreg-delta)]) - (emit-code (op flreg gpreg code*) + (emit-code (op flreg gpreg flreg-delta code*) [28 (ax-cond 'al)] [21 #b1110000] [20 dir] @@ -1574,14 +1563,15 @@ [12 #b1111] [0 #b101000010000]))) - (define vpushm-op - (lambda (op flreg n code*) + (define vpm-op + (lambda (op opcode opcode2 flreg n code*) (let-values ([(d vd) (ax-flreg->bits flreg)]) (emit-code (op flreg n code*) [28 (ax-cond 'al)] - [23 #b11010] + [23 opcode] [22 d] - [16 #b101101] + [20 opcode2] + [16 #b1101] [12 vd] [8 #b1011] [0 (fxsll n 1)])))) @@ -1959,40 +1949,15 @@ (Trivit (src0 src1) (emit cmp/shift count type src0 src1 code*))))) - (define-who asm-fl-load/cvt - (lambda (op flreg) - (lambda (code* base offset) - (Trivit (base offset) - (case op - [(load-single->double) - (emit vldr.sgl %fptmp2 base (ax-imm-data offset) - (emit vcvt.sgl->dbl flreg %fptmp2 code*))] - [(load-double->single) - (emit vldr.dbl %fptmp2 base (ax-imm-data offset) - (emit vcvt.dbl->sgl flreg %fptmp2 code*))] - [else (sorry! who "unrecognized op ~s" op)]))))) - - (define-who asm-fl-store/cvt - (lambda (op flreg) - (lambda (code* base offset) - (Trivit (base offset) - (case op - [(store-single->double) - (emit vcvt.sgl->dbl %fptmp2 flreg - (emit vstr.dbl %fptmp2 base (ax-imm-data offset) code*))] - [else (sorry! who "unrecognized op ~s" op)]))))) - - (define-who asm-fl-load/store - (lambda (op flreg) - (lambda (code* base offset) - (Trivit (base offset) - (let ([offset (ax-imm-data offset)]) - (case op - [(load-single) (emit vldr.sgl flreg base offset code*)] - [(load-double) (emit vldr.dbl flreg base offset code*)] - [(store-single) (emit vstr.sgl flreg base offset code*)] - [(store-double) (emit vstr.dbl flreg base offset code*)] - [else (sorry! who "unrecognized op ~s" op)])))))) + (define-who asm-fl-cvt + (lambda (op) + (lambda (code* dest-reg src-reg) + (case op + [(single->double) + (emit vcvt.sgl->dbl dest-reg src-reg code*)] + [(double->single) + (emit vcvt.dbl->sgl dest-reg src-reg code*)] + [else (sorry! who "unrecognized op ~s" op)])))) (define-who asm-load (lambda (type) @@ -2060,37 +2025,51 @@ (lambda (code* dest src) (emit vsqrt dest src code*))) - (define asm-trunc - (lambda (code* dest flonumreg) - (Trivit (dest flonumreg) - (emit vldr.dbl %fptmp1 flonumreg 0 - (emit vcvt.dbl->s32 %fptmp1 %fptmp1 - (emit vmov.s32->gpr %fptmp1 0 dest code*)))))) + (define asm-fptrunc + (lambda (code* dest flonumreg tmpreg) + (Trivit (dest) + (emit vcvt.dbl->s32 tmpreg flonumreg + (emit vmov.s32->gpr tmpreg 0 dest code*))))) (define asm-fpt - (lambda (code* dest src) + (lambda (code* dest src tmpreg) (Trivit (src) - (emit vmov.gpr->s32 %fptmp1 0 src - (emit vcvt.s32->dbl %fptmp1 dest code*))))) + (emit vmov.gpr->s32 tmpreg 0 src + (emit vcvt.s32->dbl dest tmpreg code*))))) (define-who asm-fpmove ;; fpmove pseudo instruction is used by set! case in ;; select-instructions! and generate-code; at most one of src or ;; dest can be an mref, and then the offset is double-aligned (lambda (code* dest src) + (gen-fpmove who code* dest src #t))) + + (define-who asm-fpmove-single + ;; fpmove pseudo instruction is used by set! case in + ;; select-instructions! and generate-code; at most one of src or + ;; dest can be an mref, and then the offset is double-aligned + (lambda (code* dest src) + (gen-fpmove who code* dest src #f))) + + (define gen-fpmove + (lambda (who code* dest src double?) (let ([dest-it dest] [src-it src]) (Trivit (dest-it src-it) (record-case dest-it [(disp) (imm reg) (safe-assert (fx= 0 (fxand imm #b11))) - (emit vstr.dbl src (cons 'reg reg) imm code*)] + (if double? + (emit vstr.dbl src (cons 'reg reg) imm code*) + (emit vstr.sgl src (cons 'reg reg) imm code*))] [(index) (n ireg breg) (sorry! who "cannot handle indexed fp dest ref")] [else (record-case src-it [(disp) (imm reg) (safe-assert (fx= 0 (fxand imm #b11))) - (emit vldr.dbl dest (cons 'reg reg) imm code*)] + (if double? + (emit vldr.dbl dest (cons 'reg reg) imm code*) + (emit vldr.sgl dest (cons 'reg reg) imm code*))] [(index) (n ireg breg) (sorry! who "cannot handle indexed fp src ref")] [else (emit vmov.fpr dest src code*)])]))))) @@ -2099,8 +2078,8 @@ (lambda (code* dest src) (Trivit (dest) (if (eq? part 'lo) - (emit vmov.gpr->s32 src 0 dest code*) - (emit vmov.gpr->s32 src 1 dest code*)))))) + (emit vmov.s32->gpr src 0 dest code*) + (emit vmov.s32->gpr src 1 dest code*)))))) (define asm-fpcastfrom (lambda (code* dest lo-src hi-src) @@ -2119,19 +2098,19 @@ [else (sorry! who "unexpected asm-swap type argument ~s" type)])))))) (define asm-lock - ; tmp = ldrex src - ; cmp tmp, 0 + ; tmp2 = ldrex src + ; cmp tmp2, 0 ; bne L1 (+2) - ; tmp = 1 - ; tmp = strex tmp, src + ; tmp2 = 1 + ; tmp = strex tmp2, src ;L1: - (lambda (code* src tmp) - (Trivit (src tmp) - (emit ldrex tmp src - (emit cmpi tmp 0 + (lambda (code* src tmp tmp2) + (Trivit (src tmp tmp2) + (emit ldrex tmp2 src + (emit cmpi tmp2 0 (emit bnei 1 - (emit movi1 tmp 1 - (emit strex tmp tmp src code*)))))))) + (emit movi1 tmp2 1 + (emit strex tmp tmp2 src code*)))))))) (define-who asm-lock+/- ; L: @@ -2211,6 +2190,11 @@ (lambda (code*) (emit vpushm reg n code*)))) + (define asm-vpop-multiple + (lambda (reg n) + (lambda (code*) + (emit vpopm reg n code*)))) + (define asm-save-flrv (lambda (code*) (let ([sp (cons 'reg %sp)]) @@ -2271,7 +2255,8 @@ (define asm-direct-jump (lambda (l offset) - (asm-helper-jump '() (make-funcrel 'arm32-jump l offset)))) + (let ([offset (adjust-return-point-offset offset l)]) + (asm-helper-jump '() (make-funcrel 'arm32-jump l offset))))) (define asm-literal-jump (lambda (info) @@ -2313,17 +2298,18 @@ (or (cond [(local-label-offset l) => (lambda (offset) - (let ([disp (fx- next-addr (fx- offset incr-offset) 4)]) - (cond - [(funky12 disp) - (Trivit (dest) - ; aka adr, encoding A1 - (emit addi #f dest `(reg . ,%pc) disp '()))] - [(funky12 (- disp)) - (Trivit (dest) - ; aka adr, encoding A2 - (emit subi #f dest `(reg . ,%pc) (- disp) '()))] - [else #f])))] + (let ([incr-offset (adjust-return-point-offset incr-offset l)]) + (let ([disp (fx- next-addr (fx- offset incr-offset) 4)]) + (cond + [(funky12 disp) + (Trivit (dest) + ; aka adr, encoding A1 + (emit addi #f dest `(reg . ,%pc) disp '()))] + [(funky12 (- disp)) + (Trivit (dest) + ; aka adr, encoding A2 + (emit subi #f dest `(reg . ,%pc) (- disp) '()))] + [else #f]))))] [else #f]) (asm-move '() dest (with-output-language (L16 Triv) `(label-ref ,l ,incr-offset))))))) @@ -2562,6 +2548,8 @@ (or (andmap double-member? members) (andmap float-member? members)))))] [else #f])) + (define num-int-regs 4) ; number of integer registers normally usd by the ABI + (define num-dbl-regs 8) ; number of `double` registers normally usd by the ABI (define sgl-regs (lambda () (list %Cfparg1 %Cfparg1b %Cfparg2 %Cfparg2b %Cfparg3 %Cfparg3b %Cfparg4 %Cfparg4b %Cfparg5 %Cfparg5b %Cfparg6 %Cfparg6b %Cfparg7 %Cfparg7b %Cfparg8 %Cfparg8b))) (define-who asm-foreign-call @@ -2570,15 +2558,13 @@ (letrec ([load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + `(set! ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-int-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -2611,11 +2597,21 @@ [load-double-reg (lambda (fpreg fp-disp) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero (immediate ,fp-disp))))] + `(set! ,fpreg ,(%mref ,x ,%zero ,fp-disp fp))))] [load-single-reg (lambda (fpreg fp-disp single?) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,(if single? %load-single %load-double->single) ,x ,%zero (immediate ,fp-disp))))] + (let ([%op (if single? %load-single %double->single)]) + `(set! ,fpreg (inline ,null-info ,%op ,(%mref ,x ,%zero ,fp-disp fp))))))] + [load-double-int-reg + (lambda (loreg hireg) + (lambda (x) ; requires var + (let-values ([(endreg otherreg) (constant-case native-endianness + [(little) (values loreg hireg)] + [(big) (values hireg loreg)])]) + (%seq + (set! ,endreg ,(%mref ,x ,(constant flonum-data-disp))) + (set! ,otherreg ,(%mref ,x ,(fx+ 4 (constant flonum-data-disp))))))))] [load-int-reg (lambda (ireg) (lambda (x) @@ -2649,34 +2645,46 @@ (set! ,loreg ,(%mref ,x ,from-offset)) (set! ,hireg ,(%mref ,x ,(fx+ from-offset 4))))))] [do-args - (lambda (types) + (lambda (types varargs?) ; sgl* is always of even-length, i.e., has a sgl/dbl reg first ; bsgl is set to "b" single (second half of double) if we have one to fill - (let loop ([types types] [locs '()] [live* '()] [int* (int-regs)] [sgl* (sgl-regs)] [bsgl #f] [isp 0]) + (let loop ([types types] [locs '()] [live* '()] [int* (int-regs)] [sgl* (if varargs? '() (sgl-regs))] [bsgl #f] [isp 0]) (if (null? types) (values isp locs live*) (nanopass-case (Ltype Type) (car types) [(fp-double-float) - (if (null? sgl*) - (let ([isp (align 8 isp)]) - (loop (cdr types) + (cond + [(and varargs? + ;; For varargs, treat a double like a 64-bit integer + (let ([int* (if (even? (length int*)) int* (cdr int*))]) + (and (pair? int*) + int*))) + => (lambda (int*) + (loop (cdr types) + (cons (load-double-int-reg (car int*) (cadr int*)) locs) + (cons* (car int*) (cadr int*) live*) (cddr int*) sgl* bsgl isp))] + [(null? sgl*) + (let ([isp (align 8 isp)]) + (loop (cdr types) (cons (load-double-stack isp) locs) - live* int* '() #f (fx+ isp 8))) - (loop (cdr types) - (cons (load-double-reg (car sgl*) (constant flonum-data-disp)) locs) - live* int* (cddr sgl*) bsgl isp))] + live* int* '() #f (fx+ isp 8)))] + [else + (loop (cdr types) + (cons (load-double-reg (car sgl*) (constant flonum-data-disp)) locs) + (cons (car sgl*) live*) int* (cddr sgl*) bsgl isp)])] [(fp-single-float) + (safe-assert (not varargs?)) (if bsgl (loop (cdr types) (cons (load-single-reg bsgl (constant flonum-data-disp) #f) locs) - live* int* sgl* #f isp) + (cons bsgl live*) int* sgl* #f isp) (if (null? sgl*) (loop (cdr types) (cons (load-single-stack isp) locs) live* int* '() #f (fx+ isp 4)) (loop (cdr types) (cons (load-single-reg (car sgl*) (constant flonum-data-disp) #f) locs) - live* int* (cddr sgl*) (cadr sgl*) isp)))] + (cons (car sgl*) live*) int* (cddr sgl*) (cadr sgl*) isp)))] [(fp-ftd& ,ftd) (let ([size ($ftd-size ftd)] [members ($ftd->members ftd)] @@ -2688,7 +2696,8 @@ [(8) (let* ([int* (if (even? (length int*)) int* (cdr int*))] [num-members (length members)] - [doubles? (and (fx<= num-members 4) + [doubles? (and (not varargs?) + (fx<= num-members 4) (andmap double-member? members))]) ;; Sequence of up to 4 doubles that fits in registers? (cond @@ -2721,14 +2730,15 @@ (cons* (car int*) (cadr int*) live*) (cddr int*) isp))]))]))] [else (let* ([num-members (length members)] - [floats? (and (fx<= num-members 4) + [floats? (and (not varargs?) + (fx<= num-members 4) (andmap float-member? members))]) ;; Sequence of up to 4 floats that fits in registers? (cond [(and floats? (fx>= (fx+ (length sgl*) (if bsgl 1 0)) num-members)) ;; Allocate each float to register - (let flt-loop ([size size] [offset 0] [sgl* sgl*] [bsgl bsgl] [loc #f]) + (let flt-loop ([size size] [offset 0] [sgl* sgl*] [bsgl bsgl] [loc #f] [live* live*]) (cond [(fx= size 0) (loop (cdr types) (cons loc locs) live* int* sgl* bsgl isp)] @@ -2736,7 +2746,8 @@ (flt-loop (fx- size 4) (fx+ offset 4) (if bsgl sgl* (cddr sgl*)) (if bsgl #f (cadr sgl*)) - (combine-loc loc (load-single-reg (or bsgl (car sgl*)) offset #t)))]))] + (combine-loc loc (load-single-reg (or bsgl (car sgl*)) offset #t)) + (cons (or bsgl (car sgl*)) live*))]))] [else ;; General case; use integer registers while available, ;; possibly splitting between registers and stack @@ -2800,8 +2811,9 @@ (fx+ offset (if double? 8 4)) `(seq ,e - (inline ,(make-info-loadfl (car sgl*)) ,(if double? %store-double %store-single) - ,dest-x ,%zero (immediate ,offset))))])))] + ,(if double? + `(set! ,(%mref ,dest-x ,%zero ,offset fp) ,(car sgl*)) + (%inline store-single ,(%mref ,dest-x ,%zero ,offset fp) ,(car sgl*)))))])))] [else ;; result is in %Cretval and maybe %r1 `(seq @@ -2817,13 +2829,36 @@ [(8) `(seq (set! ,(%mref ,dest-x ,0) ,%Cretval) (set! ,(%mref ,dest-x ,4) ,%r1))]))]))])] - [else e]))]) + [else e]))] + [get-result-regs + (lambda (result-type varargs?) + (nanopass-case (Ltype Type) result-type + [(fp-double-float) + (if varargs? + (list %r1 %Cretval) + (list %Cfpretval))] + [(fp-single-float) + (if varargs? + (list %Cretval) + (list %Cfpretval))] + [(fp-integer ,bits) + (case bits + [(64) (list %r1 %Cretval)] + [else (list %Cretval)])] + [(fp-unsigned ,bits) + (case bits + [(64) (list %r1 %Cretval)] + [else (list %Cretval)])] + [else (list %r0)]))]) (lambda (info) (safe-assert (reg-callee-save? %tc)) ; no need to save-restore (let* ([arg-type* (info-foreign-arg-type* info)] + [varargs? (memq 'varargs (info-foreign-conv* info))] [result-type (info-foreign-result-type info)] + [result-reg* (get-result-regs result-type varargs?)] [fill-result-here? (indirect-result-that-fits-in-registers? result-type)]) - (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*)) + (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) + varargs?) (lambda (args-frame-size locs live*) (let* ([frame-size (align 8 (+ args-frame-size (if fill-result-here? @@ -2844,16 +2879,25 @@ [else locs])) (lambda (t0) (add-fill-result fill-result-here? result-type args-frame-size - `(inline ,(make-info-kill*-live* (reg-list %r0) live*) ,%c-call ,t0))) + `(inline ,(make-info-kill*-live* result-reg* live*) ,%c-call ,t0))) (nanopass-case (Ltype Type) result-type [(fp-double-float) - (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] + (if varargs? + (lambda (lvalue) + `(set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) + ,(%inline fpcastfrom ,%r1 ,%Cretval))) + (lambda (lvalue) + `(set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) + ,%Cfpretval)))] [(fp-single-float) - (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-single->double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] + (if varargs? + (lambda (lvalue) + (let ([t %Cfpretval]) ; should be ok as a temporary register + `(seq + (set! ,t ,(%inline fpcastfrom ,%r1 ,%Cretval)) ; we don't actually care about the hi/%r1 part + (set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) ,(%inline single->double ,t))))) + (lambda (lvalue) + `(set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) ,(%inline single->double ,%Cfpretval))))] [(fp-integer ,bits) (case bits [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%r0)))] @@ -2884,31 +2928,31 @@ +---------------------------+ | | | incoming stack args | - sp+36+R+X+Y+Z+W: | | + sp+52+R+X+Y+Z+W: | | +---------------------------+<- 8-byte boundary | | | saved int reg args | 0-4 words - sp+36+R+X+Y+Z: | | + sp+52+R+X+Y+Z: | | +---------------------------+ | | | pad word if necessary | 0-1 words - sp+36+R+X+Y: | | + sp+52+R+X+Y: | | +---------------------------+<- 8-byte boundary | | | saved float reg args | 0-16 words - sp+36+R+X: | | + sp+52+R+X: | | +---------------------------+<- 8-byte boundary | | | &-return space | up to 8 words - sp+36+R: | | + sp+52+R: | | +---------------------------+<- 8-byte boundary | | | pad word if necessary | 0-1 words - sp+36: | | + sp+52: | | +---------------------------+ | | - | callee-save regs + lr | 9 words - sp+0: | | + | callee-save regs + lr | 13 words + sp+0: | callee-save fpregs | +---------------------------+<- 8-byte boundary X = 0 or 4 (depending on whether pad is present) @@ -2920,15 +2964,13 @@ (define load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%mref ,%sp ,%zero ,offset fp))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%inline load-single->double ,(%mref ,%sp ,%zero ,offset fp)))))) (define load-int-stack (lambda (type offset) (lambda (lvalue) @@ -2957,16 +2999,19 @@ (lambda (lvalue) `(set! ,lvalue ,(%inline + ,%sp (immediate ,offset)))))) (define count-reg-args - (lambda (types synthesize-first?) + (lambda (types synthesize-first? varargs?) ; bsgl? is #t iff we have a "b" single (second half of double) float reg to fill (let f ([types types] [iint (if synthesize-first? -1 0)] [idbl 0] [bsgl? #f]) (if (null? types) (values iint idbl) (nanopass-case (Ltype Type) (car types) [(fp-double-float) - (if (fx< idbl 8) - (f (cdr types) iint (fx+ idbl 1) bsgl?) - (f (cdr types) iint idbl #f))] + (if varargs? + (let ([iint (align 2 iint)]) + (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 2) iint) idbl bsgl?)) + (if (fx< idbl 8) + (f (cdr types) iint (fx+ idbl 1) bsgl?) + (f (cdr types) iint idbl #f)))] [(fp-single-float) (if bsgl? (f (cdr types) iint idbl #f) @@ -3007,8 +3052,8 @@ [(fp-unsigned ,bits) (fx= bits 64)] [else #f]) (let ([iint (align 2 iint)]) - (f (cdr types) (if (fx< iint 4) (fx+ iint 2) iint) idbl bsgl?)) - (f (cdr types) (if (fx< iint 4) (fx+ iint 1) iint) idbl bsgl?))]))))) + (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 2) iint) idbl bsgl?)) + (f (cdr types) (if (fx< iint num-int-regs) (fx+ iint 1) iint) idbl bsgl?))]))))) (define do-stack ; all of the args are on the stack at this point, though not contiguous since ; we push all of the int reg args with one push instruction and all of the @@ -3016,7 +3061,7 @@ ; continue on into the stack variables, which is convenient when a struct ; argument is split across registers and the stack (lambda (types saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes - synthesize-first?) + synthesize-first? varargs?) (let* ([return-space-offset (fx+ saved-reg-bytes pre-pad-bytes)] [float-reg-offset (fx+ return-space-offset return-bytes)] [int-reg-offset (fx+ float-reg-offset float-reg-bytes post-pad-bytes)] @@ -3037,20 +3082,36 @@ locs)) (nanopass-case (Ltype Type) (car types) [(fp-double-float) - (if (< idbl 8) - (loop (cdr types) - (cons (load-double-stack float-reg-offset) locs) - iint (fx+ idbl 1) bsgl-offset int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) - (let ([stack-arg-offset (align 8 stack-arg-offset)]) - (loop (cdr types) - (cons (load-double-stack stack-arg-offset) locs) - iint 8 #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 8))))] + (cond + [(and varargs? + ;; For varargs, treat a double like a 64-bit integer + (let ([iint (align 2 iint)]) + (and (fx< iint num-int-regs) + iint))) + => (lambda (new-iint) + (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] + [iint new-iint]) + (loop (cdr types) + (cons (load-double-stack int-reg-offset) locs) + (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset)))] + [(and (not varargs?) + (< idbl num-dbl-regs)) + (loop (cdr types) + (cons (load-double-stack float-reg-offset) locs) + iint (fx+ idbl 1) bsgl-offset int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset)] + [else + (let ([stack-arg-offset (align 8 stack-arg-offset)] + [iint (if varargs? (align 2 iint) iint)]) ; use up register if argument didn't fit + (loop (cdr types) + (cons (load-double-stack stack-arg-offset) locs) + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 8)))])] [(fp-single-float) + (safe-assert (not varargs?)) (if bsgl-offset (loop (cdr types) (cons (load-single-stack bsgl-offset) locs) iint idbl #f int-reg-offset float-reg-offset stack-arg-offset) - (if (< idbl 8) + (if (< idbl num-dbl-regs) (loop (cdr types) ; with big-endian ARM might need to adjust offset +/- 4 since pair of ; single floats in a pushed double float might be reversed @@ -3058,28 +3119,30 @@ iint (fx+ idbl 1) (fx+ float-reg-offset 4) int-reg-offset (fx+ float-reg-offset 8) stack-arg-offset) (loop (cdr types) (cons (load-single-stack stack-arg-offset) locs) - iint 8 #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))] + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset 4))))] [(fp-ftd& ,ftd) (let* ([size ($ftd-size ftd)] [members ($ftd->members ftd)] [num-members (length members)]) (cond - [(and (fx<= num-members 4) + [(and (not varargs?) + (fx<= num-members 4) (andmap double-member? members)) ;; doubles are either in registers or all on stack - (if (fx<= (fx+ idbl num-members) 8) + (if (fx<= (fx+ idbl num-members) num-dbl-regs) (loop (cdr types) (cons (load-stack-address float-reg-offset) locs) iint (fx+ idbl num-members) #f int-reg-offset (fx+ float-reg-offset size) stack-arg-offset) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) (cons (load-stack-address stack-arg-offset) locs) - iint 8 #f int-reg-offset #f (fx+ stack-arg-offset size))))] - [(and (fx<= num-members 4) + iint num-dbl-regs #f int-reg-offset #f (fx+ stack-arg-offset size))))] + [(and (not varargs?) + (fx<= num-members 4) (andmap float-member? members)) ;; floats are either in registers or all on stack (let ([amt (fxsrl (align 2 (fx- num-members (if bsgl-offset 1 0))) 1)]) - (if (fx<= (fx+ idbl amt) 8) + (if (fx<= (fx+ idbl amt) num-dbl-regs) (let ([odd-floats? (fxodd? num-members)]) (if bsgl-offset (let ([dbl-size (align 8 (fx- size 4))]) @@ -3094,15 +3157,15 @@ (fx+ float-reg-offset dbl-size) stack-arg-offset)))) (loop (cdr types) (cons (load-stack-address stack-arg-offset) locs) - iint 8 #f int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))] + iint num-dbl-regs #f int-reg-offset float-reg-offset (fx+ stack-arg-offset size))))] [(fx= 8 ($ftd-alignment ftd)) (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] [iint (align 2 iint)] [amt (fxsrl size 2)]) - (if (fx< iint 4) ; argument starts in registers, may continue on stack + (if (fx< iint num-int-regs) ; argument starts in registers, may continue on stack (loop (cdr types) (cons (load-stack-address int-reg-offset) locs) - (fxmin 4 (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) @@ -3111,10 +3174,10 @@ [else (let* ([size (align 4 size)] [amt (fxsrl size 2)]) - (if (fx< iint 4) ; argument starts in registers, may continue on stack + (if (fx< iint num-int-regs) ; argument starts in registers, may continue on stack (loop (cdr types) (cons (load-stack-address int-reg-offset) locs) - (fxmin 4 (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset + (fxmin num-int-regs (fx+ iint amt)) idbl bsgl-offset (fx+ int-reg-offset size) float-reg-offset (fx+ stack-arg-offset (fxmax 0 (fx* 4 (fx- (fx+ iint amt) 4))))) (loop (cdr types) (cons (load-stack-address stack-arg-offset) locs) @@ -3126,7 +3189,7 @@ [else #f]) (let ([int-reg-offset (if (fxeven? iint) int-reg-offset (fx+ int-reg-offset 4))] [iint (align 2 iint)]) - (if (fx= iint 4) + (if (fx= iint num-int-regs) (let ([stack-arg-offset (align 8 stack-arg-offset)]) (loop (cdr types) (cons (load-int64-stack stack-arg-offset) locs) @@ -3134,7 +3197,7 @@ (loop (cdr types) (cons (load-int64-stack int-reg-offset) locs) (fx+ iint 2) idbl bsgl-offset (fx+ int-reg-offset 8) float-reg-offset stack-arg-offset))) - (if (fx= iint 4) + (if (fx= iint num-int-regs) (loop (cdr types) (cons (load-int-stack (car types) stack-arg-offset) locs) iint idbl bsgl-offset int-reg-offset float-reg-offset (fx+ stack-arg-offset 4)) @@ -3142,13 +3205,14 @@ (cons (load-int-stack (car types) int-reg-offset) locs) (fx+ iint 1) idbl bsgl-offset (fx+ int-reg-offset 4) float-reg-offset stack-arg-offset)))])))))) (define do-result - (lambda (result-type synthesize-first? return-stack-offset) + (lambda (result-type synthesize-first? varargs? return-stack-offset) (nanopass-case (Ltype Type) result-type [(fp-ftd& ,ftd) (let* ([members ($ftd->members ftd)] [num-members (length members)]) (cond - [(and (fx<= 1 num-members 4) + [(and (not varargs?) + (fx<= 1 num-members 4) (or (andmap double-member? members) (andmap float-member? members))) ;; double/float results returned in floating-point registers @@ -3163,8 +3227,9 @@ (if double? (cddr sgl*) (cdr sgl*)) (fx+ offset (if double? 8 4)) (let ([new-e - `(inline ,(make-info-loadfl (car sgl*)) ,(if double? %load-double %load-single) - ,%sp ,%zero (immediate ,offset))]) + (if double? + `(set! ,(car sgl*) ,(%mref ,%sp ,%zero ,offset fp)) + `(set! ,(car sgl*) ,(%inline load-single ,(%mref ,%sp ,%zero ,offset fp))))]) (if e `(seq ,e ,new-e) new-e)))])))) '() ($ftd-size ftd))] @@ -3179,20 +3244,42 @@ 8)] [else (values (lambda () - `(set! ,%Cretval ,(%mref ,%sp ,return-stack-offset))) - (list %Cretval %r1) + (case ($ftd-size ftd) + [(1) + (let ([rep (if ($ftd-unsigned? ftd) 'unsigned-8 'integer-8)]) + `(set! ,%Cretval (inline ,(make-info-load rep #f) ,%load ,%sp ,%zero (immediate ,return-stack-offset))))] + [(2) + (let ([rep (if ($ftd-unsigned? ftd) 'unsigned-16 'integer-16)]) + `(set! ,%Cretval (inline ,(make-info-load rep #f) ,%load ,%sp ,%zero (immediate ,return-stack-offset))))] + [else `(set! ,%Cretval ,(%mref ,%sp ,return-stack-offset))])) + (list %Cretval) 4)])]))] [(fp-double-float) - (values (lambda (rhs) - `(inline ,(make-info-loadfl %Cfpretval) ,%load-double - ,rhs ,%zero ,(%constant flonum-data-disp))) - '() + (values (if varargs? + (lambda (rhs) + (let-values ([(endreg otherreg) (constant-case native-endianness + [(little) (values %Cretval %r1)] + [(big) (values %r1 %Cretval)])]) + `(seq + (set! ,endreg ,(%mref ,rhs ,(constant flonum-data-disp))) + (set! ,otherreg ,(%mref ,rhs ,(fx+ 4 (constant flonum-data-disp))))))) + (lambda (rhs) + `(set! ,%Cfpretval ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp)))) + (if varargs? + (list %Cretval %r1) + (list %Cfpretval)) 0)] [(fp-single-float) - (values (lambda (rhs) - `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single - ,rhs ,%zero ,(%constant flonum-data-disp))) - '() + (values (if varargs? + (lambda (rhs) + `(seq + (set! ,%Cfpretval ,(%inline double->single ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp))) + (set! ,%Cretval ,(%inline fpcastto/lo ,%Cfpretval)))) + (lambda (rhs) + `(set! ,%Cfpretval ,(%inline double->single ,(%mref ,rhs ,%zero ,(constant flonum-data-disp) fp))))) + (if varargs? + (list %Cretval) + (list %Cfpretval)) 0)] [(fp-void) (values (lambda () `(nop)) @@ -3213,21 +3300,30 @@ [else (values (lambda (x) `(set! ,%Cretval ,x)) - (list %Cretval %r1) + (list %Cretval) 0)])]))) (lambda (info) (define callee-save-regs+lr (list %r4 %r5 %r6 %r7 %r8 %r9 %r10 %r11 %lr)) + (define callee-save-fpregs (list %fp1 %fp2)) ; must be consecutive (define isaved (length callee-save-regs+lr)) + (define fpsaved (length callee-save-fpregs)) + (safe-assert (andmap (lambda (r) + (or (not (reg-callee-save? r)) + (if (eq? (reg-type r) 'fp) + (memq r callee-save-fpregs) + (memq r callee-save-regs+lr)))) + (vector->list regvec))) (let* ([arg-type* (info-foreign-arg-type* info)] + [varargs? (memq 'varargs (info-foreign-conv* info))] [result-type (info-foreign-result-type info)] - [synthesize-first? (indirect-result-that-fits-in-registers? result-type)]) - (let-values ([(iint idbl) (count-reg-args arg-type* synthesize-first?)]) - (let ([saved-reg-bytes (fx* isaved 4)] + [synthesize-first? (indirect-result-that-fits-in-registers? result-type)]) + (let-values ([(iint idbl) (count-reg-args arg-type* synthesize-first? varargs?)]) + (let ([saved-reg-bytes (fx+ (fx* isaved 4) (fx* fpsaved 8))] [pre-pad-bytes (if (fxeven? isaved) 0 4)] [int-reg-bytes (fx* iint 4)] [post-pad-bytes (if (fxeven? iint) 0 4)] [float-reg-bytes (fx* idbl 8)]) - (let-values ([(get-result result-regs return-bytes) (do-result result-type synthesize-first? + (let-values ([(get-result result-regs return-bytes) (do-result result-type synthesize-first? varargs? (fx+ saved-reg-bytes pre-pad-bytes))]) (let ([return-bytes (align 8 return-bytes)]) (values @@ -3244,6 +3340,7 @@ ,(if (fx= pre-pad-bytes 0) `(nop) `(set! ,%sp ,(%inline - ,%sp (immediate 4)))) ; save the callee save registers & return address (inline ,(make-info-kill*-live* '() callee-save-regs+lr) ,%push-multiple) + (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpush-multiple) ; set up tc for benefit of argument-conversion code, which might allocate ,(if-feature pthreads (%seq @@ -3253,12 +3350,13 @@ ; list of procedures that marshal arguments from their C stack locations ; to the Scheme argument locations (do-stack arg-type* saved-reg-bytes pre-pad-bytes return-bytes float-reg-bytes post-pad-bytes int-reg-bytes - synthesize-first?) + synthesize-first? varargs?) get-result (lambda () (in-context Tail (%seq ; restore the callee save registers + (inline ,(make-info-vpush (car callee-save-fpregs) fpsaved) ,%vpop-multiple) (inline ,(make-info-kill* callee-save-regs+lr) ,%pop-multiple) ; deallocate space for pad & arg reg values (set! ,%sp ,(%inline + ,%sp (immediate ,(fx+ pre-pad-bytes int-reg-bytes post-pad-bytes float-reg-bytes)))) diff --git a/s/cpnanopass.ss b/s/cpnanopass.ss index ace787d30..5788caca0 100644 --- a/s/cpnanopass.ss +++ b/s/cpnanopass.ss @@ -599,18 +599,13 @@ (syntax-case x (reserved allocable machine-dependent) [(k (reserved [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo rreg-type] ...) (allocable [areg areg-alias ... areg-callee-save? areg-mdinfo areg-type] ...) - (machine-depdendent [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo mdreg-type] ...) - (reify-support reify-reg ...)) - (with-implicit (k regvec arg-registers extra-registers extra-fpregisters real-register? - cons-reify-registers with-initialized-registers) + (machine-depdendent [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo mdreg-type] ...)) + (with-implicit (k regvec arg-registers extra-registers extra-fpregisters real-register? with-initialized-registers) #`(begin (define-reserved-registers [rreg rreg-alias ... rreg-callee-save? rreg-mdinfo rreg-type] ...) (define-allocable-registers regvec arg-registers extra-registers extra-fpregisters with-initialized-registers [areg areg-alias ... areg-callee-save? areg-mdinfo areg-type] ...) (define-machine-dependent-registers [mdreg mdreg-alias ... mdreg-callee-save? mdreg-mdinfo mdreg-type] ...) - (define-syntax cons-reify-registers - (syntax-rules () - [(_ reg*) (cons* reify-reg ... reg*)])) (define-syntax real-register? (with-syntax ([real-reg* #''(rreg ... rreg-alias ... ... areg ... areg-alias ... ... mdreg ... mdreg-alias ... ...)]) (syntax-rules () @@ -709,8 +704,6 @@ (fold-right (lambda (reg reg*) (cond - [(eq? (syntax->datum reg) 'reify-support) - #`(cons-reify-registers #,reg*)] [(real-register? (syntax->datum reg)) #`(cons #,reg #,reg*)] [else reg*])) @@ -976,8 +969,8 @@ (declare-intrinsic dofretu32* dofretu32* (%ac0 %ts %td %cp %ac1) (%ac0) (%xp)) (declare-intrinsic get-room get-room () (%xp) (%xp)) (declare-intrinsic scan-remembered-set scan-remembered-set () () ()) - (declare-intrinsic reify-1cc reify-1cc (%xp %ac0 reify-support) () (%td)) - (declare-intrinsic maybe-reify-cc maybe-reify-cc (%xp %ac0 reify-support) () (%td)) + (declare-intrinsic reify-1cc reify-1cc (%xp %ac0 %ts %reify1 %reify2) () (%td)) ; %reify1 & %reify2 are defined as needed per machine... + (declare-intrinsic maybe-reify-cc maybe-reify-cc (%xp %ac0 %ts %reify1 %reify2) () (%td)) ; ... to have enough registers to allocate (declare-intrinsic dooverflow dooverflow () () ()) (declare-intrinsic dooverflood dooverflood () (%xp) ()) ; a dorest routine takes all of the register and frame arguments from the rest @@ -1036,11 +1029,6 @@ (sealed #t) (fields type swapped?)) - (define-record-type info-loadfl (nongenerative) - (parent info) - (sealed #t) - (fields flreg)) - (define-record-type info-condition-code (nongenerative) (parent info) (sealed #t) @@ -4005,10 +3993,8 @@ [else (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double - ,base ,index (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,t ,%zero ,(%constant flonum-data-disp)) + (set! ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp) + (unboxed-fp ,(%mref ,base ,index ,offset fp))) ,t))])))] [(single-float) (if swapped? @@ -4018,18 +4004,22 @@ (set! ,(%mref ,t ,(constant flonum-data-disp)) (inline ,(make-info-load 'unsigned-32 #t) ,%load ,base ,index (immediate ,offset))) - (inline ,(make-info-loadfl %fptmp1) ,%load-single->double - ,t ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,t ,%zero ,(%constant flonum-data-disp)) + (set! ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp) + (unboxed-fp (inline ,(make-info-unboxed-args '(#t)) + ,%load-single->double + ;; slight abuse to call this "unboxed", but `load-single->double` + ;; wants an FP-flavored address + (unboxed-fp ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp))))) ,t))) (bind #f (base index) (bind #t ([t (%constant-alloc type-flonum (constant size-flonum))]) (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-single->double - ,base ,index (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,t ,%zero ,(%constant flonum-data-disp)) + (set! ,(%mref ,t ,%zero ,(constant flonum-data-disp) fp) + (unboxed-fp (inline ,(make-info-unboxed-args '(#t)) + ,%load-single->double + ;; slight abuse to call this "unboxed", but `load-single->double` + ;; wants an FP-flavored address + (unboxed-fp ,(%mref ,base ,index ,offset fp))))) ,t))))] [(integer-8 integer-16 integer-24 integer-32 integer-40 integer-48 integer-56 integer-64) (build-int-load swapped? type base index offset @@ -4108,21 +4098,14 @@ [(double-float) (bind #f (base index) (bind #f fp (value) - `(set! ,(%mref ,base ,index ,offset fp) ,value))) - #; - (bind #f (base index) - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double - ,value ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,base ,index (immediate ,offset))))] + `(set! ,(%mref ,base ,index ,offset fp) ,value)))] [(single-float) (bind #f (base index) - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double->single - ,value ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-single - ,base ,index (immediate ,offset))))] + `(inline ,(make-info-unboxed-args '(#t #t)) ,%store-double->single + ;; slight abuse to call this "unboxed", but `store-double->single` + ;; wants an FP-flavored address + (unboxed-fp ,(%mref ,base ,index ,offset fp)) + (unboxed-fp ,(%mref ,value ,%zero ,(constant flonum-data-disp) fp))))] ; 40-bit+ only on 64-bit machines [(integer-8 integer-16 integer-24 integer-32 integer-40 integer-48 integer-56 integer-64 unsigned-8 unsigned-16 unsigned-24 unsigned-32 unsigned-40 unsigned-48 unsigned-56 unsigned-64) @@ -7428,7 +7411,7 @@ (%inline sll ,body (immediate ,(fx- 0 cnt))) body))) (immediate ,mask))))) -< + (define-inline 3 fllp [(e) (build-flonum-extractor 19 12 e)]) @@ -7543,19 +7526,14 @@ (lambda (e1 e2) (bind #f (e1 e2) (bind #t ([t (%constant-alloc type-typed-object (constant size-inexactnum))]) - `(seq + (%seq (set! ,(%mref ,t ,(constant inexactnum-type-disp)) ,(%constant type-inexactnum)) - ,(%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double - ,e1 ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,t ,%zero ,(%constant inexactnum-real-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%load-double - ,e2 ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double - ,t ,%zero ,(%constant inexactnum-imag-disp)) - ,t)))))) + (set! ,(%mref ,t ,%zero ,(constant inexactnum-real-disp) fp) + (unboxed-fp ,(%mref ,e1 ,%zero ,(constant flonum-data-disp) fp))) + (set! ,(%mref ,t ,%zero ,(constant inexactnum-imag-disp) fp) + (unboxed-fp ,(%mref ,e2 ,%zero ,(constant flonum-data-disp) fp))) + ,t))))) (define-inline 3 fl-make-rectangular [(e1 e2) (build-fl-make-rectangular e1 e2)]) @@ -7856,7 +7834,7 @@ (define-inline 3 flonum->fixnum [(e-x) (bind #f (e-x) (build-fix - (%inline trunc ,e-x)))]) + `(inline ,(make-info-unboxed-args '(#t)) ,%fptrunc ,e-x)))]) (let () (define build-fixnum->flonum ; NB: x must already be bound in order to ensure it is done before the flonum is allocated @@ -12178,7 +12156,10 @@ (if (null? frame-x*) (begin (set! max-fv (fxmax max-fv i)) '()) (let ([i (fx+ i 1)]) - (cons (get-ptr-fv i) (f (cdr frame-x*) i)))))]) + (cons (get-ptr-fv i) (f (cdr frame-x*) i)))))] + [cp-save (meta-cond + [(real-register? '%cp) (make-tmp 'cp)] + [else #f])]) ; add 2 for the old RA and cchain (set! max-fv (fx+ max-fv 2)) (let-values ([(c-init c-args c-result c-return) (asm-foreign-callable info)]) @@ -12188,17 +12169,20 @@ ; c-return restores callee-save registers and returns to C (%seq ,(c-init) - ; although we don't actually need %cp in a register, we need - ; to make sure that `(%tc-ref cp)` doesn't change before S_call_help - ; is called, and claiming that %cp is live is the easiest way ,(restore-scheme-state - (in %cp) + (in %cp) ; to save and then restore just before S_call_help (out %ac0 %ac1 %xp %yp %ts %td scheme-args extra-regs)) ; need overflow check since we're effectively retroactively turning ; what was a foreign call into a Scheme non-tail call (fcallable-overflow-check) ; leave room for the RA & c-chain (set! ,%sfp ,(%inline + ,%sfp (immediate ,(fx* (constant ptr-bytes) 2)))) + ; stash %cp and restore later to make sure it's intact by the time + ; that we get to S_call_help + ,(meta-cond + [(real-register? '%cp) `(set! ,cp-save ,%cp)] + [else `(nop)]) + ; convert arguments ,(fold-left (lambda (e x arg-type c-arg) `(seq ,(C->Scheme arg-type c-arg x) ,e)) (set-locs fv* frame-x* (set-locs (map (lambda (reg) (in-context Lvalue (%mref ,%tc ,(reg-tc-disp reg)))) reg*) reg-x* @@ -12208,6 +12192,9 @@ ; needs to be a quote, not an immediate (set! ,(ref-reg %ac1) (literal ,(make-info-literal #f 'object 0 0))) (set! ,(ref-reg %ts) (label-ref ,self-label 0)) ; for locking + ,(meta-cond + [(real-register? '%cp) `(set! ,%cp ,cp-save)] + [else `(nop)]) ,(save-scheme-state (in %ac0 %ac1 %ts %cp) (out %xp %yp %td scheme-args extra-regs)) @@ -12546,7 +12533,7 @@ (set! ,fv0 ,%xp) ,(%mv-jump ,%xp (,%ac0 ,arg-registers ... ,fv0))))])))))))))))) (define reify-cc-help - (lambda (1-shot? always? finish) + (lambda (1-shot? always? save-ra? finish) (with-output-language (L13 Tail) (%seq (set! ,%td ,(%tc-ref stack-link)) @@ -12555,7 +12542,7 @@ (%seq ,(let ([alloc (%seq - (set! ,%xp ,(%constant-alloc type-closure (constant size-continuation))) + (set! ,%xp ,(%constant-alloc type-closure (constant size-continuation) #f save-ra?)) (set! ,(%mref ,%xp ,(constant continuation-code-disp)) (literal ,(make-info-literal #f 'library (lookup-libspec nuate) (constant code-data-disp)))))]) (if 1-shot? @@ -12697,28 +12684,25 @@ [(dorest4) (make-do-rest 4 frame-args-offset)] [(dorest5) (make-do-rest 5 frame-args-offset)] [(reify-1cc maybe-reify-cc) - (let ([other-reg* (fold-left (lambda (live* kill) (remq kill live*)) - (vector->list regvec) - ;; Registers used by `reify-cc-help` output, - ;; including some as needed per machine - (reg-list %xp %td %ac0 reify-support))] - [1cc? (eq? sym 'reify-1cc)]) + (let ([1cc? (eq? sym 'reify-1cc)]) `(lambda ,(make-named-info-lambda (if 1cc? "reify-1cc" "maybe-reify-cc") '(0)) 0 () ,(asm-enter (%seq - (check-live ,other-reg* ...) - ,(reify-cc-help 1cc? 1cc? + ;; make sure the reify-1cc intrinsic declares kill for registers used by `reify-cc-help`, + ;; plus (say) %ts to have one to allocate, plus more as needed to allocate per machine + (check-live ,(intrinsic-entry-live* reify-1cc) ...) + ,(reify-cc-help 1cc? 1cc? #t (lambda (reg) (if (eq? reg %td) - `(asm-return ,%td ,other-reg* ...) + `(asm-return ,%td ,(intrinsic-return-live* reify-1cc) ...) `(seq (set! ,%td ,reg) - (asm-return ,%td ,other-reg* ...)))))))))] + (asm-return ,%td ,(intrinsic-return-live* reify-1cc) ...)))))))))] [(callcc) `(lambda ,(make-named-info-lambda 'callcc '(1)) 0 () ,(%seq (set! ,(ref-reg %cp) ,(make-arg-opnd 1)) - ,(reify-cc-help #f #f + ,(reify-cc-help #f #f #f (lambda (reg) (%seq (set! ,(make-arg-opnd 1) ,reg) @@ -15716,7 +15700,7 @@ [(asm-return) (values (asm-return) chunk* offset)] [(asm-c-return ,info) (values (asm-c-return info) chunk* offset)] [(jump (label-ref ,l ,offset0)) - (values (asm-direct-jump l (adjust-return-point-offset offset0 l)) chunk* offset)] + (values (asm-direct-jump l offset0) chunk* offset)] [(jump (literal ,info)) (values (asm-literal-jump info) chunk* offset)] [(jump ,t) @@ -15799,8 +15783,7 @@ [(rp-compact-header ,error-on-values ,fs ,lpm) (values (asm-rp-compact-header code* error-on-values fs lpm current-func #f) chunk* offset)] [(set! ,x (label-ref ,l ,offset1)) (guard (eq? (local-label-func l) current-func)) - (let ([chunk (make-chunk code*)] - [offset1 (adjust-return-point-offset offset1 l)]) + (let ([chunk (make-chunk code*)]) (let ([offset (fx+ (chunk-size chunk) offset)] [chunk* (cons chunk chunk*)]) (let ([chunk (asm-return-address x l offset1 offset)]) (values '() (cons chunk chunk*) (fx+ (chunk-size chunk) offset)))))] diff --git a/s/cprep.ss b/s/cprep.ss index 2f610e189..e8e969f12 100644 --- a/s/cprep.ss +++ b/s/cprep.ss @@ -93,6 +93,7 @@ [(i3nt-stdcall) '__stdcall] [(i3nt-com) '__com] [(adjust-active) '__collect_safe] + [(varargs) '__varargs] [else #f])) x*))) (define-who uncprep-fp-specifier diff --git a/s/ftype.ss b/s/ftype.ss index 3f4e3d0aa..ce9ab7108 100644 --- a/s/ftype.ss +++ b/s/ftype.ss @@ -122,7 +122,7 @@ notes: big-endian machines, the first field occupies the high-order bits, with each subsequent field just below the preceding field. - - ftype pointers are records encapsulating an ftype descriptor + - ftypmembers (lambda (x) ;; Currently used for x86_64 and arm32 ABI: Returns a list of diff --git a/s/mathprims.ss b/s/mathprims.ss index 741eea4ed..8401e2f3a 100644 --- a/s/mathprims.ss +++ b/s/mathprims.ss @@ -317,11 +317,25 @@ ($flonum-sign x))) (set-who! flonum->fixnum - (let ([flmnf (fixnum->flonum (most-negative-fixnum))] - [flmpf (fixnum->flonum (most-positive-fixnum))]) + (let ([flmnf (meta-cond + ;; 64-bit fixnums: -1.0 is the same flonum + [(fl= (fixnum->flonum (most-negative-fixnum)) + (fl- (fixnum->flonum (most-negative-fixnum)) 1.0)) + ;; Find the next lower flonum: + (let loop ([amt 2.0]) + (let ([v (fl- (fixnum->flonum (most-negative-fixnum)) amt)]) + (if (fl= v (fixnum->flonum (most-negative-fixnum))) + (loop (fl* 2.0 amt)) + v)))] + [else + (fl- (fixnum->flonum (most-negative-fixnum)) 1.0)])] + ;; Although adding 1.0 doesn't change the flonum for + ;; 64-bit fixnums, the flonum doesn't fit in a fixnum, so + ;; this is the upper bbound we want either way: + [flmpf (fl+ (fixnum->flonum (most-positive-fixnum)) 1.0)]) (lambda (x) (unless (flonum? x) (flargerr who x)) - (unless (fl<= flmnf x flmpf) + (unless (fl< flmnf x flmpf) ($oops who "result for ~s would be outside of fixnum range" x)) (#3%flonum->fixnum x)))) ) diff --git a/s/mkgc.ss b/s/mkgc.ss index da253d998..a30a2da9e 100644 --- a/s/mkgc.ss +++ b/s/mkgc.ss @@ -1072,7 +1072,7 @@ [sweep (S_set_code_obj "gc" (RELOC_TYPE entry) _ a obj item_off)] [vfasl-sweep - (S_set_code_obj "vfasl" (abs-for-vfasl (RELOC_TYPE entry)) _ a obj item_off)] + (S_set_code_obj "vfasl" (abs_reloc_variant (RELOC_TYPE entry)) _ a obj item_off)] [else])))) (case-mode @@ -1142,11 +1142,6 @@ [(vfasl-copy vfasl-sweep) e] [else])) -(define-trace-macro (abs-for-vfasl e) - (case-mode - [vfasl-sweep reloc_abs] - [else e])) - (define-trace-macro (when-mark e) (case-mode [(mark) e] diff --git a/s/mkheader.ss b/s/mkheader.ss index e2eab56a5..c8d8c1bb2 100644 --- a/s/mkheader.ss +++ b/s/mkheader.ss @@ -656,8 +656,8 @@ (pr " \"cmp r12, #0\\n\\t\"\\~%") (pr " \"bne 1f\\n\\t\"\\~%") (pr " \"mov r12, #1\\n\\t\"\\~%") - (pr " \"strex r11, r12, [%0]\\n\\t\"\\~%") - (pr " \"cmp r11, #0\\n\\t\"\\~%") + (pr " \"strex r7, r12, [%0]\\n\\t\"\\~%") + (pr " \"cmp r7, #0\\n\\t\"\\~%") (pr " \"beq 2f\\n\\t\"\\~%") (pr " \"1:\\n\\t\"\\~%") (pr " \"ldr r12, [%0, #0]\\n\\t\"\\~%") @@ -667,7 +667,7 @@ (pr " \"2:\\n\\t\"\\~%") (pr " : \\~%") (pr " : \"r\" (addr)\\~%") - (pr " : \"cc\", \"memory\", \"r12\", \"r11\")~%") + (pr " : \"cc\", \"memory\", \"r12\", \"r7\")~%") (nl) (pr "#define UNLOCK(addr) \\~%") @@ -683,14 +683,14 @@ (pr " \"0:\\n\\t\"\\~%") (pr " \"ldrex r12, [%1, #0]\\n\\t\"\\~%") (pr " \"add r12, r12, #1\\n\\t\"\\~%") - (pr " \"strex r11, r12, [%1]\\n\\t\"\\~%") - (pr " \"cmp r11, #0\\n\\t\"\\~%") + (pr " \"strex r7, r12, [%1]\\n\\t\"\\~%") + (pr " \"cmp r7, #0\\n\\t\"\\~%") (pr " \"bne 0b\\n\\t\"\\~%") (pr " \"cmp r12, #0\\n\\t\"\\~%") (pr " \"moveq %0, #1\\n\\t\"\\~%") (pr " : \"=&r\" (ret)\\~%") (pr " : \"r\" (addr)\\~%") - (pr " : \"cc\", \"memory\", \"r12\", \"r11\")~%") + (pr " : \"cc\", \"memory\", \"r12\", \"r7\")~%") (nl) (pr "#define LOCKED_DECR(addr, ret) \\~%") @@ -698,14 +698,14 @@ (pr " \"0:\\n\\t\"\\~%") (pr " \"ldrex r12, [%1, #0]\\n\\t\"\\~%") (pr " \"sub r12, r12, #1\\n\\t\"\\~%") - (pr " \"strex r11, r12, [%1]\\n\\t\"\\~%") - (pr " \"cmp r11, #0\\n\\t\"\\~%") + (pr " \"strex r7, r12, [%1]\\n\\t\"\\~%") + (pr " \"cmp r7, #0\\n\\t\"\\~%") (pr " \"bne 0b\\n\\t\"\\~%") (pr " \"cmp r12, #0\\n\\t\"\\~%") (pr " \"moveq %0, #1\\n\\t\"\\~%") (pr " : \"=&r\" (ret)\\~%") (pr " : \"r\" (addr)\\~%") - (pr " : \"cc\", \"memory\", \"r12\", \"r11\")~%")] + (pr " : \"cc\", \"memory\", \"r12\", \"r7\")~%")] [(rv64) (pr "#define INITLOCK(addr) \\~%") (pr " __asm__ __volatile__ (\"sw zero, 0(%0)\\n\\t\"\\~%") diff --git a/s/np-languages.ss b/s/np-languages.ss index 1e46e69e4..eb2c322cd 100644 --- a/s/np-languages.ss +++ b/s/np-languages.ss @@ -542,10 +542,6 @@ (declare-primitive inc-profile-counter effect #f) (declare-primitive invoke-prelude effect #f) (declare-primitive keep-live effect #f) - (declare-primitive load-double effect #f) - (declare-primitive load-double->single effect #f) - (declare-primitive load-single effect #f) - (declare-primitive load-single->double effect #f) (declare-primitive locked-decr! effect #f) (declare-primitive locked-incr! effect #f) (declare-primitive pause effect #f) @@ -558,12 +554,12 @@ (declare-primitive save-flrv effect #f) (declare-primitive save-lr effect #f) ; ppc (declare-primitive store effect #f) - (declare-primitive store-double effect #f) - (declare-primitive store-single effect #f) - (declare-primitive store-single->double effect #f) + (declare-primitive store-single effect #f); not required by cpnanopass + (declare-primitive store-double->single effect #f) (declare-primitive store-with-update effect #f) ; ppc (declare-primitive unactivate-thread effect #f) ; threaded version only (declare-primitive vpush-multiple effect #f) ; arm + (declare-primitive vpop-multiple effect #f) ; arm (declare-primitive cas effect #f) (declare-primitive < pred #t) @@ -615,7 +611,6 @@ (declare-primitive sll value #t) (declare-primitive srl value #t) (declare-primitive sra value #t) - (declare-primitive trunc value #t) (declare-primitive zext8 value #t) (declare-primitive zext16 value #t) (declare-primitive zext32 value #t) ; 64-bit only @@ -627,6 +622,12 @@ (declare-primitive fp/ value #t) (declare-primitive fpt value #t) (declare-primitive fpsqrt value #t) ; not implemented for some ppc32 (so we don't use it) + (declare-primitive fptrunc value #t) + (declare-primitive double->single value #t) ; not required by cpnanopass + (declare-primitive single->double value #t) ; not required by cpnanopass + + (declare-primitive load-single value #t) ; not required by cpnanopass + (declare-primitive load-single->double value #t) (declare-primitive fpcastto value #t) ; 64-bit only (declare-primitive fpcastto/hi value #t) ; 32-bit only diff --git a/s/primdata.ss b/s/primdata.ss index a0f61bebb..dc0fe5148 100644 --- a/s/primdata.ss +++ b/s/primdata.ss @@ -2098,6 +2098,7 @@ ($ftd-atomic-category [flags single-valued]) ($ftd-compound? [sig [(sub-ptr) -> (boolean)]] [flags discard]) ($ftd-size [flags single-valued]) + ($ftd-unsigned? [flags single-valued]) ($ftd->members [flags single-valued]) ($ftype-guardian-oops [flags]) ($ftype-pointer? [sig [(ptr) -> (boolean)]] [flags pure unrestricted mifoldable]) diff --git a/s/rv64.ss b/s/rv64.ss index d32889b87..33b40f7ab 100644 --- a/s/rv64.ss +++ b/s/rv64.ss @@ -62,60 +62,227 @@ ;; Mapping of scheme specific task registers to registers of the CPU (define-registers -;; Use saved registers r18-r21 for %tc, %sfp, %ap and %trap (reserved ;; Three or more cols for each definition ;reg alias ... callee-save reg-mdinfo - [%tc %x9 %s1 #t 9] ;; thread context - [%sfp %x8 %s0 %fp #t 8] ;; scheme frame pointer - [%ap %x10 %a0 %Carg1 %Cretval #f 10] ;; - [%trap %x11 %a1 %Carg2 %Cretval1 #f 11] ;; tracks when scheme should check for interrupts - [%real-zero %x0 #f 0]);; hardwired zero - can't call it %zero + [%tc %x9 %s1 #t 9 uptr] ;; thread context + [%sfp %x8 %s0 %fp #t 8 uptr] ;; scheme frame pointer + [%ap %x10 %a0 %Carg1 %Cretval #f 10 uptr] ;; + [%trap %x11 %a1 %Carg2 %Cretval1 #f 11 uptr] ;; tracks when scheme should check for interrupts + [%real-zero %x0 #f 0 uptr]);; hardwired zero - can't call it %zero (allocable - [%ac0 %x12 %a2 %Carg3 #f 12] ;; argument count - [%xp %x13 %a3 %Carg4 #f 13] ;; used during alloc for the computed alloc spot - [%ts %x14 %a4 %Carg5 #f 14] ;; special temps - [%td %x15 %a5 %Carg6 #f 15] ;; special temps - [%cp %x16 %a6 %Carg7 #f 16] ;; closure pointer + [%ac0 %x12 %a2 %Carg3 #f 12 uptr] ;; argument count + [%xp %x13 %a3 %Carg4 #f 13 uptr] ;; used during alloc for the computed alloc spot + [%ts %x14 %a4 %Carg5 #f 14 uptr] ;; special temps + [%td %x15 %a5 %Carg6 #f 15 uptr] ;; special temps + [%cp %x16 %a6 %Carg7 #f 16 uptr] ;; closure pointer ;; Extra registers - length should match asm-arg-reg-max - [ %x1 %ra %lr #f 1] - [ %x3 %gp #f 3] - [ %x4 %tp #f 4] - [ %x5 %t0 #f 5] - [ %x6 %t1 #f 6] - [ %x7 %t2 #f 7] - [ %x17 %a7 %Carg8 #f 17] - [ %x18 %s2 #t 18] - [ %x19 %s3 #t 19] - [ %x20 %s4 #t 20] - [ %x21 %s5 #t 21] - [ %x22 %s6 #t 22] - [ %x23 %s7 #t 23] - [ %x24 %s8 #t 24] - [ %x25 %s9 #t 25] - [ %x26 %s10 #t 26] - [ %x27 %s11 #t 27] - [ %x28 %t3 #f 28] - [ %x29 %t4 #f 29] - [ %x30 %t5 #f 30] - [ %x31 %t6 #f 31] - ) + [ %x1 %ra %lr #f 1 uptr] + [ %x3 %gp #f 3 uptr] + [ %x4 %tp #f 4 uptr] + [ %x5 %t0 #f 5 uptr] + [ %x6 %t1 #f 6 uptr] + [ %x7 %t2 #f 7 uptr] + [ %x17 %a7 %Carg8 #f 17 uptr] + [ %x18 %s2 #t 18 uptr] + [ %x19 %s3 #t 19 uptr] + [ %x20 %s4 #t 20 uptr] + [ %x21 %s5 #t 21 uptr] + [ %x22 %s6 #t 22 uptr] + [ %x23 %s7 #t 23 uptr] + [ %x24 %s8 #t 24 uptr] + [ %x25 %s9 #t 25 uptr] + [ %x26 %s10 #t 26 uptr] + [ %x27 %s11 #t 27 uptr] + [ %x28 %t3 #f 28 uptr] + [ %x29 %t4 #f 29 uptr] + [ %x30 %t5 #f 30 uptr] + [ %x31 %t6 #f 31 uptr] + [ %f18 %fs2 #t 51 fp] + [ %f19 %fs3 #t 52 fp] + ) (machine-dependent - [%sp %x2 #f 2] - [%pc #f 32] - [%flreg1 #f 33] - [ %f10 %fa0 %Cfpretval #f 43] - [ %f11 %fa1 %Cfpretval1 #f 44] + [%sp %x2 #t 2 uptr] + ;; There is really not a specific number for the pc reg + ;; so we fake it and call it 32 + [%pc #f 32 uptr] + ;; Floating point registers + [ %f0 %ft0 #f 33 fp] + [ %f1 %ft1 #f 34 fp] + [ %f2 %ft2 #f 35 fp] + [ %f3 %ft3 #f 36 fp] + [ %f4 %ft4 #f 37 fp] + [ %f5 %ft5 #f 38 fp] + [ %f6 %ft6 #f 39 fp] + [ %f7 %ft7 #f 40 fp] + [ %f8 %fs0 #t 41 fp] + [ %f9 %fs1 #t 42 fp] + [%Cfparg1 %Cfpretval %f10 %fa0 #f 43 fp] + [%Cfparg2 %Cfpretval1 %f11 %fa1 #f 44 fp] + [%Cfparg3 %f12 %fa2 #f 45 fp] + [%Cfparg4 %f13 %fa3 #f 46 fp] + [%Cfparg5 %f14 %fa4 #f 47 fp] + [%Cfparg6 %f15 %fa5 #f 48 fp] + [%Cfparg7 %f16 %fa6 #f 49 fp] + [%Cfparg8 %f17 %fa7 #f 50 fp] + ;; f18 and f19 are in the allocable section + [ %f20 %fs4 #t 53 fp] + [ %f21 %fs5 #t 54 fp] + [ %f22 %fs6 #t 55 fp] + [ %f23 %fs7 #t 56 fp] + [ %f24 %fs8 #t 57 fp] + [ %f25 %fs9 #t 58 fp] + [ %f26 %fs10 #t 59 fp] + [ %f27 %fs11 #t 60 fp] + [ %f28 %ft8 #f 61 fp] + [ %f29 %ft9 #f 62 fp] + [ %f30 %ft10 #f 63 fp] + [ %f31 %ft11 #f 64 fp] )) ;;; SECTION 2: instructions -(module (md-handle-jump) ; also sets primitive handlers +(module (md-handle-jump) ; also sets primitive handlers (import asm-module) -) + (define-syntax (seq x) + (syntax-case x () + [(_ e ... ex) + (with-syntax ([(t ...) (generate-temporaries #'(e ...))]) + #'(let ([t e] ...) + (with-values ex + (case-lambda + [(x*) (cons* t ... x*)] + [(x* p) (values (cons* t ... x*) p)]))))])) + + (define lmem? mref?) + + (define (mem? x) + (or (lmem? x) (literal@? x))) + + (define (set-ur=mref ur mref) + (mref->mref mref + (lambda (mref) + (build-set! ,ur ,mref)))) + + (define (lvalue->ur x k) + (if (mref? x) + (let ([u (make-tmp 'u)]) + (seq + (set-ur=mref u x) + (k u))) + (k x))) + + (define (mref->mref a k) + (define (return x0 x1 imm type) + (k + (with-output-language (L15d Triv) + `(mref ,x0 ,x1 ,imm ,type)))) + (nanopass-case (L15c Triv) a + [(mref ,lvalue0 ,lvalue1 ,imm ,type) + (lvalue->ur lvalue0 + (lambda (x0) + (lvalue->ur lvalue1 + (lambda (x1) + (return x0 x1 imm type)))))])) + + (define (mem->mem a k) + (cond + [(literal@? a) + (let ([u (make-tmp 'u)]) + (seq + (build-set! ,u ,(literal@->literal a)) + (k + (with-output-language (L15d Lvalue) + `(mref ,u ,%zero 0 ptr)))))] + [else (mref->mref a k)])) + + (define (md-handle-jump t) + (with-output-language (L15d Tail) + + (nanopass-case (L15c Triv) t + [,lvalue + (if (mem? lvalue) + (mem->mem lvalue (lambda (e) (values '() `(jump ,e)))) + (values '() `(jump ,lvalue)))] + [(literal ,info) (values '() `(jump (literal ,info)))] + [(label-ref ,l ,offset) (values '() `(jump (label-ref ,l ,offset)))])))) ;;; SECTION 3: assembler -(module asm-module () +(module asm-module ( + asm-c-return + asm-conditional-jump + asm-direct-jump + asm-enter + asm-foreign-call + asm-foreign-callable + asm-fpmove + asm-indirect-jump + asm-jump + asm-library-jump + asm-literal-jump + asm-move + asm-return + asm-return-address + asm-rp-compact-header + asm-rp-header + asm-size + ) -) + (define asm-enter values) + (define (asm-c-return info) + (sorry! 'asm-c-return "unimplemented: asm-c-return")) + + (define (asm-direct-jump l offset) + (sorry! 'asm-direct-jump "unimplemented: asm-direct-jump")) + + (define-who (asm-indirect-jump src) + (sorry! who "unimplemented: asm-indirect-jump")) + + (define (asm-library-jump l) + (sorry! 'asm-library-jump "unimplemented: asm-library-jump")) + + (define (asm-return) + (sorry! 'asm-return "unimplemented: asm-return")) + + (define-who (asm-fpmove code* dest src) + (sorry! who "unimplemented: asm-fpmove")) + + (define (asm-literal-jump info) + (sorry! 'asm-literal-jump "unimplemented: asm-literal-jump")) + + (define-who (asm-move code* dest src) + (sorry! who "unimplemented: asm-move")) + + (define (asm-rp-compact-header code* err? fs lpm func code-size) + (sorry! 'asm-rp-compact-header "unimplemented: asm-rp-compact-header")) + + (define (asm-rp-header code* mrvl fs lpm func code-size) + (sorry! 'asm-rp-header "unimplemented: asm-rp-header")) + + (define (asm-size x) + (case (car x) + [(asm rv64-abs rv64-jump rv64-call) 0] + [else 8])) + + (define-who (asm-jump l next-addr) + (sorry! who "unimplemented: asm-jump")) + + (define-who (asm-conditional-jump info l1 l2 next-addr) + (sorry! who "unimplemented: asm-conditional-jump")) + + (define-who (asm-return-address dest l incr-offset next-addr) + (sorry! who "unimplemented: asm-return-address")) + + (module ( + asm-foreign-call + asm-foreign-callable + ) + + (define-who asm-foreign-call + (with-output-language (L13 Effect) + (let () #f))) + + (define-who asm-foreign-callable + (with-output-language (L13 Effect) + (let () #f))))) diff --git a/s/rv64le.def b/s/rv64le.def index 699f9e4d7..a1bf1cbe7 100644 --- a/s/rv64le.def +++ b/s/rv64le.def @@ -29,6 +29,12 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 21) (define-constant asm-arg-reg-cnt 8) + +;; What's this for? +;; Sounds like the maximum number of available fp regs +;; but it's set to 2 in all other archs... doing the same here +;; for now. +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long") (define-constant typedef-uptr "unsigned long") diff --git a/s/syntax.ss b/s/syntax.ss index 4c185b439..425f30410 100644 --- a/s/syntax.ss +++ b/s/syntax.ss @@ -8920,39 +8920,40 @@ (define squawk (lambda (x) (syntax-error x (format "invalid ~s convention" who)))) - (let loop ([conv* conv*] [accum '()] [keep-accum '()]) + (let loop ([conv* conv*] [selected #f] [accum '()] [keep-accum '()]) (cond [(null? conv*) (datum->syntax #'filter-conv keep-accum)] [else (let* ([orig-c (car conv*)] - [c (syntax->datum orig-c)] - [c (cond - [(not c) #f] - [(eq? c '__collect_safe) 'adjust-active] - [else - (case ($target-machine) - [(i3nt ti3nt) - (case c - [(__stdcall) 'i3nt-stdcall] - [(__cdecl) #f] - [(__com) 'i3nt-com] - [else (squawk orig-c)])] - [(ppcnt) - (case c - [(__stdcall __cdecl) #f] - [else (squawk orig-c)])] - [else (squawk orig-c)])])]) - (when (member c accum) - (syntax-error orig-c (format "redundant ~s convention" who))) - (unless (or (null? accum) - (eq? c 'adjust-active) - (and (eq? 'adjust-active (car accum)) - (null? (cdr accum)))) - (syntax-error orig-c (format "conflicting ~s convention" who))) - (loop (cdr conv*) (cons c accum) - (if c - (cons c keep-accum) - keep-accum)))])))) + [c (syntax->datum orig-c)]) + (let-values ([(c select?) + (cond + [(not c) (values #f #f)] + [(eq? c '__collect_safe) (values 'adjust-active #f)] + [(eq? c '__varargs) (values 'varargs #f)] + [else + (values + (case ($target-machine) + [(i3nt ti3nt) + (case c + [(__stdcall) 'i3nt-stdcall] + [(__cdecl) #f] + [(__com) 'i3nt-com] + [else (squawk orig-c)])] + [(ppcnt) + (case c + [(__stdcall __cdecl) #f] + [else (squawk orig-c)])] + [else (squawk orig-c)]) + #t)])]) + (when (member c accum) + (syntax-error orig-c (format "redundant ~s convention" who))) + (when (and select? selected) + (syntax-error orig-c (format "conflicting ~s convention" who))) + (loop (cdr conv*) (if select? c selected) (cons c accum) + (if c + (cons c keep-accum) + keep-accum))))])))) (define $make-foreign-procedure (lambda (who conv* foreign-name ?foreign-addr type* result-type) @@ -8960,6 +8961,9 @@ (define (check-strings-allowed) (when (memq 'adjust-active (syntax->datum conv*)) ($oops who "string argument not allowed with __collect_safe procedure"))) + (define (check-floats-allowed) + (when (memq 'varargs (syntax->datum conv*)) + ($oops who "float argument not allowed for __varargs procedure"))) (with-syntax ([conv* conv*] [foreign-name foreign-name] [?foreign-addr ?foreign-addr] @@ -9056,6 +9060,9 @@ ($fp-string->utf32 x 'big) (err ($moi) x))))) (u32*))] + [(single-float) + (check-floats-allowed) + #f] [else #f]) (if (or ($ftd? type) ($ftd-as-box? type)) (let ([ftd (if ($ftd? type) type (unbox type))]) @@ -9151,6 +9158,9 @@ (define (check-strings-allowed) (when (memq 'adjust-active (syntax->datum conv*)) ($oops who "string result not allowed with __collect_safe callable"))) + (define (check-floats-allowed) + (when (memq 'varargs (syntax->datum conv*)) + ($oops who "float argument not allowed for __varargs procedure"))) (with-syntax ([conv* conv*] [?proc ?proc]) (with-syntax ([((actual (t ...) (arg ...)) ...) (map @@ -9240,6 +9250,9 @@ #`((mod x #x100000000000000) (x) (unsigned-64)))] + [(single-float) + (check-floats-allowed) + #f] [else #f]) (with-syntax ([(x) (generate-temporaries #'(*))]) #`(x (x) (#,(datum->syntax #'foreign-callable type)))))) diff --git a/s/trv64le.def b/s/trv64le.def index 3b0b4312c..cf580cbc6 100644 --- a/s/trv64le.def +++ b/s/trv64le.def @@ -29,6 +29,12 @@ (define-constant max-integer-alignment 8) (define-constant asm-arg-reg-max 21) (define-constant asm-arg-reg-cnt 8) + +;; What's this for? +;; Sounds like the maximum number of available fp regs +;; but it's set to 2 in all other archs... doing the same here +;; for now. +(define-constant asm-fpreg-max 2) (define-constant typedef-ptr "void *") (define-constant typedef-iptr "long") (define-constant typedef-uptr "unsigned long") diff --git a/s/x86.ss b/s/x86.ss index 25986f6dc..666be8659 100644 --- a/s/x86.ss +++ b/s/x86.ss @@ -38,8 +38,7 @@ [%fptmp1 #f 0 fp] [%fptmp2 #f 1 fp] [%sp #t 4 uptr] - #;[%esi #f 6]) - (reify-support %ts)) + #;[%esi #f 6])) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -811,17 +810,16 @@ (define-instruction effect (flds) [(op (z mem)) `(asm ,info ,asm-flds ,z)]) - (define-instruction effect (load-single->double load-double->single) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-cvt op (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction value (load-single->double) + [(op (x fpur) (y fpmem)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt 'single->double) ,y))]) - (define-instruction effect (store-single store-double) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-store op (info-loadfl-flreg info)) ,x ,y ,z)]) - - (define-instruction effect (load-double load-single) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-load op (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction effect (store-double->single) + [(op (x fpmem) (y fpmem fpur)) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-fl-cvt 'double->single) ,y)) + `(asm ,info ,asm-store-single ,x ,u)))]) (define-instruction value (fpt) [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))]) @@ -855,8 +853,8 @@ (define-instruction effect inc-profile-counter [(op (x ur mem) (y imm32 ur)) `(asm ,info ,asm-inc-profile-counter ,x ,y)]) - (define-instruction value (trunc) - [(op (z ur) (x ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-trunc ,x))]) + (define-instruction value (fptrunc) + [(op (z ur) (x fpmem fpur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-fptrunc ,x))]) ;; no kills since we expect to be called when all necessary state has already been saved (define-instruction value get-tc @@ -1029,7 +1027,7 @@ asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-lea1 asm-lea2 asm-indirect-call asm-fstpl asm-fstps asm-fldl asm-flds asm-condition-code - asm-fl-cvt asm-fl-store asm-fl-load asm-fpt asm-trunc asm-div + asm-fl-cvt asm-store-single asm-fpt asm-fptrunc asm-div asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-locked-cmpxchg asm-fpop-2 asm-fpmove asm-fpmovefrom asm-fpcastfrom asm-fpcastto asm-fpsqrt asm-c-simple-call asm-save-flrv asm-restore-flrv asm-return asm-c-return asm-size @@ -1820,28 +1818,17 @@ (emit flds src code*)))) (define asm-fl-cvt - (lambda (op flreg) - (lambda (code* base index offset) - (let ([src (build-mem-opnd base index offset)]) - (case op - [(load-single->double) (emit sse.cvtss2sd src (cons 'reg flreg) code*)] - [(load-double->single) (emit sse.cvtsd2ss src (cons 'reg flreg) code*)]))))) - - (define asm-fl-store - (lambda (op flreg) - (lambda (code* base index offset) - (let ([dest (build-mem-opnd base index offset)]) + (lambda (op) + (lambda (code* dest-reg src) + (Trivit (src) (case op - [(store-single) (emit sse.movss (cons 'reg flreg) dest code*)] - [(store-double) (emit sse.movsd (cons 'reg flreg) dest code*)]))))) + [(single->double) (emit sse.cvtss2sd src (cons 'reg dest-reg) code*)] + [(double->single) (emit sse.cvtsd2ss src (cons 'reg dest-reg) code*)]))))) - (define asm-fl-load - (lambda (op flreg) - (lambda (code* base index offset) - (let ([src (build-mem-opnd base index offset)]) - (case op - [(load-single) (emit sse.movss src (cons 'reg flreg) code*)] - [(load-double) (emit sse.movsd src (cons 'reg flreg) code*)]))))) + (define asm-store-single + (lambda (code* dest flreg) + (Trivit (dest) + (emit sse.movss (cons 'reg flreg) dest code*)))) (define asm-fpt (lambda (code* dest src) @@ -1907,11 +1894,10 @@ (emit sse.psrlq (cons 'reg %fptmp1) shift (emit sse.movd (cons 'reg %fptmp1) dest code*)))]))))) - (define asm-trunc - (lambda (code* dest flonumreg) - (Trivit (dest) - (let ([src `(disp ,(constant flonum-data-disp) ,flonumreg)]) - (emit sse.cvttsd2si src dest code*))))) + (define asm-fptrunc + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.cvttsd2si src dest code*)))) (define asm-load (lambda (type) @@ -2323,7 +2309,8 @@ (define asm-direct-jump (lambda (l offset) - (emit bra (make-funcrel 'literal l offset) '()))) + (let ([offset (adjust-return-point-offset offset l)]) + (emit bra (make-funcrel 'literal l offset) '())))) (define asm-literal-jump (lambda (info) @@ -2552,15 +2539,13 @@ (letrec ([load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + `(set! ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -2852,15 +2837,13 @@ (define load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%mref ,%sp ,%zero ,offset fp))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%inline load-single->double ,(%mref ,%sp ,%zero ,offset fp)))))) (define load-stack (lambda (type offset) (lambda (lvalue) ; requires lvalue diff --git a/s/x86_64.ss b/s/x86_64.ss index c62724fb7..08d122ed5 100644 --- a/s/x86_64.ss +++ b/s/x86_64.ss @@ -43,8 +43,7 @@ [%Cfparg2 #f 1 fp] [%fptmp1 #f 4 fp] ; xmm 0-5 are caller-save [%fptmp2 #f 5 fp] ; xmm 6-15 are callee-save - [%sp #t 4 uptr]) - (reify-support %ts)) + [%sp #t 4 uptr])) (define-registers (reserved [%tc %r14 #t 14 uptr] @@ -77,8 +76,7 @@ [%Cfparg8 #f 7 fp] [%fptmp1 #f 8 fp] [%fptmp2 #f 9 fp] - [%sp #t 4 uptr]) - (reify-support %ts))) + [%sp #t 4 uptr]))) ;;; SECTION 2: instructions (module (md-handle-jump) ; also sets primitive handlers @@ -866,26 +864,32 @@ `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-lea2 0) ,y ,z)) `(asm ,info ,(asm-store (info-load-type info)) ,x ,u (immediate 0) ,w))))))]) - (define-instruction effect (load-single->double load-double->single) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-cvt op (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction value (load-single->double) + [(op (x fpur) (y fpmem)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt 'single->double) ,y))]) - (define-instruction effect (store-single->double) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-store-single->double (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction value (single->double double->single) + [(op (x fpur) (y fpmem fpur)) + `(set! ,(make-live-info) ,x (asm ,info ,(asm-fl-cvt op) ,y))]) - (define-instruction effect (store-single store-double) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-store op (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction effect (store-double->single) + [(op (x fpmem) (y fpmem fpur)) + (let ([u (make-tmp 'u 'fp)]) + (seq + `(set! ,(make-live-info) ,u (asm ,null-info ,(asm-fl-cvt 'double->single) ,y)) + `(asm ,info ,asm-store-single ,x ,u)))]) + + (define-instruction effect (store-single) + [(op (x fpmem) (y fpur)) + `(asm ,info ,asm-store-single ,x ,y)]) - (define-instruction effect (load-double load-single) - [(op (x ur) (y ur) (z imm32)) - `(asm ,info ,(asm-fl-load op (info-loadfl-flreg info)) ,x ,y ,z)]) + (define-instruction value (load-single) + [(op (x fpur) (y fpmem)) + `(set! ,(make-live-info) ,x (asm ,info ,asm-load-single ,y))]) (define-instruction value (get-double) - [(op (z ur)) - `(set! ,(make-live-info) ,z - (asm ,info ,(asm-get-double (info-loadfl-flreg info))))]) + [(op (z ur) (y fpur)) + `(set! ,(make-live-info) ,z (asm ,info ,asm-get-double ,y))]) (define-instruction value (fpt) [(op (x fpur) (y ur)) `(set! ,(make-live-info) ,x (asm ,info ,asm-fpt ,y))]) @@ -917,8 +921,8 @@ (define-instruction effect inc-profile-counter [(op (x ur mem) (y imm32 ur)) `(asm ,info ,asm-inc-profile-counter ,x ,y)]) - (define-instruction value (trunc) - [(op (z ur) (x ur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-trunc ,x))]) + (define-instruction value (fptrunc) + [(op (z ur) (x fpmem fpur)) `(set! ,(make-live-info) ,z (asm ,info ,asm-fptrunc ,x))]) (define-instruction value get-tc [(op (z ur)) @@ -1114,7 +1118,7 @@ asm-direct-jump asm-return-address asm-jump asm-conditional-jump asm-data-label asm-rp-header asm-rp-compact-header asm-lea1 asm-lea2 asm-indirect-call asm-condition-code - asm-fl-cvt asm-fl-store asm-fl-load asm-fpt asm-trunc asm-div asm-popcount + asm-fl-cvt asm-store-single asm-load-single asm-fpt asm-fptrunc asm-div asm-popcount asm-exchange asm-pause asm-locked-incr asm-locked-decr asm-locked-cmpxchg asm-fpsqrt asm-fpop-2 asm-fpmove asm-fpcast asm-c-simple-call @@ -2004,12 +2008,12 @@ [else (sorry! who "unexpected op ~s" op)]))))) (define asm-fl-cvt - (lambda (op flreg) - (lambda (code* base index offset) - (let ([src (build-mem-opnd base index offset)]) + (lambda (op) + (lambda (code* dest-reg src) + (Trivit (src) (case op - [(load-single->double) (emit sse.cvtss2sd src (cons 'reg flreg) code*)] - [(load-double->single) (emit sse.cvtsd2ss src (cons 'reg flreg) code*)]))))) + [(single->double) (emit sse.cvtss2sd src (cons 'reg dest-reg) code*)] + [(double->single) (emit sse.cvtsd2ss src (cons 'reg dest-reg) code*)]))))) (define asm-store-single->double (lambda (flreg) @@ -2018,26 +2022,19 @@ (emit sse.cvtss2sd flreg flreg (emit sse.movsd flreg dest code*)))))) - (define asm-fl-store - (lambda (op flreg) - (lambda (code* base index offset) - (let ([dest (build-mem-opnd base index offset)]) - (case op - [(store-single) (emit sse.movss (cons 'reg flreg) dest code*)] - [(store-double) (emit sse.movsd (cons 'reg flreg) dest code*)]))))) + (define asm-store-single + (lambda (code* dest flreg) + (Trivit (dest) + (emit sse.movss (cons 'reg flreg) dest code*)))) - (define asm-fl-load - (lambda (op flreg) - (lambda (code* base index offset) - (let ([src (build-mem-opnd base index offset)]) - (case op - [(load-single) (emit sse.movss src (cons 'reg flreg) code*)] - [(load-double) (emit sse.movsd src (cons 'reg flreg) code*)]))))) + (define asm-load-single + (lambda (code* flreg src) + (Trivit (src) + (emit sse.movss src (cons 'reg flreg) code*)))) (define asm-get-double - (lambda (flreg) - (lambda (code* dst) - (emit sse.movd (cons 'reg flreg) (cons 'reg dst) code*)))) + (lambda (code* dst flreg) + (emit sse.movd (cons 'reg flreg) (cons 'reg dst) code*))) (define asm-fpt (lambda (code* dest src) @@ -2082,11 +2079,10 @@ (Trivit (dest src) (emit sse.movd src dest code*)))) - (define asm-trunc - (lambda (code* dest flonumreg) - (Trivit (dest) - (let ([src `(disp ,(constant flonum-data-disp) ,flonumreg)]) - (emit sse.cvttsd2si src dest code*))))) + (define asm-fptrunc + (lambda (code* dest src) + (Trivit (dest src) + (emit sse.cvttsd2si src dest code*)))) (define asm-load (lambda (type) @@ -2523,7 +2519,8 @@ (define asm-direct-jump (lambda (l offset) - (asm-helper-jump '() (make-funcrel 'x86_64-jump l offset)))) + (let ([offset (adjust-return-point-offset offset l)]) + (asm-helper-jump '() (make-funcrel 'x86_64-jump l offset))))) (define asm-literal-jump (lambda (info) @@ -2543,10 +2540,11 @@ (or (cond [(local-label-offset l) => (lambda (offset) - (let ([disp (fx- next-addr (fx- offset incr-offset))]) - (and (signed-32? disp) - (Trivit (dest) - (emit lea `(riprel ,disp) dest '())))))] + (let ([incr-offset (adjust-return-point-offset incr-offset l)]) + (let ([disp (fx- next-addr (fx- offset incr-offset))]) + (and (signed-32? disp) + (Trivit (dest) + (emit lea `(riprel ,disp) dest '()))))))] [else #f]) (asm-move '() dest (with-output-language (L16 Triv) `(label-ref ,l ,incr-offset))))))) @@ -2817,17 +2815,15 @@ (module (push-registers pop-registers push-registers-size) (define (move-registers regs load?) - (define vfp (make-vfp)) - (define (fp-reg? reg) - (let loop ([i (fx- (vector-length vfp) 1)]) - (or (eq? reg (vector-ref vfp i)) - (and (fx> i 0) (loop (fx- i 1)))))) + (define (fp-reg? reg) (eq? (reg-type reg) 'fp)) (with-output-language (L13 Effect) (let loop ([regs regs] [offset 0]) (let* ([reg (car regs)] [e (cond [(fp-reg? reg) - `(inline ,(make-info-loadfl reg) ,(if load? %load-double %store-double) ,%sp ,%zero (immediate ,offset))] + (if load? + `(set! ,reg ,(%mref ,%sp ,%zero ,offset fp)) + `(set! ,(%mref ,%sp ,%zero ,offset fp) ,reg))] [load? `(set! ,reg ,(%mref ,%sp ,offset))] [else `(set! ,(%mref ,%sp ,offset) ,reg)])] [regs (cdr regs)]) @@ -2861,15 +2857,13 @@ (letrec ([load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,%sp ,%zero (immediate ,offset)))))] + `(set! ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp)) - (inline ,(make-info-loadfl %fptmp1) ,%store-single ,%sp ,%zero (immediate ,offset)))))] + (%inline store-double->single ,(%mref ,%sp ,%zero ,offset fp) + ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-int-stack (lambda (offset) (lambda (rhs) ; requires rhs @@ -2877,17 +2871,18 @@ [load-double-reg (lambda (fpreg) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))))] + `(set! ,fpreg ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))))] [load-double-reg2 (lambda (fpreg ireg) (lambda (x) ; requires var (%seq - (inline ,(make-info-loadfl fpreg) ,%load-double ,x ,%zero ,(%constant flonum-data-disp)) - (set! ,ireg (inline ,(make-info-loadfl fpreg) ,%get-double)))))] + (set! ,fpreg ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp)) + ;; To support the varargs convention, copy the value into a GP register + (set! ,ireg ,(%inline get-double ,fpreg)))))] [load-single-reg (lambda (fpreg) (lambda (x) ; requires var - `(inline ,(make-info-loadfl fpreg) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))))] + `(set! ,fpreg ,(%inline double->single ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp)))))] [load-int-reg (lambda (type ireg) (lambda (x) @@ -2934,10 +2929,10 @@ (cond [(fx= size 4) ;; Must be the last element - `(inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-single ,x ,%zero (immediate ,x-offset))] + `(set! ,(vector-ref vfp ifp) ,(%inline load-single ,(%mref ,x ,%zero ,x-offset fp)))] [else `(seq - (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%load-double ,x ,%zero (immediate ,x-offset)) + (set! ,(vector-ref vfp ifp) ,(%mref ,x ,%zero ,x-offset fp)) ,(loop (fx- size 8) iint (fx+ ifp 1) (cdr classes) (fx+ x-offset 8)))])] ;; Remaining cases are integers: [(>= size 8) @@ -3138,8 +3133,8 @@ `(seq ,(loop (cdr classes) (fx+ offset 8) iregs (cdr fpregs) (fx- size 8)) ,(case size - [(4) `(inline ,(make-info-loadfl (car fpregs)) ,%store-single ,%rcx ,%zero (immediate ,offset))] - [else `(inline ,(make-info-loadfl (car fpregs)) ,%store-double ,%rcx ,%zero (immediate ,offset))]))] + [(4) (%inline store-single ,(%mref ,%rcx ,%zero ,offset fp) ,(car fpregs))] + [else `(set! ,(%mref ,%rcx ,%zero ,offset fp) ,(car fpregs))]))] [else `(seq ,(loop (cdr classes) (fx+ offset 8) (cdr iregs) fpregs (fx- size 8)) @@ -3215,6 +3210,7 @@ [result-classes (classify-type result-type)] [result-size (classified-size result-type)] [fill-result-here? (result-fits-in-registers? result-classes)] + [result-reg* (get-result-regs fill-result-here? result-type result-classes)] [adjust-active? (if-feature pthreads (memq 'adjust-active conv*) #f)]) (with-values (do-args (if fill-result-here? (cdr arg-type*) arg-type*) (make-vint) (make-vfp)) (lambda (frame-size nfp locs live* fp-live*) @@ -3225,17 +3221,17 @@ (let* ([t (if adjust-active? %deact t0)] ; need a register if `adjust-active?` [c-call (add-deactivate adjust-active? t0 (append fp-live* live*) - (get-result-regs fill-result-here? result-type result-classes) + result-reg* (if-feature windows (%seq (set! ,%sp ,(%inline - ,%sp (immediate 32))) - (inline ,(make-info-kill*-live* (reg-list %rax %rdx) live*) ,%c-call ,t) + (inline ,(make-info-kill*-live* result-reg* (append fp-live* live*)) ,%c-call ,t) (set! ,%sp ,(%inline + ,%sp (immediate 32)))) (%seq ;; System V ABI varargs functions require count of fp regs used in %al register. ;; since we don't know if the callee is a varargs function, we always set it. (set! ,%rax (immediate ,nfp)) - (inline ,(make-info-kill*-live* (reg-list %rax %rdx) (cons %rax live*)) ,%c-call ,t))))]) + (inline ,(make-info-kill*-live* result-reg* (cons %rax (append fp-live* live*))) ,%c-call ,t))))]) (cond [fill-result-here? (add-fill-result c-call (fx- frame-size (constant ptr-bytes)) result-classes result-size)] @@ -3243,12 +3239,10 @@ (nanopass-case (Ltype Type) result-type [(fp-double-float) (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] + `(set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) ,%Cfpretval))] [(fp-single-float) (lambda (lvalue) - `(inline ,(make-info-loadfl %Cfpretval) ,%store-single->double ,lvalue ,%zero - ,(%constant flonum-data-disp)))] + `(set! ,(%mref ,lvalue ,%zero ,(constant flonum-data-disp) fp) ,(%inline single->double ,%Cfpretval)))] [(fp-integer ,bits) (case bits [(8) (lambda (lvalue) `(set! ,lvalue ,(%inline sext8 ,%rax)))] @@ -3322,15 +3316,13 @@ (define load-double-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%mref ,%sp ,%zero ,offset fp))))) (define load-single-stack (lambda (offset) (lambda (x) ; requires var - (%seq - (inline ,(make-info-loadfl %fptmp1) ,%load-single->double ,%sp ,%zero (immediate ,offset)) - (inline ,(make-info-loadfl %fptmp1) ,%store-double ,x ,%zero ,(%constant flonum-data-disp)))))) + `(set! ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp) + ,(%inline load-single->double ,(%mref ,%sp ,%zero ,offset fp)))))) (define load-int-stack (lambda (type offset) (lambda (lvalue) @@ -3370,15 +3362,13 @@ [(fp-double-float) (if (< i 4) (%seq - (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-double - ,%sp ,%zero (immediate ,isp)) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))] [(fp-single-float) (if (< i 4) (%seq - (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-single - ,%sp ,%zero (immediate ,isp)) + ,(%inline store-single ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8))) (f (cdr types) i isp))] [(fp-ftd& ,ftd) @@ -3393,8 +3383,7 @@ (eq? 'float (caar ($ftd->members ftd)))) ;; float or double `(seq - (inline ,(make-info-loadfl (vector-ref vfp i)) ,%store-double - ,%sp ,%zero (immediate ,isp)) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp i)) ,(f (cdr types) (fx+ i 1) (fx+ isp 8)))] [else ;; integer @@ -3428,15 +3417,13 @@ [(fp-double-float) (if (< ifp 8) (%seq - (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-double - ,%sp ,%zero (immediate ,isp)) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp ifp)) ,(f (cdr types) iint (fx+ ifp 1) (fx+ isp 8))) (f (cdr types) iint ifp isp))] [(fp-single-float) (if (< ifp 8) (%seq - (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-single - ,%sp ,%zero (immediate ,isp)) + ,(%inline store-single ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp ifp)) ,(f (cdr types) iint (fx+ ifp 1) (fx+ isp 8))) (f (cdr types) iint ifp isp))] [(fp-ftd& ,ftd) @@ -3455,8 +3442,7 @@ (f (cdr types) iint ifp isp)] [(eq? (car classes) 'sse) `(seq - (inline ,(make-info-loadfl (vector-ref vfp ifp)) ,%store-double - ,%sp ,%zero (immediate ,isp)) + (set! ,(%mref ,%sp ,%zero ,isp fp) ,(vector-ref vfp ifp)) ,(reg-loop (cdr classes) iint (fx+ ifp 1) (+ isp 8)))] [else `(seq @@ -3576,7 +3562,7 @@ (fx+ offset 8) int* (cdr fp*) - (cons `(inline ,(make-info-loadfl (car fp*)) ,%load-double ,%sp ,%zero (immediate ,offset)) + (cons `(set! ,(car fp*) ,(%mref ,%sp ,%zero ,offset fp)) accum) live* (cons (car fp*) fp-live*))]))] @@ -3589,13 +3575,13 @@ [(fp-double-float) (values (lambda (x) - `(inline ,(make-info-loadfl %Cfpretval) ,%load-double ,x ,%zero ,(%constant flonum-data-disp))) + `(set! ,%Cfpretval ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp))) '() (list %Cfpretval))] [(fp-single-float) (values (lambda (x) - `(inline ,(make-info-loadfl %Cfpretval) ,%load-double->single ,x ,%zero ,(%constant flonum-data-disp))) + `(set! ,%Cfpretval ,(%inline double->single ,(%mref ,x ,%zero ,(constant flonum-data-disp) fp)))) '() (list %Cfpretval))] [(fp-void) @@ -3710,5 +3696,5 @@ (set! ,%rbp ,(%inline pop)) (set! ,%rbx ,(%inline pop)) (set! ,%sp ,(%inline + ,%sp (immediate 136))))) - (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ...)))))))))))))) + (asm-c-return ,null-info ,callee-save-regs ... ,result-regs ... ,result-fp-regs ...)))))))))))))) )