+#include "encoding.h"
+
+#ifdef __riscv64 /* 64-bit build: move registers with doubleword accesses */
+# define LREG ld /* load one general-purpose register */
+# define SREG sd /* store one general-purpose register */
+#else /* otherwise use word accesses */
+# define LREG lw
+# define SREG sw
+#endif
+
.data
.globl _heapend
.globl environ
li x30,0
li x31,0
- # enable fp
- mfpcr x1,cr0
- ori x1,x1,0x2
- mtpcr x1,cr0
+#ifdef __riscv64 /* only when building for 64-bit */
+ li a0, SR_U64 | SR_S64
+ csrs status, a0 # set the SR_U64 and SR_S64 bits in status
+#endif
+ csrc status, SR_PS # clear SR_PS, the same bit trap_entry checks before choosing a stack
- # enable vec
- mfpcr x1,cr0
- ori x1,x1,0x4
- mtpcr x1,cr0
+ # enable fp and accelerator
+ li a0, SR_EF | SR_EA
+ csrs status, a0 # request both units; the SR_EF bit is read back afterwards
## if that didn't stick, we don't have an FPU, so don't initialize it
- mfpcr x1,cr0
- andi x1,x1,0x2
- beqz x1,1f
-
- mtfsr x0
- mxtf.s f0, x0
- mxtf.s f1, x0
- mxtf.s f2, x0
- mxtf.s f3, x0
- mxtf.s f4, x0
- mxtf.s f5, x0
- mxtf.s f6, x0
- mxtf.s f7, x0
- mxtf.s f8, x0
- mxtf.s f9, x0
- mxtf.s f10,x0
- mxtf.s f11,x0
- mxtf.s f12,x0
- mxtf.s f13,x0
- mxtf.s f14,x0
- mxtf.s f15,x0
- mxtf.s f16,x0
- mxtf.s f17,x0
- mxtf.s f18,x0
- mxtf.s f19,x0
- mxtf.s f20,x0
- mxtf.s f21,x0
- mxtf.s f22,x0
- mxtf.s f23,x0
- mxtf.s f24,x0
- mxtf.s f25,x0
- mxtf.s f26,x0
- mxtf.s f27,x0
- mxtf.s f28,x0
- mxtf.s f29,x0
- mxtf.s f30,x0
- mxtf.s f31,x0
+ csrr t0, status # read status back to see which enable bits were accepted
+ and t0, t0, SR_EF # isolate the SR_EF bit
+ beqz t0, 1f # bit is clear: skip all floating-point initialisation
+
+ fssr x0 # write zero to the floating-point status register
+ fmv.s.x f0, x0 # from here on: load zero into each of f0..f31
+ fmv.s.x f1, x0
+ fmv.s.x f2, x0
+ fmv.s.x f3, x0
+ fmv.s.x f4, x0
+ fmv.s.x f5, x0
+ fmv.s.x f6, x0
+ fmv.s.x f7, x0
+ fmv.s.x f8, x0
+ fmv.s.x f9, x0
+ fmv.s.x f10,x0
+ fmv.s.x f11,x0
+ fmv.s.x f12,x0
+ fmv.s.x f13,x0
+ fmv.s.x f14,x0
+ fmv.s.x f15,x0
+ fmv.s.x f16,x0
+ fmv.s.x f17,x0
+ fmv.s.x f18,x0
+ fmv.s.x f19,x0
+ fmv.s.x f20,x0
+ fmv.s.x f21,x0
+ fmv.s.x f22,x0
+ fmv.s.x f23,x0
+ fmv.s.x f24,x0
+ fmv.s.x f25,x0
+ fmv.s.x f26,x0
+ fmv.s.x f27,x0
+ fmv.s.x f28,x0
+ fmv.s.x f29,x0
+ fmv.s.x f30,x0
+ fmv.s.x f31,x0
1:
- # only allow core 0 to proceed
-1:mfpcr a0, cr10
- bnez a0, 1b
+ # Per-hart startup: install the trap vector, carve out this hart's
+ # stack + TLS region above _end, then enter _init through sret.
+ # On exit: a0 = hart id, a1 = word loaded from address 4 (core count),
+ # a2 = hart id << STKSHIFT, sp = top of this hart's region,
+ # tp = region base biased by the tprel offset of tls_start.
+ la t0, trap_entry
+ csrw evec, t0
+
+ # tp = first 64-byte-aligned address at or above _end (shared base)
+ la tp, _end + 63
+ and tp, tp, -64
+
+ # get core id and number of cores
+ csrr a0, hartid
+ lw a1, 4(zero)
+
+ # give each core 128KB of stack + TLS
+ # sp has to be derived while tp still holds the shared base. If tp were
+ # advanced first, the hart offset would be counted twice and sp would
+ # land at base + (2*hartid + 1) << STKSHIFT, inside another hart's region.
+#define STKSHIFT 17
+ add sp, a0, 1
+ sll sp, sp, STKSHIFT
+ add sp, sp, tp
+ sll a2, a0, STKSHIFT
+ add tp, tp, a2
+
+ # bias tp by the tprel offset of tls_start so that tp-relative
+ # addressing of tls_start resolves to the base of this hart's region
+ lui t0, %tprel_hi(tls_start)
+ add t0, t0, %tprel_lo(tls_start)
+ sub tp, tp, t0
+
+ # sret resumes at epc, so this transfers control to _init
+ la t0, _init
+ csrw epc, t0
+ sret
+
+trap_entry: # trap vector target (its address is written to evec during startup)
+ csrw sup0, sp # park the interrupted sp in sup0
+ csrw sup1, t0 # park t0 in sup1 so it can serve as scratch
+ csrr t0, status
+ andi t0, t0, SR_PS
+ bnez t0, 1f # SR_PS set: keep running on the current stack
+ la sp, kstacktop # SR_PS clear: switch to the dedicated stack below kstacktop
+1:
+ addi sp, sp, -272 # frame: xN at 8*N (N=1..31), interrupted sp at 256, status at 264
+ csrr t0, sup1 # recover the real t0 before the register dump
+
+ SREG x1, 8(sp) # slots are 8 bytes apart for both word and doubleword builds
+ SREG x2, 16(sp)
+ SREG x3, 24(sp)
+ SREG x4, 32(sp)
+ SREG x5, 40(sp)
+ SREG x6, 48(sp)
+ SREG x7, 56(sp)
+ SREG x8, 64(sp)
+ SREG x9, 72(sp)
+ SREG x10, 80(sp)
+ SREG x11, 88(sp)
+ SREG x12, 96(sp)
+ SREG x13, 104(sp)
+ SREG x14, 112(sp)
+ SREG x15, 120(sp)
+ SREG x16, 128(sp)
+ SREG x17, 136(sp)
+ SREG x18, 144(sp)
+ SREG x19, 152(sp)
+ SREG x20, 160(sp)
+ SREG x21, 168(sp)
+ SREG x22, 176(sp)
+ SREG x23, 184(sp)
+ SREG x24, 192(sp)
+ SREG x25, 200(sp)
+ SREG x26, 208(sp)
+ SREG x27, 216(sp)
+ SREG x28, 224(sp)
+ SREG x29, 232(sp)
+ SREG x30, 240(sp)
+ SREG x31, 248(sp)
+
+ csrr t0, sup0 # t0 = interrupted sp (t0 itself is already in the frame)
+ csrr t1, status
+ SREG t0, 256(sp) # frame slot 256: interrupted sp
+ SREG t1, 264(sp) # frame slot 264: status as seen inside the handler
+
+ csrr a0, cause # a0 = trap cause
+ csrr a1, epc # a1 = pc recorded for the trap
+ mv a2, sp # a2 = address of the saved-register frame
+ jal handle_trap
+ csrw epc, v0 # resume at the address handle_trap left in v0 (presumably its return value)
+
+ LREG t0, 256(sp)
+ LREG t1, 264(sp)
+ csrw sup0, t0 # stage the saved sp in sup0; sp is still needed to address the frame
+ csrw status, t1 # put back the status value from the frame
+
+ LREG x1, 8(sp) # reload x1..x31 from the frame (handle_trap may have edited it via a2)
+ LREG x2, 16(sp)
+ LREG x3, 24(sp)
+ LREG x4, 32(sp)
+ LREG x5, 40(sp)
+ LREG x6, 48(sp)
+ LREG x7, 56(sp)
+ LREG x8, 64(sp)
+ LREG x9, 72(sp)
+ LREG x10, 80(sp)
+ LREG x11, 88(sp)
+ LREG x12, 96(sp)
+ LREG x13, 104(sp)
+ LREG x14, 112(sp)
+ LREG x15, 120(sp)
+ LREG x16, 128(sp)
+ LREG x17, 136(sp)
+ LREG x18, 144(sp)
+ LREG x19, 152(sp)
+ LREG x20, 160(sp)
+ LREG x21, 168(sp)
+ LREG x22, 176(sp)
+ LREG x23, 184(sp)
+ LREG x24, 192(sp)
+ LREG x25, 200(sp)
+ LREG x26, 208(sp)
+ LREG x27, 216(sp)
+ LREG x28, 224(sp)
+ LREG x29, 232(sp)
+ LREG x30, 240(sp)
+ LREG x31, 248(sp)
- la sp,stacktop
- jal main
-1:b 1b
+ csrr sp, sup0 # last step: swap in the sp value staged from frame slot 256
+ sret # leave the handler and continue at epc
- .bss
- .globl stacktop
+.bss
+.align 4
+.skip 4096 # 4096 bytes reserved for the trap handler stack
+kstacktop: # end of that area; trap_entry loads sp with this address
- .align 4
- .skip 131072
-stacktop:
+.section .tbss
+tls_start: # first symbol in .tbss; startup subtracts its %tprel offset from tp