diff --git a/add-riscv-support.patch b/add-riscv-support.patch index 439ab737060e9e58c491ff74fd570deaeb134ecc..2ed2f208bf2151de83300f917162f6e874f9c9ae 100644 --- a/add-riscv-support.patch +++ b/add-riscv-support.patch @@ -1,30 +1,59 @@ -From: gns -Subject: [PATCH] riscv64: add initial support for riscv64 - -This adds interpreter, FFI, and JIT support for rv64g platform with lp64d ABI. -Keep in mind that there might still be some issues, your feedback is greatly appreciated. ---- -diff --git a/Makefile b/Makefile -index b0288b4..420a4b3 100644 ---- a/Makefile -+++ b/Makefile -@@ -90,6 +90,7 @@ FILE_MAN= luajit.1 - FILE_PC= luajit.pc - FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h - FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ -+ dis_riscv.lua dis_riscv64.lua \ - dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/Makefile +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/Makefile ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/Makefile +@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ - dis_mips64.lua dis_mips64el.lua vmdef.lua -diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h -new file mode 100644 -index 0000000..4afe064 + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ ++ dis_riscv.lua dis_riscv64.lua \ + vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/README.md +=================================================================== +--- /dev/null ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/README.md +@@ -0,0 +1,31 @@ ++# LJRV - LuaJIT RISC-V 64 Port ++ ++LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language, ++RISC-V is a free and open ISA enabling a new era of processor innovation. ++ ++## Introduction ++ ++LJRV is a ongoing porting project of LuaJIT to the RISC-V 64-bit architecture by PLCT Lab, ISCAS. ++The ultimate goal is to provide a RISC-V 64 LuaJIT implementation and have it upstreamed to the official LuaJIT repository. ++ ++## Progress ++ ++- [x] Interpreter Runtime ++- [x] JIT Compiler ++ ++LJRV is still of beta quality, particularly the JIT compiler. ++For production usage, we suggests disable the JIT compiler during compilation by setting `XCFLAGS+= -DLUAJIT_DISABLE_JIT` in Makefile or environment variable. ++ ++## Bug Report ++ ++Please report bugs to [Issues](https://github.com/ruyisdk/LuaJIT/issues). ++ ++## Copyright ++ ++LuaJIT is Copyright (C) 2005-2023 Mike Pall. ++LuaJIT is free software, released under the MIT license. ++See full Copyright Notice in the COPYRIGHT file or in luajit.h. ++ ++LJRV is Copyright (C) 2022-2023 PLCT Lab, ISCAS. Contributed by gns. ++LJRV is free software, released under the MIT license. ++LJRV is part of RuyiSDK. +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.h +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv.h -@@ -0,0 +1,438 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.h +@@ -0,0 +1,433 @@ +/* +** DynASM RISC-V encoding engine. -+** Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2023 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + @@ -93,7 +122,7 @@ index 0000000..4afe064 + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; -+ void **globals; /* Array of globals (bias -10). */ ++ void **globals; /* Array of globals. */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ @@ -110,7 +139,6 @@ index 0000000..4afe064 +{ + dasm_State *D; + size_t psz = 0; -+ int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; @@ -121,12 +149,7 @@ index 0000000..4afe064 + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; -+ for (i = 0; i < maxsection; i++) { -+ D->sections[i].buf = NULL; /* Need this for pass3. */ -+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); -+ D->sections[i].bsize = 0; -+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ -+ } ++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); +} + +/* Free DynASM state. */ @@ -146,7 +169,7 @@ index 0000000..4afe064 +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; -+ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ D->globals = gl; + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + @@ -171,6 +194,7 @@ index 0000000..4afe064 + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; + D->sections[i].ofs = 0; + } +} @@ -388,7 +412,7 @@ index 0000000..4afe064 + break; + case DASM_REL_LG: + if (n < 0) { -+ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4); ++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); + goto patchrel; + } + /* fallthrough */ @@ -407,7 +431,7 @@ index 0000000..4afe064 + } + break; + case DASM_LABEL_LG: -+ val &= 2047; if (val >= 20) D->globals[val-10] = (void *)(base + n); ++ val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: @@ -460,16 +484,15 @@ index 0000000..4afe064 +} +#endif + -diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua -new file mode 100644 -index 0000000..e8e522a +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv.lua -@@ -0,0 +1,973 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.lua +@@ -0,0 +1,981 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V module. +-- -+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. +------------------------------------------------------------------------------ + @@ -738,17 +761,22 @@ index 0000000..e8e522a + +local map_op_rv32imafd = { + ++ -- DASM pseudo-instrs ++ empty_0 = "ffffffff", ++ call_1 = "7fffffffJ", ++ + -- RV32I + lui_2 = "00000037DU", -+ auipc_2 = "00000017DU", ++ auipc_2 = "00000017DA", + + jal_2 = "0000006fDJ", -+ jalr_3 = "00000067DRI", ++ jalr_3 = "00000067DRJ", + -- pseudo-instrs + j_1 = "0000006fJ", + jal_1 = "000000efJ", + jr_1 = "00000067R", + jalr_1 = "000000e7R", ++ jalr_2 = "000000e7RJ", + + beq_3 = "00000063RrB", + bne_3 = "00001063RrB", @@ -1267,6 +1295,9 @@ index 0000000..e8e522a + local mode, m, s = parse_label(params[n], false) + if p == "B" then m = m + 2048 end + waction("REL_"..mode, m, s, 1); n = n + 1 ++ elseif p == "A" then -- AUIPC ++ local mode, m, s = parse_label(params[n], false) ++ waction("REL_"..mode, m, s, 1); n = n + 1 + else + assert(false) + end @@ -1439,11 +1470,10 @@ index 0000000..e8e522a + +------------------------------------------------------------------------------ + -diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua -new file mode 100644 -index 0000000..654eb76 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv32.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv32.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv32.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 32 module. @@ -1457,11 +1487,10 @@ index 0000000..654eb76 + +riscv32 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") -diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua -new file mode 100644 -index 0000000..10cdfe2 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv64.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv64.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 64 module. @@ -1475,31 +1504,23 @@ index 0000000..10cdfe2 + +riscv64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") -diff --git a/src/Makefile b/src/Makefile -index 323baf2..1fd84fc 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -53,6 +53,7 @@ CCOPT_arm= - CCOPT_arm64= - CCOPT_ppc= - CCOPT_mips= -+CCOPT_riscv64= - # - CCDEBUG= - # Uncomment the next line to generate debug information: -@@ -267,6 +268,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) - else +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/Makefile +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/Makefile ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/Makefile +@@ -268,6 +269,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(T TARGET_LJARCH= mips endif -+else + else +ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv32 +else +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv64 - else ++else $(error Unsupported target architecture) endif + endif @@ -275,6 +282,8 @@ endif endif endif @@ -1509,7 +1530,7 @@ index 323baf2..1fd84fc 100644 ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 -@@ -464,6 +473,12 @@ ifeq (ppc,$(TARGET_LJARCH)) +@@ -469,6 +478,12 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D PPE -D TOC endif endif @@ -1522,11 +1543,11 @@ index 323baf2..1fd84fc 100644 endif endif -diff --git a/src/host/buildvm.c b/src/host/buildvm.c -index 9ee47ad..9ebfb8f 100644 ---- a/src/host/buildvm.c -+++ b/src/host/buildvm.c -@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/host/buildvm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm.c +@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" @@ -1535,17 +1556,41 @@ index 9ee47ad..9ebfb8f 100644 #else #error "No support for this architecture (yet)" #endif -diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c -index 7baa011..b2a7daf 100644 ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -156,6 +156,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm_asm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/host/buildvm_asm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm_asm.c +@@ -97,9 +97,15 @@ static void emit_asm_words(BuildCtx *ctx + #if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. */ + #endif +- if ((i & 15) == 0) ++ if ((i & 15) == 0) { ++#if LJ_TARGET_RISCV64 ++ while (ins == 0xffffffffu) { i += 4; ins = *(uint32_t *)(p+i); } ++#endif + fprintf(ctx->fp, "\t.long 0x%08x", ins); +- else ++ } else ++#if LJ_TARGET_RISCV64 ++ if (ins != 0xffffffffu) ++#endif + fprintf(ctx->fp, ",0x%08x", ins); + if ((i & 15) == 12) putc('\n', ctx->fp); + } +@@ -156,6 +162,21 @@ static void emit_asm_wordreloc(BuildCtx "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); exit(1); -+#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 -+ if ((ins & 0x7f) == 0x6fu) { -+ fprintf(ctx->fp, "\tjal %s\n", sym); ++#elif LJ_TARGET_RISCV64 ++ if (ins == 0x7fffffffu) { ++ fprintf(ctx->fp, "\tcall %s\n", sym); ++ } else if ((ins & 0x7f) == 0x17u) { ++ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x67u) { ++ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); ++ } else if ((ins & 0x7f) == 0x6fu) { ++ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); + } else { + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", @@ -1555,21 +1600,21 @@ index 7baa011..b2a7daf 100644 #else #error "missing relocation support for this architecture" #endif -@@ -272,6 +281,9 @@ void emit_asm(BuildCtx *ctx) +@@ -249,6 +270,9 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_MIPS - fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); #endif +#if LJ_TARGET_RISCV64 + fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); +#endif + emit_asm_align(ctx, 4); - for (i = rel = 0; i < ctx->nsym; i++) { - int32_t ofs = ctx->sym[i].ofs; -diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua -index 90fe9da..88a6045 100644 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -97,6 +97,7 @@ local map_arch = { + #if LJ_TARGET_PS3 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/bcsave.lua +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/jit/bcsave.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/bcsave.lua +@@ -101,6 +101,7 @@ local map_arch = { mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, @@ -1577,12 +1622,11 @@ index 90fe9da..88a6045 100644 } local map_os = { -diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua -new file mode 100644 -index 0000000..82b41aa +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv.lua +=================================================================== --- /dev/null -+++ b/src/jit/dis_riscv.lua -@@ -0,0 +1,772 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv.lua +@@ -0,0 +1,793 @@ +------------------------------------------------------------------------------ +-- LuaJIT RISC-V disassembler module. +-- @@ -1613,13 +1657,13 @@ index 0000000..82b41aa + +local map_quad0 = { + shift = 13, mask = 7, -+ [0] = "c.addi4spnAW", "c.fldNMh", "c.lwAMn", "c.flwNMn", -+ false, "c.fsdNMh", "c.swAMn", "c.fswNMn" ++ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", ++ false, "c.fsdNMh", "c.swZMn", "c.fswNMn" +} + +local map_sub2quad1 = { + shift = 5, mask = 3, -+ [0] = "c.subMA", "c.xorMA", "c.orMA", "c.andMA" ++ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" +} + +local map_sub1quad1 = { @@ -1921,7 +1965,7 @@ index 0000000..82b41aa + +local map_pri = { + [3] = map_load, [7] = map_fload, [15] = map_fence, [19] = map_ali, -+ [23] = "auipcDU", [27] = map_addi_shift, ++ [23] = "auipcDA", [27] = map_addi_shift, + [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_al, + [55] = "luiDU", [59] = map_arithw_shiftw, [67] = map_fmadd, [71] = map_fmsub, + [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, @@ -2164,7 +2208,7 @@ index 0000000..82b41aa + operands[#operands] = x + x = temp + end -+ elseif p == "A" then ++ elseif p == "Z" then + x = map_gpr[8 + band(rshift(op, 2), 7)] + elseif p == "N" then + x = map_fgpr[8 + band(rshift(op, 2), 7)] @@ -2174,7 +2218,7 @@ index 0000000..82b41aa + x = map_gpr[band(rshift(op, 2), 31)] + elseif p == "W" then + local uimm = parse_W(op) -+ x = format("%s,%d", "x2", uimm) ++ x = format("%s,%d", "sp", uimm) + elseif p == "x" then + x = parse_x(op) + elseif p == "h" then @@ -2184,7 +2228,7 @@ index 0000000..82b41aa + operands[#operands] = format("%d(%s)", uimm, last) + elseif p == "X" then + local imm = parse_X(op) -+ x = format("%s,%d", "x2", imm) ++ x = format("%s,%d", "sp", imm) + elseif p == "O" then + x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) + elseif p == "H" then @@ -2197,7 +2241,15 @@ index 0000000..82b41aa + x = arshift(op, 20) + --different for jalr + if(name == "jalr") then -+ operands[#operands] = format("%d(%s)", x, last) ++ local reg = map_gpr[band(rshift(op, 15), 31)] ++ if(ctx.reltab[reg] == nil) then ++ operands[#operands] = format("%d(%s)", x, last) ++ else ++ local target = ctx.reltab[reg] + x ++ operands[#operands] = format("%d(%s) #0x%08x", x, last, target) ++ ctx.rel = target ++ ctx.reltab[reg] = nil --assume no reuses of the register ++ end + x = nil --not to add additional operand + end + elseif p == "i" then @@ -2215,8 +2267,14 @@ index 0000000..82b41aa + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), + lshift(part3, 2)) + operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "A" then ++ local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] ++ ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) ++ x = format("0x%x", value) + elseif p == "B" then -+ x = parse_B(op) ++ x = ctx.addr + ctx.pos + parse_B(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "U" then + local value = band(rshift(op, 12), 0xfffff) + x = string.format("0x%x", value) @@ -2226,11 +2284,15 @@ index 0000000..82b41aa + local part3 = band(rshift(op, 5), 3) --4:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 3)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "q" then -+ x = parse_q(op) ++ x = ctx.addr + ctx.pos + parse_q(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "J" then -+ x = parse_J(op) ++ x = ctx.addr + ctx.pos + parse_J(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "K" then + local value = parse_K(op) + x = string.format("0x%x", value) @@ -2240,28 +2302,30 @@ index 0000000..82b41aa + local part3 = band(rshift(op, 4), 7) --4:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 2)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "1" then + local part1 = band(rshift(op, 12), 1) --5 + local part2 = band(rshift(op, 2), 31) --4:0 + local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) + x = string.format("0x%x", uimm) + elseif p == "T" then -+ x = parse_T(op) ++ x = ctx.addr + ctx.pos + parse_T(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "t" then + local part1 = band(rshift(op, 7), 7) --8:6 + local part2 = band(rshift(op, 10), 7) --5:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "u" then + local part1 = band(rshift(op, 7), 3) --7:6 + local part2 = band(rshift(op, 9), 15) --5:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "V" then + x = map_fgpr[band(rshift(op, 2), 31)] + elseif p == "0" then --PSEUDOINSTRUCTIONS -+ if (last == "x0" or last == 0) then ++ if (last == "zero" or last == 0) then + local n = #operands + operands[n] = nil + last = operands[n-1] @@ -2289,15 +2353,15 @@ index 0000000..82b41aa + local value = string.sub(operands[#operands], 1, 1) + local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) + if(value == "0" and -+ (operands[#operands - 1] == "x1" or operands[#operands - 1] == "x0")) then -+ if(operands[#operands - 1] == "x0") then ++ (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then ++ if(operands[#operands - 1] == "zero") then + name = altname + end + operands[#operands] = nil + operands[#operands] = reg + end + elseif (p == "2" and alias_done == false) then -+ if (last == "x0" or last == 0) then ++ if (last == "zero" or last == 0) then + local a1, a2 = match(altname, "([^|]*)|(.*)") + name = a2 + operands[#operands] = nil @@ -2335,6 +2399,7 @@ index 0000000..82b41aa + ctx.get = get_le + ctx.map_pri = map_pri + ctx.map_compr = map_compr ++ ctx.reltab = {} + return ctx +end + @@ -2355,11 +2420,10 @@ index 0000000..82b41aa + disass = disass, + regname = regname +} -diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua -new file mode 100644 -index 0000000..ff038d1 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv64.lua +=================================================================== --- /dev/null -+++ b/src/jit/dis_riscv64.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv64.lua @@ -0,0 +1,16 @@ +---------------------------------------------------------------------------- +-- LuaJIT RISC-V 64 disassembler wrapper module. @@ -2378,15 +2442,15 @@ index 0000000..ff038d1 + regname = dis_riscv.regname +} \ No newline at end of file -diff --git a/src/lib_jit.c b/src/lib_jit.c -index 2867d42..b2ccfa2 100644 ---- a/src/lib_jit.c -+++ b/src/lib_jit.c -@@ -648,6 +648,75 @@ JIT_PARAMDEF(JIT_PARAMINIT) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lib_jit.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lib_jit.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lib_jit.c +@@ -631,6 +631,81 @@ JIT_PARAMDEF(JIT_PARAMINIT) #include #endif -+#if LJ_TARGET_RISCV64 ++#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX +#include +#include +static sigjmp_buf sigbuf = {0}; @@ -2397,11 +2461,11 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_compressed() +{ -+#if defined(__riscv_compressed) -+ // Don't bother checking for RVC -- would crash before getting here. ++#if defined(__riscv_c) || defined(__riscv_compressed) ++ /* Don't bother checking for RVC -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) -+ // c.nop; c.nop; ++ /* c.nop; c.nop; */ + __asm__(".4byte 0x00010001"); + return 1; +#else @@ -2411,9 +2475,12 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_zba() +{ -+#if defined(__GNUC__) -+ // Don't bother verifying the result, just check if the instruction exists. -+ // add.uw zero, zero, zero ++#if defined(__riscv_b) || defined(__riscv_zba) ++ /* Don't bother checking for Zba -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) ++ /* Don't bother verifying the result, just check if the instruction exists. */ ++ /* add.uw zero, zero, zero */ + __asm__(".4byte 0x0800003b"); + return 1; +#else @@ -2423,9 +2490,12 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_zbb() +{ -+#if defined(__GNUC__) ++#if defined(__riscv_b) || defined(__riscv_zbb) ++ /* Don't bother checking for Zbb -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) + register int t asm ("a0"); -+ // addi a0, zero, 255; sext.b a0, a0; ++ /* addi a0, zero, 255; sext.b a0, a0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); + return t < 0; +#else @@ -2437,11 +2507,11 @@ index 2867d42..b2ccfa2 100644 +{ +#if defined(__GNUC__) + register int t asm ("a0"); -+ // C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". -+ // Therefore assume XThead* are present if XTheadBb is present. -+ // addi a0, zero, 255; th.ext a0, a0, 7, 0; ++ /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ ++ /* Therefore assume XThead* are present if XTheadBb is present. */ ++ /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); -+ return t == -1; // In case of collision with other vendor extensions. ++ return t == -1; /* In case of collision with other vendor extensions. */ +#else + return 0; +#endif @@ -2458,13 +2528,13 @@ index 2867d42..b2ccfa2 100644 /* Arch-dependent CPU feature detection. */ static uint32_t jit_cpudetect(void) { -@@ -719,6 +788,22 @@ static uint32_t jit_cpudetect(void) +@@ -702,6 +777,22 @@ static uint32_t jit_cpudetect(void) } #endif +#elif LJ_TARGET_RISCV64 +#if LJ_HASJIT -+ // SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. ++ /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. */ + struct sigaction old = {0}, act = {0}; + act.sa_handler = detect_sigill; + sigaction(SIGILL, &act, &old); @@ -2474,17 +2544,30 @@ index 2867d42..b2ccfa2 100644 + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + sigaction(SIGILL, &old, NULL); + -+ // Detect V/P? -+ // V have no hardware available, P not ratified yet. ++ /* Detect V/P? */ ++ /* V have no hardware available, P not ratified yet. */ +#endif + #else #error "Missing CPU detection for this architecture" #endif -diff --git a/src/lj_arch.h b/src/lj_arch.h -index bddd757..453414f 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_alloc.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_alloc.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_alloc.c +@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, siz + #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) + #define CALL_MREMAP_NOMOVE 0 + #define CALL_MREMAP_MAYMOVE 1 +-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) ++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) + #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE + #else + #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_arch.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_arch.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_arch.h @@ -31,6 +31,10 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 @@ -2505,9 +2588,9 @@ index bddd757..453414f 100644 +#elif defined(__riscv) && __riscv_xlen == 64 +#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 #else - #error "No support for this architecture (yet)" + #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif -@@ -435,6 +443,32 @@ +@@ -439,6 +447,30 @@ #define LJ_ARCH_VERSION 10 #endif @@ -2522,7 +2605,7 @@ index bddd757..453414f 100644 +#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ +#define LJ_TARGET_RISCV64 1 +#define LJ_TARGET_GC64 1 -+#define LJ_TARGET_EHRETREG 0 // TODO ++#define LJ_TARGET_EHRETREG 10 +#define LJ_TARGET_EHRAREG 1 +#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ + AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ @@ -2530,8 +2613,6 @@ index bddd757..453414f 100644 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */ +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -+// for now -+#define LUAJIT_NO_UNWIND 1 + +#else +#error "No support for RISC-V 64 Soft-float/Single-float" @@ -2540,9 +2621,9 @@ index bddd757..453414f 100644 #else #error "No target architecture defined" #endif -@@ -518,6 +552,13 @@ - /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ +@@ -531,6 +563,13 @@ #error "Only n64 ABI supported for MIPS64" + #undef LJ_TARGET_MIPS #endif +#elif LJ_TARGET_RISCV +#if !defined(__riscv_float_abi_double) @@ -2554,11 +2635,11 @@ index bddd757..453414f 100644 #endif #endif -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 6f5e0c4..d881ea1 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -185,6 +185,8 @@ IRFLDEF(FLOFS) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_asm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm.c +@@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" @@ -2567,7 +2648,7 @@ index 6f5e0c4..d881ea1 100644 #else #error "Missing instruction emitter for target CPU" #endif -@@ -1662,6 +1664,8 @@ static void asm_loop(ASMState *as) +@@ -1708,6 +1710,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" @@ -2576,12 +2657,11 @@ index 6f5e0c4..d881ea1 100644 #else #error "Missing assembler for target CPU" #endif -diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h -new file mode 100644 -index 0000000..e6d68e1 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm_riscv64.h +=================================================================== --- /dev/null -+++ b/src/lj_asm_riscv64.h -@@ -0,0 +1,1969 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm_riscv64.h +@@ -0,0 +1,1976 @@ +/* +** RISC-V IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h @@ -2863,16 +2943,18 @@ index 0000000..e6d68e1 +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; -+ if (lref != rref && ++ if ((as->flags & JIT_F_OPT_FMA) && ++ lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); -+ Reg left = ra_alloc2(as, irm, rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; -+ emit_ds1s2s3(as, riscvi, (dest & 0x1f), (left & 0x1f), (right & 0x1f), (add & 0x1f)); ++ emit_ds1s2s3(as, riscvi, dest, left, right, add); + return 1; + } + return 0; @@ -3163,7 +3245,7 @@ index 0000000..e6d68e1 + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); -+ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, ofs); ++ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); +} + +/* -- Memory references --------------------------------------------------- */ @@ -3210,7 +3292,7 @@ index 0000000..e6d68e1 + } + } + /* g->tmptv holds the TValue(s). */ -+ emit_opk(as, RISCVI_ADDI, dest, RID_GL, offsetof(global_State, tmptv)); ++ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); +} + +static void asm_aref(ASMState *as, IRIns *ir) @@ -3230,7 +3312,7 @@ index 0000000..e6d68e1 + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); -+ emit_sh3add(as, dest, base, idx); ++ emit_sh3add(as, dest, base, idx, RID_TMP); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. @@ -3347,12 +3429,12 @@ index 0000000..e6d68e1 + emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, (-HASH_ROT3)&0x1f); ++ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); + emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, (-HASH_ROT2-HASH_ROT1)&0x1f); ++ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); + emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); + emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); -+ emit_roti(as, RISCVI_RORIW, dest, tmp1, (-HASH_ROT1)&0x1f); ++ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); + if (irt_isnum(kt)) { + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi @@ -3379,7 +3461,6 @@ index 0000000..e6d68e1 + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; -+ Reg key = ra_scratch(as, allow); + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { @@ -3397,9 +3478,8 @@ index 0000000..e6d68e1 + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } -+ key = ra_scratch(as, allow); -+ asm_guard(as, RISCVI_BNE, key, ra_allock(as, k, allow)); -+ emit_lso(as, RISCVI_LD, key, idx, kofs); ++ asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); ++ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); + if (bigofs) + emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); +} @@ -3407,22 +3487,29 @@ index 0000000..e6d68e1 +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); -+ if (irref_isk(ir->op1)) { ++ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); ++ if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); + } else { -+ Reg uv = ra_scratch(as, RSET_GPR); -+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); -+ if (ir->o == IR_UREFC) { -+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_dsi(as, RISCVI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); -+ emit_lso(as, RISCVI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); ++ if (guarded) ++ asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); ++ if (ir->o == IR_UREFC) ++ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); ++ else ++ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); ++ if (guarded) ++ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); ++ emit_loada(as, dest, o); + } else { -+ emit_lso(as, RISCVI_LD, dest, uv, (int32_t)offsetof(GCupval, v)); ++ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), ++ (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } -+ emit_lso(as, RISCVI_LD, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + -+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } +} + @@ -3494,9 +3581,9 @@ index 0000000..e6d68e1 + } + } + ofs = field_ofs[ir->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + } + rset_clear(allow, idx); -+ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + emit_lso(as, riscvi, dest, idx, ofs); +} + @@ -3907,8 +3994,7 @@ index 0000000..e6d68e1 + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); + if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ + left = ra_alloc1(as, ir->op1, RSET_GPR); -+ asm_guard(as, k >= 0 ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_ds1s2(as, RISCVI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); ++ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); + emit_dsi(as, RISCVI_ADDI, dest, left, k); + if (dest == left) emit_mv(as, RID_TMP, left); + return; @@ -3962,7 +4048,7 @@ index 0000000..e6d68e1 +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ RegSet allow = rset_exclude(RSET_GPR, dest); ++ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); + if (as->flags & JIT_F_RVZbb) { + if (!irt_is64(ir->t)) + emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); @@ -4066,7 +4152,7 @@ index 0000000..e6d68e1 + emit_dsshamt(as, riscvik, dest, left, shift); + break; + case RISCVI_RORI: case RISCVI_RORIW: -+ emit_roti(as, riscvik, dest, left, shift); ++ emit_roti(as, riscvik, dest, left, RID_TMP, shift); + break; + default: + lj_assertA(0, "bad shift instruction"); @@ -4081,7 +4167,7 @@ index 0000000..e6d68e1 + break; + case RISCVI_ROR: case RISCVI_ROL: + case RISCVI_RORW: case RISCVI_ROLW: -+ emit_rot(as, riscvi, dest, left, right); ++ emit_rot(as, riscvi, dest, left, right, RID_TMP); + break; + default: + lj_assertA(0, "bad shift instruction"); @@ -4132,10 +4218,12 @@ index 0000000..e6d68e1 + } else { + emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); + if (dest != right) { -+ emit_andn(as, RID_TMP, right, RID_TMP, RID_TMP); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, right, RID_TMP); ++ emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP); + emit_ds1s2(as, RISCVI_AND, dest, left, RID_TMP); + } else { -+ emit_andn(as, RID_TMP, left, RID_TMP, RID_TMP); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); ++ emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP); + emit_ds1s2(as, RISCVI_AND, dest, right, RID_TMP); + } + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); @@ -4208,9 +4296,8 @@ index 0000000..e6d68e1 + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ asm_guard(as, ((op^(op>>1))&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_ds1s2(as, (op&4) ? RISCVI_SLTU : RISCVI_SLT, -+ RID_TMP, (op&2) ? right : left, (op&2) ? left : right); ++ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), ++ (op&2) ? right : left, (op&2) ? left : right); +} + +static void asm_comp(ASMState *as, IRIns *ir) @@ -4412,7 +4499,7 @@ index 0000000..e6d68e1 +} + +/* Coalesce BASE register for a side trace. */ -+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; @@ -4421,15 +4508,15 @@ index 0000000..e6d68e1 + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { -+ rset_clear(allow, r); /* Mark same BASE register as coalesced. */ ++ return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { -+ rset_clear(allow, irp->r); + emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ ++ return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } + } -+ return allow; ++ return RID_NONE; +} + +/* -- Tail of trace ------------------------------------------------------- */ @@ -4489,7 +4576,7 @@ index 0000000..e6d68e1 + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; -+ return REGSP_HINT(RID_RET); ++ return REGSP_HINT(irt_isfp(ir->t) ? RID_FPRET : RID_RET); +} + +static void asm_setup_target(ASMState *as) @@ -4544,18 +4631,18 @@ index 0000000..e6d68e1 + lj_assertJ(checki32(delta), "jump target out of range"); + p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ if (!cstart) cstart = p + 2; ++ if (!cstart) cstart = p; + } + } + } + if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); +} -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 25f54de..e108555 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -574,6 +574,97 @@ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccall.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.c +@@ -575,6 +575,97 @@ goto done; \ } @@ -4653,7 +4740,7 @@ index 25f54de..e108555 100644 #else #error "Missing calling convention definitions for this architecture" #endif -@@ -889,6 +980,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, +@@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState #endif @@ -4705,7 +4792,7 @@ index 25f54de..e108555 100644 /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ -@@ -935,6 +1071,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, #endif #endif @@ -4716,7 +4803,7 @@ index 25f54de..e108555 100644 /* Clear unused regs to get some determinism in case of misdeclaration. */ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR -@@ -1060,7 +1200,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif @@ -4729,7 +4816,7 @@ index 25f54de..e108555 100644 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) -@@ -1090,13 +1234,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } @@ -4744,19 +4831,20 @@ index 25f54de..e108555 100644 #else UNUSED(isfp); #endif - } - if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ +@@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, + if ((int32_t)nsp < 0) nsp = 0; + #endif -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. */ #endif - cc->nsp = nsp; -diff --git a/src/lj_ccall.h b/src/lj_ccall.h -index 0b3c524..aa51d5f 100644 ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -126,6 +126,21 @@ typedef union FPRArg { + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccall.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.h +@@ -129,6 +129,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; @@ -4778,7 +4866,7 @@ index 0b3c524..aa51d5f 100644 #else #error "Missing calling convention definitions for this architecture" #endif -@@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { +@@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ @@ -4787,11 +4875,11 @@ index 0b3c524..aa51d5f 100644 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 43e4430..94a9207 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccallback.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccallback.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccallback.c +@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs #define CALLBACK_MCODE_HEAD 52 @@ -4802,7 +4890,7 @@ index 43e4430..94a9207 100644 #else /* Missing support for this architecture. */ -@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) +@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_ } return p; } @@ -4842,14 +4930,10 @@ index 43e4430..94a9207 100644 #else /* Missing support for this architecture. */ #define callback_mcode_init(g, p) (p) -@@ -512,6 +549,31 @@ void lj_ccallback_mcode_free(CTState *cts) - } - #endif +@@ -516,6 +553,31 @@ void lj_ccallback_mcode_free(CTState *ct + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; -+#define CALLBACK_HANDLE_RET \ -+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ -+ ((float *)dp)[1] = *(float *)dp; -+ +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_HANDLE_REGARG \ @@ -4871,10 +4955,14 @@ index 43e4430..94a9207 100644 + } \ + } + - #define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; -@@ -662,7 +724,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -662,7 +724,7 @@ static void callback_conv_result(CTState *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } @@ -4883,97 +4971,16 @@ index 43e4430..94a9207 100644 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. */ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) -diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c -index ded382a..32688a0 100644 ---- a/src/lj_dispatch.c -+++ b/src/lj_dispatch.c -@@ -56,6 +56,15 @@ static const ASMFunction dispatch_got[] = { - #undef GOTFUNC - #endif - -+#if LJ_TARGET_RISCV64 -+#include -+#define GOTFUNC(name) (ASMFunction)name, -+static const ASMFunction dispatch_got[] = { -+ GOTDEF(GOTFUNC) -+}; -+#undef GOTFUNC -+#endif -+ - /* Initialize instruction dispatch table and hot counters. */ - void lj_dispatch_init(GG_State *GG) - { -@@ -76,7 +85,7 @@ void lj_dispatch_init(GG_State *GG) - GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); - for (i = 0; i < GG_NUM_ASMFF; i++) - GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); --#if LJ_TARGET_MIPS -+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); - #endif - } -diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h -index 52762ee..51f10ed 100644 ---- a/src/lj_dispatch.h -+++ b/src/lj_dispatch.h -@@ -66,6 +66,35 @@ GOTDEF(GOTENUM) - }; - #endif - -+#if LJ_TARGET_RISCV64 -+/* Need our own global offset table to wrap RISC-V PIC intern / extern calls */ -+ -+#if LJ_HASJIT -+#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) -+#else -+#define JITGOTDEF(_) -+#endif -+#if LJ_HASFFI -+#define FFIGOTDEF(_) \ -+ _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) -+#else -+#define FFIGOTDEF(_) -+#endif -+ -+#define GOTDEF(_) \ -+ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ -+ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ -+ _(pow) _(fmod) _(ldexp) \ -+ JITGOTDEF(_) FFIGOTDEF(_) -+ -+enum { -+#define GOTENUM(name) LJ_GOT_##name, -+GOTDEF(GOTENUM) -+#undef GOTENUM -+ LJ_GOT__MAX -+}; -+#endif -+ - /* Type of hot counter. Must match the code in the assembler VM. */ - /* 16 bits are sufficient. Only 0.0015% overhead with maximum slot penalty. */ - typedef uint16_t HotCount; -@@ -93,7 +122,7 @@ typedef struct GG_State { - /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ - uint8_t align1[(16-sizeof(global_State))&15]; - #endif --#if LJ_TARGET_MIPS -+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 - ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ - #endif - #if LJ_HASJIT -diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h -new file mode 100644 -index 0000000..c7273d5 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_emit_riscv.h +=================================================================== --- /dev/null -+++ b/src/lj_emit_riscv.h -@@ -0,0 +1,516 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_emit_riscv.h +@@ -0,0 +1,519 @@ +/* +** RISC-V instruction emitter. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + -+#include "lj_target.h" -+#include +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); @@ -5068,7 +5075,8 @@ index 0000000..c7273d5 + } +} + -+static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t shamt) ++static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, ++ int32_t shamt) +{ + if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { + if (as->flags & JIT_F_RVXThead) switch (riscvi) { @@ -5093,13 +5101,13 @@ index 0000000..c7273d5 + lj_assertA(0, "invalid roti op"); + return; + } -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); -+ emit_dsshamt(as, ai, RID_TMP, rs1, shamt&shmsk); ++ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); + } +} + -+static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) ++static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, riscvi, rd, rs1, rs2); @@ -5123,15 +5131,15 @@ index 0000000..c7273d5 + return; + } + if (rd == rs2) { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); -+ emit_ds1s2(as, sbi, RID_TMP, rs1, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds2(as, RISCVI_NEG, RID_TMP, rs2); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); + } else { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds1s2(as, sbi, RID_TMP, rs1, RID_TMP); -+ emit_ds2(as, RISCVI_NEG, RID_TMP, rs2); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); + } + } +} @@ -5209,6 +5217,7 @@ index 0000000..c7273d5 + } +} + ++/* +static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { @@ -5218,7 +5227,9 @@ index 0000000..c7273d5 + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} ++*/ + ++/* +static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { @@ -5228,6 +5239,7 @@ index 0000000..c7273d5 + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} ++*/ + +static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) +{ @@ -5239,7 +5251,7 @@ index 0000000..c7273d5 + } +} + -+static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, unsigned int shamt) ++static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) +{ + if (as->flags & JIT_F_RVZba) { + switch (shamt) { @@ -5251,14 +5263,14 @@ index 0000000..c7273d5 + } else if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); + } else { -+ emit_ds1s2(as, RISCVI_ADD, rd, rs1, RID_TMP); -+ emit_dsshamt(as, RISCVI_SLLI, RID_TMP, rs2, 3); ++ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); ++ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); + } +} + -+#define emit_sh1add(as, rd, rs1, rs2) emit_shxadd(as, rd, rs1, rs2, 1) -+#define emit_sh2add(as, rd, rs1, rs2) emit_shxadd(as, rd, rs1, rs2, 2) -+#define emit_sh3add(as, rd, rs1, rs2) emit_shxadd(as, rd, rs1, rs2, 3) ++#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) ++#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) ++#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) + +static void emit_loadk12(ASMState *as, Reg rd, int32_t i) +{ @@ -5440,7 +5452,7 @@ index 0000000..c7273d5 + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, -+ intptr_t k) ++ Reg tmp, intptr_t k) +{ + if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); + else { @@ -5451,8 +5463,8 @@ index 0000000..c7273d5 + case RISCVI_ANDI: riscvi = RISCVI_AND; break; + default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; + } -+ emit_ds1s2(as, riscvi, dest, src, RID_TMP); -+ emit_loadu64(as, RID_TMP, (uintptr_t)k); ++ emit_ds1s2(as, riscvi, dest, src, tmp); ++ emit_loadu64(as, tmp, (uintptr_t)k); + } +} + @@ -5478,16 +5490,16 @@ index 0000000..c7273d5 +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) -+ emit_opk(as, RISCVI_ADDI, r, r, ofs); ++ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); +} + + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) -diff --git a/src/lj_frame.h b/src/lj_frame.h -index aa1dc11..dace63d 100644 ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_frame.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_frame.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_frame.h +@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 @@ -5503,10 +5515,50 @@ index aa1dc11..dace63d 100644 #else #error "Missing CFRAME_* definitions for this architecture" #endif -diff --git a/src/lj_jit.h b/src/lj_jit.h -index 7f08173..5f91644 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_gdbjit.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_gdbjit.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_RISCV64 ++ DW_REG_SP = 2, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_RISCV64 ++ .machine = 243, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(G + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_RISCV64 ++ { ++ int i; ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } ++ DB(DW_CFA_offset|9); DUV(17); ++ DB(DW_CFA_offset|8); DUV(18); ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } ++ DB(DW_CFA_offset|32|9); DUV(29); ++ DB(DW_CFA_offset|32|8); DUV(30); ++ } + #else + #error "Unsupported target architecture" + #endif +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_jit.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_jit.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_jit.h @@ -67,6 +67,15 @@ #endif #endif @@ -5523,10 +5575,45 @@ index 7f08173..5f91644 100644 #else #define JIT_F_CPUSTRING "" -diff --git a/src/lj_target.h b/src/lj_target.h -index 1971692..5423a2e 100644 ---- a/src/lj_target.h -+++ b/src/lj_target.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_mcode.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_mcode.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_mcode.c +@@ -38,6 +38,12 @@ + void sys_icache_invalidate(void *start, size_t len); + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#include ++#include ++#include ++#endif ++ + /* Synchronize data/instruction cache. */ + void lj_mcode_sync(void *start, void *end) + { +@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *en + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); ++#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#if (defined(__GNUC__) || defined(__clang__)) ++ __asm__ volatile("fence rw, rw"); ++#else ++ lj_vm_fence_rw_rw(); ++#endif ++#ifdef __GLIBC__ ++ __riscv_flush_icache(start, end, 0); ++#else ++ syscall(__NR_riscv_flush_icache, start, end, 0UL); ++#endif + #elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_target.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. @@ -5534,18 +5621,9 @@ index 1971692..5423a2e 100644 -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 typedef uint64_t RegSet; - #else - typedef uint32_t RegSet; -@@ -69,7 +69,7 @@ typedef uint32_t RegSet; - #define rset_set(rs, r) (rs |= RID2RSET(r)) - #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) - #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) --#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 - #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) - #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) - #else -@@ -144,6 +144,8 @@ typedef uint32_t RegCost; + #define RSET_BITS 6 + #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +@@ -143,6 +143,8 @@ typedef uint32_t RegCost; #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" @@ -5554,11 +5632,10 @@ index 1971692..5423a2e 100644 #else #error "Missing include for target CPU" #endif -diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h -new file mode 100644 -index 0000000..5089493 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target_riscv.h +=================================================================== --- /dev/null -+++ b/src/lj_target_riscv.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target_riscv.h @@ -0,0 +1,513 @@ +/* +** Definitions for RISC-V CPUs. @@ -6073,11 +6150,25 @@ index 0000000..5089493 +} RISCVRM; + +#endif -diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c -index 4fa79ae..842cfba 100644 ---- a/src/lj_vmmath.c -+++ b/src/lj_vmmath.c -@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vm.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_vm.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vm.h +@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint + #if LJ_TARGET_PPC + void lj_vm_cachesync(void *start, void *end); + #endif ++#if LJ_TARGET_RISCV64 ++void lj_vm_fence_rw_rw(); ++#endif + LJ_ASMF double lj_vm_foldarith(double x, double y, int op); + #if LJ_HASJIT + LJ_ASMF double lj_vm_foldfpm(double x, int op); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vmmath.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_vmmath.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vmmath.c +@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double /* -- Helper functions for generated machine code ------------------------- */ @@ -6087,17 +6178,16 @@ index 4fa79ae..842cfba 100644 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; -diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc -new file mode 100644 -index 0000000..c329123 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/vm_riscv64.dasc +=================================================================== --- /dev/null -+++ b/src/vm_riscv64.dasc -@@ -0,0 +1,4677 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/vm_riscv64.dasc +@@ -0,0 +1,4866 @@ +|// Low-level VM code for RISC-V 64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. -+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h +|// -+|// Contributed by Raymond Wong from PLCT Lab, ISCAS. ++|// Contributed by gns from PLCT Lab, ISCAS. +|// Sponsored by PLCT Lab, ISCAS. +| +|.arch riscv64 @@ -6524,19 +6614,19 @@ index 0000000..c329123 +|.endmacro +| +|// Assumes J is relative to GL. Some J members might be out of range though. -+#define GG_G2GOT (GG_OFS(got) - GG_OFS(g)) +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) -+#define GL_GOT(name) (GG_G2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| -+|.macro load_got, func -+| ld CFUNCADDR, GL_GOT(func)(GL) ++|.macro call_intern, curfunc, func ++|->curfunc .. _pcrel_ .. func: ++| auipc CFUNCADDR, extern %pcrel_hi(func) ++| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) ++|.endmacro ++|.macro call_extern, func ++| call extern func ++| empty +|.endmacro -+|// JAL should be enough for *most* internal jumps. -+|.macro call_intern, func; jalr CFUNCADDR; .endmacro -+|.macro call_extern; jalr CFUNCADDR; .endmacro -+|.macro jmp_extern; jr CFUNCADDR; .endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro @@ -6575,11 +6665,18 @@ index 0000000..c329123 +|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. -+|.macro settp, dst, tp; -+| cleartp dst ++|.macro settp_a, dst; cleartp dst; .endmacro ++|.macro settp_a, dst, src; cleartp dst, src; .endmacro ++|.macro settp_b, dst, tp; +| slli x31, tp, 47 +| or dst, dst, x31 +|.endmacro ++|.macro settp_b, dst, src, tp; ++| slli x31, tp, 47 ++| or dst, src, x31 ++|.endmacro ++|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro ++|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro +| +|// Extract (negative) type tag. +|.macro gettp, dst, src; srai dst, src, 47; .endmacro @@ -6719,7 +6816,7 @@ index 0000000..c329123 + | mv MULTRES, RD + | srliw CARG2, TMP2, 3 + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) + | lw TMP2, SAVE_NRES(sp) + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | mv RD, MULTRES @@ -6780,7 +6877,7 @@ index 0000000..c329123 + | // L->base = new base, L->top = top + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) @@ -6984,7 +7081,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // (lua_State *L, TValue *o, TValue *k) ++ | call_intern vmeta_tgetv, lj_meta_tget + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + | ld TMP0, 0(CRET1) @@ -7004,7 +7102,7 @@ index 0000000..c329123 + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: -+ | jal extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mv TMP1, TISNIL + | bxeqz CRET1, ->BC_TGETR_Z @@ -7041,7 +7139,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // (lua_State *L, TValue *o, TValue *k) ++ | call_intern vmeta_tsetv, lj_meta_tset + | // Returns TValue * (finished) or NULL (metamethod). + | ld TMP2, 0(RA) + | beqz CRET1, >3 @@ -7066,7 +7165,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // (lua_State *L, GCtab *t, int32_t key) ++ | call_intern vmeta_tsetr, lj_tab_setinth + | // Returns TValue *. + | j ->BC_TSETR_Z + | @@ -7081,7 +7181,8 @@ index 0000000..c329123 + | mv CARG1, L + | decode_OP1 CARG4, INS + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // (lua_State *L, TValue *o1, *o2, int op) ++ | call_intern vmeta_comp, lj_meta_comp + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 @@ -7129,7 +7230,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // (lua_State *L, GCobj *o1, *o2, int ne) ++ | call_intern vmeta_equal, lj_meta_equal + | // Returns 0/1 or TValue * (metamethod). + | j <3 + | @@ -7140,7 +7242,7 @@ index 0000000..c329123 + | mv CARG2, INS + | sd BASE, L->base + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). + | j <3 + |.endif @@ -7152,7 +7254,8 @@ index 0000000..c329123 + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | // (lua_State *L, TValue *o, BCReg tp) ++ | call_intern vmeta_istype, lj_meta_istype + | j ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- @@ -7168,7 +7271,8 @@ index 0000000..c329123 + | mv CARG3, RB + | mv CARG4, RC + | decode_OP1 CARG5, INS -+ | jal extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). + | bxeqz CRET1, ->cont_nop + | @@ -7191,7 +7295,7 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_len // (lua_State *L, TValue *o) ++ | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. @@ -7211,7 +7315,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | add CARG3, BASE, RC + | mv MULTRES, NARGS8:RC -+ | jal extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_call, lj_meta_call + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB @@ -7225,7 +7330,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | add CARG3, RA, RC + | mv MULTRES, NARGS8:RC -+ | jal extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_callt, lj_meta_call + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. @@ -7240,7 +7346,7 @@ index 0000000..c329123 + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS -+ | jal extern lj_meta_for // (lua_State *L, TValue *base) ++ | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + |.if JIT + | decode_OP1 TMP0, MULTRES + | li TMP1, BC_JFORI @@ -7330,9 +7436,8 @@ index 0000000..c329123 + | + |.ffunc_1 type + | gettp TMP0, CARG1 -+ | sltu TMP2, TISNUM, TMP0 + | not TMP3, TMP0 -+ | bnez TMP2, >1 ++ | bltu TISNUM, TMP0, >1 + | li TMP3, ~LJ_TISNUM + |1: + | slli TMP3, TMP3, 3 @@ -7375,8 +7480,7 @@ index 0000000..c329123 + | beq RC, TMP0, >5 + | bnez NODE:TMP2, <3 + |4: -+ | mv CARG1, RB -+ | settp CARG1, TMP3 ++ | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: + | bxne CARG1, TISNIL, ->fff_restv @@ -7419,7 +7523,7 @@ index 0000000..c329123 + | addi CARG3, BASE, 8 + | bxnez TMP0, ->fff_fallback + | mv CARG1, L -+ | jal extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ld CARG1, 0(CRET1) + | j ->fff_restv @@ -7452,7 +7556,7 @@ index 0000000..c329123 + | ffgccheck + | mv CARG1, L + | mv CARG2, BASE -+ | jal extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | li TMP1, LJ_TSTR + |// ld BASE, L->base @@ -7468,7 +7572,7 @@ index 0000000..c329123 + | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. + | addi CARG2, BASE, 8 + | addi CARG3, BASE, -16 -+ | jal extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. + |// addi RA, BASE, -16 + | li RD, (2+1)*8 @@ -7510,7 +7614,7 @@ index 0000000..c329123 + | sltu TMP3, TMP2, TMP0 + | addi RA, BASE, -16 + | zext.w TMP0, TMP2 -+ | settp TMP0, TISNUM ++ | settp_b TMP0, TISNUM + | sd TMP0, 0(RA) + | beqz TMP3, >2 // Not in array part? + | slli TMP3, TMP2, 3 @@ -7527,7 +7631,7 @@ index 0000000..c329123 + | li RD, (0+1)*8 + | bxeqz TMP0, ->fff_res + | mv CARG2, TMP2 -+ | jal extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 + | bxeqz CRET1, ->fff_res @@ -7555,6 +7659,9 @@ index 0000000..c329123 + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:RC, NARGS8:RC, -8 + | lbu TMP3, GL->hookmask + | mv TMP2, BASE @@ -7575,6 +7682,9 @@ index 0000000..c329123 + | j ->vm_call_dispatch + | + |.ffunc xpcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) @@ -7707,13 +7817,19 @@ index 0000000..c329123 + |.else + | mv CARG1, L + | mv CARG2, L:RA -+ | jal extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ | // (lua_State *L, lua_State *co) ++ | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err + |.endif + | + |9: // Handle stack expansion on return from yield. + | mv CARG1, L + | srliw CARG2, RD, 3 -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | // (lua_State *L, int n) ++ |.if resume ++ | call_intern ff_coroutine_resume, lj_state_growstack ++ |.else ++ | call_intern ff_coroutine_wrap_aux, lj_state_growstack ++ |.endif + | mv CRET1, x0 + | j <4 + |.endmacro @@ -7803,15 +7919,13 @@ index 0000000..c329123 + | + |.macro math_extern, func + | .ffunc_n math_ .. func -+ | load_got func -+ | call_extern ++ | call_extern func + | j ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func -+ | load_got func -+ | call_extern ++ | call_extern func + | j ->fff_resn + |.endmacro + | @@ -7827,9 +7941,8 @@ index 0000000..c329123 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. -+ | load_got log + | checknum CARG1, ->fff_fallback -+ | call_extern ++ | call_extern log + | j ->fff_resn + | + | math_extern log10 @@ -7850,17 +7963,15 @@ index 0000000..c329123 + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback -+ | load_got ldexp // (double x, int exp) + | fld FARG1, 0(BASE) + | lw CARG1, 8(BASE) -+ | call_extern ++ | call_extern ldexp // (double x, int exp) + | j ->fff_resn + | + |.ffunc_n math_frexp -+ | load_got frexp + | ld PC, FRAME_PC(BASE) + | addi CARG1, GL, offsetof(global_State, tmptv) -+ | call_extern ++ | call_extern frexp + | lw TMP1, GL->tmptv + | fcvt.d.w FARG2, TMP1 + | fsd FRET1, -16(BASE) @@ -7869,10 +7980,9 @@ index 0000000..c329123 + | j ->fff_res + | + |.ffunc_n math_modf -+ | load_got modf + | addi CARG1, BASE, -16 + | ld PC, FRAME_PC(BASE) -+ | call_extern ++ | call_extern modf + | fsd FRET1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res @@ -7900,7 +8010,7 @@ index 0000000..c329123 + | or CARG1, CARG1, CARG2 + | addi RA, RA, 8 + | zext.w CARG1, CARG1 -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | j <1 + |3: // Convert intermediate result to number and continue below. + | fcvt.d.w FARG1, CARG1 @@ -7945,7 +8055,7 @@ index 0000000..c329123 + | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). + | addiw RD, RD, 1 + | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 -+ | settp TMP2, TISNUM ++ | settp_b TMP2, TISNUM + | sd TMP2, -16(BASE) + | j ->fff_res + | @@ -7968,7 +8078,8 @@ index 0000000..c329123 + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // (lua_State *L, const char *str, size_t l) ++ | call_intern fff_newstr, lj_str_new + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: @@ -8035,9 +8146,9 @@ index 0000000..c329123 + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w + | sd PC, SAVE_PC(sp) -+ | jal extern lj_buf_putstr_ .. name ++ | call_intern ff_string_ .. name, lj_buf_putstr_ .. name + |// mv SBUF:CARG1, SBUF:CRET1 -+ | jal extern lj_buf_tostr ++ | call_intern ff_string_ .. name, lj_buf_tostr + | ld BASE, L->base + | j ->fff_resstr + |.endmacro @@ -8207,7 +8318,7 @@ index 0000000..c329123 + |5: // Grow stack for fallback handler. + | li CARG2, LUA_MINSTACK + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | mv CRET1, x0 // Set zero-flag to force retry. + | j <1 @@ -8220,14 +8331,14 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | mv CARG1, L + | sd TMP0, L->top -+ | jal extern lj_gc_step // (lua_State *L) ++ | call_intern fff_gc_step, lj_gc_step // (lua_State *L) + | ld BASE, L->base -+ |// mv ra, MULTRES ++ | mv ra, MULTRES // Help return address predictor. + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | sub NARGS8:RC, TMP0, BASE -+ | jr MULTRES ++ | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- @@ -8275,7 +8386,7 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. -+ | jal extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. @@ -8307,7 +8418,7 @@ index 0000000..c329123 + | slli TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP1, L->top -+ | jal extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc) + | j <3 + |.endif + | @@ -8329,7 +8440,7 @@ index 0000000..c329123 + | sub RA, RA, BASE + | sd TMP0, L->top + | mv CARG1, L -+ | jal extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top @@ -8380,7 +8491,8 @@ index 0000000..c329123 + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sd BASE, L->base + | mv CARG2, PC -+ | jal extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | // (jit_State *J, const BCIns *pc) ++ | call_intern cont_stitch, lj_dispatch_stitch + | ld BASE, L->base + | j ->cont_nop + | @@ -8396,7 +8508,8 @@ index 0000000..c329123 + | mv CARG2, PC + | sd BASE, L->base + | sw MULTRES, TMPD(sp) -+ | jal extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // (lua_State *L, const BCIns *pc) ++ | call_intern vm_profhook, lj_dispatch_profile + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addi PC, PC, -4 + | ld BASE, L->base @@ -8458,7 +8571,7 @@ index 0000000..c329123 + | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number. + | sd x0, GL->jit_base + | mv CARG2, sp -+ | jal extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. + | ld TMP1, L->cframe + | ld BASE, L->base @@ -8476,7 +8589,8 @@ index 0000000..c329123 + | sd BASE, L->base + |1: + | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | bltz CRET1, >9 // Check for error from exit. ++ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. ++ | beqz TMP0, >9 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | slli MULTRES, CRET1, 3 + | cleartp LFUNC:RB @@ -8490,16 +8604,19 @@ index 0000000..c329123 + | fmv.d.x TOBIT, TMP3 + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) -+ | addi PC, PC, 4 ++ | addi PC, PC, 4 ++ | addiw CRET1, CRET1, 17 // Static dispatch? + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, GL->vmstate ++ | decode_RD8a RD, INS ++ | beqz CRET1, >5 + | decode_OP8 TMP1, INS -+ | sltiu TMP2, TMP1, BC_FUNCF*8 + | add TMP0, DISPATCH, TMP1 -+ | decode_RD8 RD, INS ++ | sltiu TMP2, TMP1, BC_FUNCF*8 + | ld TMP3, 0(TMP0) -+ | decode_RA8 RA, INS -+ | beqz TMP2, >2 ++ | decode_RA8 RA, INS ++ | beqz TMP2, >2 ++ | decode_RD8b RD + | jr TMP3 + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? @@ -8521,10 +8638,25 @@ index 0000000..c329123 + | add RA, RA, BASE + | jr TMP3 + | ++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. ++ | ld TMP0, GL_J(trace)(GL) ++ | decode_RD8b RD ++ | add TMP0, TMP0, RD ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw INS, TRACE:TMP2->startins ++ | decode_OP8 TMP1, INS ++ | add TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld TMP3, GG_DISP2STATIC(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RD8b RD ++ | decode_RA8b RA ++ | jr TMP3 ++ | + |9: // Rethrow error from the right C frame. + | negw CARG2, CRET1 + | mv CARG1, L -+ | jal extern lj_err_trace // (lua_State *L, int errcode) ++ | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- @@ -8613,6 +8745,13 @@ index 0000000..c329123 + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |// void lj_vm_fence_rw_rw() ++ |->vm_fence_rw_rw: ++ |.if JIT or FFI ++ | .long 0x0330000f ++ | ret ++ |.endif ++ | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 @@ -8688,7 +8827,6 @@ index 0000000..c329123 + | addxi DISPATCH, x7, GG_G2DISP + | srli x5, x5, 12 + | sw x5, CTSTATE->cb.slot -+ | load_got lj_ccallback_enter + | sd CARG1, CTSTATE->cb.gpr[0] + | fsd FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] @@ -8710,7 +8848,7 @@ index 0000000..c329123 + | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. + | mv CARG1, CTSTATE + | mv CARG2, sp -+ | call_intern lj_ccallback_enter // (CTState *cts, void *cf) ++ | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top @@ -8730,14 +8868,14 @@ index 0000000..c329123 + | + |->cont_ffi_callback: // Return from FFI callback. + |.if FFI -+ | load_got lj_ccallback_leave + | ld CTSTATE, GL->ctype_state + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | mv CARG1, CTSTATE + | mv CARG2, RA -+ | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | // (CTState *cts, TValue *o) ++ | call_intern cont_ffi_callback, lj_ccallback_leave + | fld FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | fld FRET2, CTSTATE->cb.fpr[1] @@ -8755,7 +8893,6 @@ index 0000000..c329123 + | mv TMP2, sp + | sub sp, sp, TMP1 + | sd ra, -8(TMP2) -+ | slliw CARG2, CARG2, 3 + | sd x18, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | mv x18, TMP2 @@ -9206,7 +9343,7 @@ index 0000000..c329123 + | negw TMP0, TMP0 + | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + | zext.w TMP0, TMP0 -+ | settp TMP0, TISNUM ++ | settp_b TMP0, TISNUM + | j >2 + |1: + | sltiu TMP3, CARG3, LJ_TISNUM @@ -9226,9 +9363,9 @@ index 0000000..c329123 + | addi TMP2, TMP1, -LJ_TSTR + | cleartp STR:CARG1, TMP0 + | bnez TMP2, >2 -+ | lw CARG1, STR:CARG1->len ++ | lwu CARG1, STR:CARG1->len + |1: -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | sd CARG1, 0(RA) + | ins_next + |2: @@ -9240,7 +9377,7 @@ index 0000000..c329123 + |3: +#endif + |->BC_LEN_Z: -+ | jal extern lj_tab_len // (GCtab *t) ++ | call_intern BC_LEN, lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | j <1 +#if LJ_52 @@ -9339,7 +9476,7 @@ index 0000000..c329123 + | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. + |.endif + | zext.w CRET1, CRET1 -+ | settp CRET1, TISNUM ++ | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. @@ -9352,7 +9489,7 @@ index 0000000..c329123 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | -+ |.macro ins_arithmod, fpins ++ |.macro ins_arithmod, fpins, BC + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 @@ -9361,9 +9498,9 @@ index 0000000..c329123 + | sext.w CARG2, CARG2 + | add RA, BASE, RA + | bxeqz CARG2, ->vmeta_arith -+ | jal extern lj_vm_modi ++ | call_intern BC, lj_vm_modi + | zext.w CRET1, CRET1 -+ | settp CRET1, TISNUM ++ | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. @@ -9382,8 +9519,14 @@ index 0000000..c329123 + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdiv.d + break; -+ case BC_MODVN: case BC_MODNV: case BC_MODVV: -+ | ins_arithmod fpmod ++ case BC_MODVN: ++ | ins_arithmod fpmod, BC_MODVN ++ break; ++ case BC_MODNV: ++ | ins_arithmod fpmod, BC_MODNV ++ break; ++ case BC_MODVV: ++ | ins_arithmod fpmod, BC_MODVV + break; + case BC_POW: + | ins_arithpre @@ -9395,11 +9538,10 @@ index 0000000..c329123 + | sltiu TMP1, TMP1, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | add RA, BASE, RA -+ | load_got pow + | bxeqz TMP0, ->vmeta_arith + | fld FARG1, 0(RB) + | fld FARG2, 0(RC) -+ | call_extern ++ | call_extern pow + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 @@ -9417,7 +9559,7 @@ index 0000000..c329123 + | srliw CARG3, CARG3, 3 + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ld BASE, L->base + | bxnez CRET1, ->vmeta_binop @@ -9458,7 +9600,7 @@ index 0000000..c329123 + | add RA, BASE, RA + | zext.w RD, RD + | ins_next1 -+ | settp RD, TISNUM ++ | settp_b RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; @@ -9539,7 +9681,7 @@ index 0000000..c329123 + | beqz TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL -+ | jal extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETS: @@ -9568,7 +9710,7 @@ index 0000000..c329123 + | beqz TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL -+ | jal extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: @@ -9604,7 +9746,7 @@ index 0000000..c329123 + | mv CARG1, L + | beqz TMP2, >1 + | add CARG2, BASE, RA -+ | jal extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) + | ld BASE, L->base + |1: + | ins_next @@ -9620,7 +9762,7 @@ index 0000000..c329123 + | cleartp CARG3 + | mv CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) -+ | jal extern lj_func_newL_gc ++ | call_intern BC_FNEW, lj_func_newL_gc + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base @@ -9653,15 +9795,15 @@ index 0000000..c329123 + | not TMP4, TMP4 + | and CARG2, CARG2, TMP4 + | or CARG2, CARG2, TMP0 -+ | // (lua_State *L, int32_t asize, uint32_t hbits) + | mv CARG1, L -+ | jal extern lj_tab_new ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | call_intern BC_TNEW, lj_tab_new + | // Returns Table *. + } else { + | sub TMP1, KBASE, RD + | mv CARG1, L -+ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 -+ | jal extern lj_tab_dup // (lua_State *L, Table *kt) ++ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) + | // Returns Table *. + } + | li TMP0, LJ_TTAB @@ -9674,7 +9816,11 @@ index 0000000..c329123 + |5: + | mv MULTRES, RD + | mv CARG1, L -+ | jal extern lj_gc_step_fixtop // (lua_State *L) ++ if (op == BC_TNEW) { ++ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) ++ } else { ++ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) ++ } + | mv RD, MULTRES + | j <1 + break; @@ -9711,7 +9857,7 @@ index 0000000..c329123 + | bne TMP3, TISNUM, >5 // Integer key? + | sext.w TMP2, TMP2 + | ld TMP1, TAB:RB->array -+ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? (keys = [0, asize-1]) ++ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? + | slliw TMP2, TMP2, 3 + | add TMP2, TMP1, TMP2 + | ld CRET1, 0(TMP2) @@ -9939,7 +10085,8 @@ index 0000000..c329123 + | mv CARG2, TAB:RB + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // (lua_State *L, GCtab *t, TValue *k) ++ | call_intern BC_TSETS, lj_tab_newkey + | // Returns TValue *. + | ld BASE, L->base + | fsd FTMP0, 0(CRET1) @@ -10047,7 +10194,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | mv BASE, RD + | mv CARG1, L -+ | jal extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // (lua_State *L, GCtab *t, int nasize) ++ | call_intern BC_TSETM, lj_tab_reasize + | // Must not reallocate the stack. + | mv RD, BASE + | ld BASE, L->base // Reload BASE for lack of a saved register. @@ -10317,7 +10465,7 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | srliw CARG2, TMP1, 3 + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) + | mv RC, BASE + | ld BASE, L->base + | add RA, BASE, RA @@ -10486,7 +10634,7 @@ index 0000000..c329123 + | or CARG3, CARG3, TMP1 + | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue + | zext.w CARG1, CARG1 -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | sd CARG1, FORL_IDX*8(RA) + } + |1: @@ -10669,6 +10817,7 @@ index 0000000..c329123 + | settp LFUNC:RB, TMP0 + | add TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi TMP2, TMP2, -8 + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. @@ -10768,5 +10917,135 @@ index 0000000..c329123 +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ -+ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 3\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 3\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 3\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe1:\n" ++ "\t.4byte .LECIE1-.LSCIE1\n" ++ ".LSCIE1:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zPR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.4byte lj_err_unwind_dwarf-.\n" ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE1:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE2:\n" ++ "\t.4byte .LEFDE2-.LASFDE2\n" ++ ".LASFDE2:\n" ++ "\t.4byte .LASFDE2-.Lframe1\n" ++ "\t.4byte .Lbegin-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 2\n" ++ ".LEFDE2:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".Lframe2:\n" ++ "\t.4byte .LECIE2-.LSCIE2\n" ++ ".LSCIE2:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. */ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE2:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE3:\n" ++ "\t.4byte .LEFDE3-.LASFDE3\n" ++ ".LASFDE3:\n" ++ "\t.4byte .LASFDE3- .Lframe2\n" ++ "\t.4byte lj_vm_ffi_call-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 2\n" ++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#endif ++ break; ++ default: ++ break; ++ } +} diff --git a/luajit-2.1-224129a-update.patch b/luajit-2.1-d06beb0-update.patch similarity index 81% rename from luajit-2.1-224129a-update.patch rename to luajit-2.1-d06beb0-update.patch index a00f7053f1e5cffc9a9ea67a29e895642d4e52ab..e64344e67f44e59d0d1003d4e44ccea61ade56ec 100644 --- a/luajit-2.1-224129a-update.patch +++ b/luajit-2.1-d06beb0-update.patch @@ -1,5 +1,5 @@ diff --git a/COPYRIGHT b/COPYRIGHT -index 6ed40025..c74216c3 100644 +index 6ed40025..d7620314 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -1,7 +1,7 @@ @@ -8,7 +8,7 @@ index 6ed40025..c74216c3 100644 +LuaJIT -- a Just-In-Time Compiler for Lua. https://luajit.org/ -Copyright (C) 2005-2017 Mike Pall. All rights reserved. -+Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++Copyright (C) 2005-2023 Mike Pall. All rights reserved. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -30,43 +30,101 @@ index 6ed40025..c74216c3 100644 =============================================================================== diff --git a/Makefile b/Makefile -index 0f933089..b0288b4d 100644 +index 0f933089..3aed365d 100644 --- a/Makefile +++ b/Makefile -@@ -10,7 +10,7 @@ +@@ -10,16 +10,21 @@ # For MSVC, please follow the instructions given in src/msvcbuild.bat. # For MinGW and Cygwin, cd to src and run make with the Makefile there. # -# Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h -+# Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++# Copyright (C) 2005-2023 Mike Pall. See Copyright Notice in luajit.h ############################################################################## MAJVER= 2 -@@ -33,7 +33,8 @@ DPREFIX= $(DESTDIR)$(PREFIX) + MINVER= 1 +-RELVER= 0 +-PREREL= -beta3 +-VERSION= $(MAJVER).$(MINVER).$(RELVER)$(PREREL) + ABIVER= 5.1 + ++# LuaJIT uses rolling releases. The release version is based on the time of ++# the latest git commit. The 'git' command must be available during the build. ++RELVER= $(shell cat src/luajit_relver.txt 2>/dev/null || : ) ++# Note: setting it with := doesn't work, since it will change during the build. ++ ++MMVERSION= $(MAJVER).$(MINVER) ++VERSION= $(MMVERSION).$(RELVER) ++ + ############################################################################## + # + # Change the installation path as needed. This automatically adjusts +@@ -33,9 +38,10 @@ DPREFIX= $(DESTDIR)$(PREFIX) INSTALL_BIN= $(DPREFIX)/bin INSTALL_LIB= $(DPREFIX)/$(MULTILIB) INSTALL_SHARE= $(DPREFIX)/share -INSTALL_INC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) -+INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MAJVER).$(MINVER) ++INSTALL_DEFINC= $(DPREFIX)/include/luajit-$(MMVERSION) +INSTALL_INC= $(INSTALL_DEFINC) - INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) +-INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(VERSION) ++INSTALL_LJLIBD= $(INSTALL_SHARE)/luajit-$(MMVERSION) INSTALL_JITLIB= $(INSTALL_LJLIBD)/jit -@@ -75,9 +76,12 @@ SYMLINK= ln -sf + INSTALL_LMODD= $(INSTALL_SHARE)/lua + INSTALL_LMOD= $(INSTALL_LMODD)/$(ABIVER) +@@ -49,10 +55,10 @@ INSTALL_TSYMNAME= luajit + INSTALL_ANAME= libluajit-$(ABIVER).a + INSTALL_SOSHORT1= libluajit-$(ABIVER).so + INSTALL_SOSHORT2= libluajit-$(ABIVER).so.$(MAJVER) +-INSTALL_SONAME= $(INSTALL_SOSHORT2).$(MINVER).$(RELVER) ++INSTALL_SONAME= libluajit-$(ABIVER).so.$(VERSION) + INSTALL_DYLIBSHORT1= libluajit-$(ABIVER).dylib + INSTALL_DYLIBSHORT2= libluajit-$(ABIVER).$(MAJVER).dylib +-INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(MAJVER).$(MINVER).$(RELVER).dylib ++INSTALL_DYLIBNAME= libluajit-$(ABIVER).$(VERSION).dylib + INSTALL_PCNAME= luajit.pc + + INSTALL_STATIC= $(INSTALL_LIB)/$(INSTALL_ANAME) +@@ -75,9 +81,13 @@ SYMLINK= ln -sf INSTALL_X= install -m 0755 INSTALL_F= install -m 0644 UNINSTALL= $(RM) -LDCONFIG= ldconfig -n +LDCONFIG= ldconfig -n 2>/dev/null SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ - -e "s|^multilib=.*|multilib=$(MULTILIB)|" +- -e "s|^multilib=.*|multilib=$(MULTILIB)|" ++ -e "s|^multilib=.*|multilib=$(MULTILIB)|" \ ++ -e "s|^relver=.*|relver=$(RELVER)|" +ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) + SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" +endif FILE_T= luajit FILE_A= libluajit.a -@@ -121,7 +125,7 @@ install: $(INSTALL_DEP) +@@ -88,7 +98,9 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h + FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ +- dis_mips64.lua dis_mips64el.lua vmdef.lua ++ dis_mips64.lua dis_mips64el.lua \ ++ dis_mips64r6.lua dis_mips64r6el.lua \ ++ vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) + HOST_SYS:= $(shell uname -s) +@@ -109,9 +121,9 @@ endif + INSTALL_DEP= src/luajit + + default all $(INSTALL_DEP): +- @echo "==== Building LuaJIT $(VERSION) ====" ++ @echo "==== Building LuaJIT $(MMVERSION) ====" + $(MAKE) -C src +- @echo "==== Successfully built LuaJIT $(VERSION) ====" ++ @echo "==== Successfully built LuaJIT $(MMVERSION) ====" + + install: $(INSTALL_DEP) + @echo "==== Installing LuaJIT $(VERSION) to $(PREFIX) ====" +@@ -121,7 +133,7 @@ install: $(INSTALL_DEP) $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) cd src && test -f $(FILE_SO) && \ $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ @@ -75,11 +133,47 @@ index 0f933089..b0288b4d 100644 $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) +@@ -130,18 +142,12 @@ install: $(INSTALL_DEP) + $(RM) $(FILE_PC).tmp + cd src && $(INSTALL_F) $(FILES_INC) $(INSTALL_INC) + cd src/jit && $(INSTALL_F) $(FILES_JITLIB) $(INSTALL_JITLIB) ++ $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM) + @echo "==== Successfully installed LuaJIT $(VERSION) to $(PREFIX) ====" +- @echo "" +- @echo "Note: the development releases deliberately do NOT install a symlink for luajit" +- @echo "You can do this now by running this command (with sudo):" +- @echo "" +- @echo " $(SYMLINK) $(INSTALL_TNAME) $(INSTALL_TSYM)" +- @echo "" +- + + uninstall: + @echo "==== Uninstalling LuaJIT $(VERSION) from $(PREFIX) ====" +- $(UNINSTALL) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) ++ $(UNINSTALL) $(INSTALL_TSYM) $(INSTALL_T) $(INSTALL_STATIC) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) $(INSTALL_MAN)/$(FILE_MAN) $(INSTALL_PC) + for file in $(FILES_JITLIB); do \ + $(UNINSTALL) $(INSTALL_JITLIB)/$$file; \ + done +@@ -155,8 +161,9 @@ uninstall: + ############################################################################## + + amalg: +- @echo "Building LuaJIT $(VERSION)" ++ @echo "==== Building LuaJIT $(MMVERSION) (amalgamation) ====" + $(MAKE) -C src amalg ++ @echo "==== Successfully built LuaJIT $(MMVERSION) (amalgamation) ====" + + clean: + $(MAKE) -C src clean diff --git a/README b/README -index 2b9ae9d2..1faef255 100644 +index 2b9ae9d2..e4a69265 100644 --- a/README +++ b/README -@@ -3,9 +3,9 @@ README for LuaJIT 2.1.0-beta3 +@@ -1,11 +1,11 @@ +-README for LuaJIT 2.1.0-beta3 +------------------------------ ++README for LuaJIT 2.1 ++--------------------- LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. @@ -87,30 +181,45 @@ index 2b9ae9d2..1faef255 100644 +Project Homepage: https://luajit.org/ -LuaJIT is Copyright (C) 2005-2017 Mike Pall. -+LuaJIT is Copyright (C) 2005-2022 Mike Pall. ++LuaJIT is Copyright (C) 2005-2023 Mike Pall. LuaJIT is free software, released under the MIT license. See full Copyright Notice in the COPYRIGHT file or in luajit.h. diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css -index 62e1c165..a49d309f 100644 +index 62e1c165..4a139278 100644 --- a/doc/bluequad-print.css +++ b/doc/bluequad-print.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2022 Mike Pall. ++/* Copyright (C) 2004-2023 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. diff --git a/doc/bluequad.css b/doc/bluequad.css -index be2c4bf2..4c1a9082 100644 +index be2c4bf2..7399f625 100644 --- a/doc/bluequad.css +++ b/doc/bluequad.css @@ -1,4 +1,4 @@ -/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2022 Mike Pall. ++/* Copyright (C) 2004-2023 Mike Pall. * * You are welcome to use the general ideas of this design for your own sites. * But please do not steal the stylesheet, the layout or the color scheme. +@@ -206,11 +206,9 @@ img.right { + .ext { + color: #ff8000; + } +-.new { +- font-size: 6pt; +- vertical-align: middle; +- background: #ff8000; +- color: #ffffff; ++.note { ++ padding: 0.5em 1em; ++ border-left: 3px solid #bfcfff; + } + #site { + clear: both; diff --git a/doc/changes.html b/doc/changes.html deleted file mode 100644 index a66a8d95..00000000 @@ -1001,7 +1110,7 @@ index a66a8d95..00000000 - - diff --git a/doc/contact.html b/doc/contact.html -index fe4751c0..6d609286 100644 +index fe4751c0..cc4d8c72 100644 --- a/doc/contact.html +++ b/doc/contact.html @@ -1,17 +1,16 @@ @@ -1014,7 +1123,7 @@ index fe4751c0..6d609286 100644 - - + -+ ++ @@ -1044,20 +1153,23 @@ index fe4751c0..6d609286 100644 jit.* Library
  • Lua/C API -@@ -46,28 +47,21 @@ +@@ -45,29 +46,22 @@ + Profiler
  • - Status +-Status - -
  • - FAQ -
  • --Performance » -
  • --Wiki » +-FAQ -
  • +-Performance » ++Status » +
  • +-Wiki » ++FAQ » +
  • -Mailing List » +Mailing List »
  • @@ -1091,7 +1203,7 @@ index fe4751c0..6d609286 100644

    All documentation is -Copyright © 2005-2017 Mike Pall. -+Copyright © 2005-2022 Mike Pall. ++Copyright © 2005-2023 Mike Pall.

    @@ -1100,22 +1212,22 @@ index fe4751c0..6d609286 100644 +