diff --git a/0001-add-riscv64-ccopt.patch b/0001-add-riscv64-ccopt.patch new file mode 100644 index 0000000000000000000000000000000000000000..f8297e48eabdd70f0eade76ebad9ce06aed6c1b1 --- /dev/null +++ b/0001-add-riscv64-ccopt.patch @@ -0,0 +1,24 @@ +From c1a0a3342d72d49960c43d373209c289f8fdbed2 Mon Sep 17 00:00:00 2001 +From: TexasOct +Date: Thu, 21 Mar 2024 14:41:39 +0800 +Subject: [PATCH] add riscv64 ccopt + +--- + src/Makefile | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/src/Makefile b/src/Makefile +index 224d21e7..e594a273 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -52,6 +52,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_riscv64= + # + CCDEBUG= + # Uncomment the next line to generate debug information: +-- +2.44.0 + diff --git a/LuaJIT-2.1.0-0d313b24.tar.gz b/LuaJIT-2.1.0-0d313b24.tar.gz new file mode 100644 index 0000000000000000000000000000000000000000..9285c376b97ebbdddcc45f65f8a9b815cb4b32b5 Binary files /dev/null and b/LuaJIT-2.1.0-0d313b24.tar.gz differ diff --git a/LuaJIT-2.1.0-beta3.tar.gz b/LuaJIT-2.1.0-beta3.tar.gz deleted file mode 100644 index 5aea267c907ca1c2e829982a95d8bd12e38a5241..0000000000000000000000000000000000000000 Binary files a/LuaJIT-2.1.0-beta3.tar.gz and /dev/null differ diff --git a/add-riscv-support.patch b/add-riscv-support.patch index 439ab737060e9e58c491ff74fd570deaeb134ecc..2ed2f208bf2151de83300f917162f6e874f9c9ae 100644 --- a/add-riscv-support.patch +++ b/add-riscv-support.patch @@ -1,30 +1,59 @@ -From: gns -Subject: [PATCH] riscv64: add initial support for riscv64 - -This adds interpreter, FFI, and JIT support for rv64g platform with lp64d ABI. -Keep in mind that there might still be some issues, your feedback is greatly appreciated. 
---- -diff --git a/Makefile b/Makefile -index b0288b4..420a4b3 100644 ---- a/Makefile -+++ b/Makefile -@@ -90,6 +90,7 @@ FILE_MAN= luajit.1 - FILE_PC= luajit.pc - FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h - FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ -+ dis_riscv.lua dis_riscv64.lua \ - dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/Makefile +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/Makefile ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/Makefile +@@ -101,6 +101,7 @@ FILES_JITLIB= bc.lua bcsave.lua dump.lua dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ - dis_mips64.lua dis_mips64el.lua vmdef.lua -diff --git a/dynasm/dasm_riscv.h b/dynasm/dasm_riscv.h -new file mode 100644 -index 0000000..4afe064 + dis_mips64.lua dis_mips64el.lua \ + dis_mips64r6.lua dis_mips64r6el.lua \ ++ dis_riscv.lua dis_riscv64.lua \ + vmdef.lua + + ifeq (,$(findstring Windows,$(OS))) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/README.md +=================================================================== +--- /dev/null ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/README.md +@@ -0,0 +1,31 @@ ++# LJRV - LuaJIT RISC-V 64 Port ++ ++LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. ++RISC-V is a free and open ISA enabling a new era of processor innovation. ++ ++## Introduction ++ ++LJRV is an ongoing porting project of LuaJIT to the RISC-V 64-bit architecture by PLCT Lab, ISCAS. ++The ultimate goal is to provide a RISC-V 64 LuaJIT implementation and have it upstreamed to the official LuaJIT repository. ++ ++## Progress ++ ++- [x] Interpreter Runtime ++- [x] JIT Compiler ++ ++LJRV is still of beta quality, particularly the JIT compiler. ++For production usage, we suggest disabling the JIT compiler during compilation by setting `XCFLAGS+= -DLUAJIT_DISABLE_JIT` in the Makefile or as an environment variable.
++ ++## Bug Report ++ ++Please report bugs to [Issues](https://github.com/ruyisdk/LuaJIT/issues). ++ ++## Copyright ++ ++LuaJIT is Copyright (C) 2005-2023 Mike Pall. ++LuaJIT is free software, released under the MIT license. ++See full Copyright Notice in the COPYRIGHT file or in luajit.h. ++ ++LJRV is Copyright (C) 2022-2023 PLCT Lab, ISCAS. Contributed by gns. ++LJRV is free software, released under the MIT license. ++LJRV is part of RuyiSDK. +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.h +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv.h -@@ -0,0 +1,438 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.h +@@ -0,0 +1,433 @@ +/* +** DynASM RISC-V encoding engine. -+** Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++** Copyright (C) 2005-2023 Mike Pall. All rights reserved. +** Released under the MIT license. See dynasm.lua for full copyright notice. +*/ + @@ -93,7 +122,7 @@ index 0000000..4afe064 + size_t lgsize; + int *pclabels; /* PC label chains/pos ptrs. */ + size_t pcsize; -+ void **globals; /* Array of globals (bias -10). */ ++ void **globals; /* Array of globals. */ + dasm_Section *section; /* Pointer to active section. */ + size_t codesize; /* Total size of all code sections. */ + int maxsection; /* 0 <= sectionidx < maxsection. */ @@ -110,7 +139,6 @@ index 0000000..4afe064 +{ + dasm_State *D; + size_t psz = 0; -+ int i; + Dst_REF = NULL; + DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); + D = Dst_REF; @@ -121,12 +149,7 @@ index 0000000..4afe064 + D->pcsize = 0; + D->globals = NULL; + D->maxsection = maxsection; -+ for (i = 0; i < maxsection; i++) { -+ D->sections[i].buf = NULL; /* Need this for pass3. */ -+ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); -+ D->sections[i].bsize = 0; -+ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. 
*/ -+ } ++ memset((void *)D->sections, 0, maxsection * sizeof(dasm_Section)); +} + +/* Free DynASM state. */ @@ -146,7 +169,7 @@ index 0000000..4afe064 +void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) +{ + dasm_State *D = Dst_REF; -+ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ D->globals = gl; + DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); +} + @@ -171,6 +194,7 @@ index 0000000..4afe064 + if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); + for (i = 0; i < D->maxsection; i++) { + D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].rbuf = D->sections[i].buf - D->sections[i].pos; + D->sections[i].ofs = 0; + } +} @@ -388,7 +412,7 @@ index 0000000..4afe064 + break; + case DASM_REL_LG: + if (n < 0) { -+ n = (int)((ptrdiff_t)D->globals[-n] - (ptrdiff_t)cp + 4); ++ n = (int)((ptrdiff_t)D->globals[-n-10] - (ptrdiff_t)cp + 4); + goto patchrel; + } + /* fallthrough */ @@ -407,7 +431,7 @@ index 0000000..4afe064 + } + break; + case DASM_LABEL_LG: -+ val &= 2047; if (val >= 20) D->globals[val-10] = (void *)(base + n); ++ val &= 2047; if (val >= 20) D->globals[val-20] = (void *)(base + n); + break; + case DASM_LABEL_PC: break; + case DASM_IMM: @@ -460,16 +484,15 @@ index 0000000..4afe064 +} +#endif + -diff --git a/dynasm/dasm_riscv.lua b/dynasm/dasm_riscv.lua -new file mode 100644 -index 0000000..e8e522a +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv.lua -@@ -0,0 +1,973 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv.lua +@@ -0,0 +1,981 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V module. +-- -+-- Copyright (C) 2005-2022 Mike Pall. All rights reserved. ++-- Copyright (C) 2005-2023 Mike Pall. All rights reserved. +-- See dynasm.lua for full copyright notice. 
+------------------------------------------------------------------------------ + @@ -738,17 +761,22 @@ index 0000000..e8e522a + +local map_op_rv32imafd = { + ++ -- DASM pseudo-instrs ++ empty_0 = "ffffffff", ++ call_1 = "7fffffffJ", ++ + -- RV32I + lui_2 = "00000037DU", -+ auipc_2 = "00000017DU", ++ auipc_2 = "00000017DA", + + jal_2 = "0000006fDJ", -+ jalr_3 = "00000067DRI", ++ jalr_3 = "00000067DRJ", + -- pseudo-instrs + j_1 = "0000006fJ", + jal_1 = "000000efJ", + jr_1 = "00000067R", + jalr_1 = "000000e7R", ++ jalr_2 = "000000e7RJ", + + beq_3 = "00000063RrB", + bne_3 = "00001063RrB", @@ -1267,6 +1295,9 @@ index 0000000..e8e522a + local mode, m, s = parse_label(params[n], false) + if p == "B" then m = m + 2048 end + waction("REL_"..mode, m, s, 1); n = n + 1 ++ elseif p == "A" then -- AUIPC ++ local mode, m, s = parse_label(params[n], false) ++ waction("REL_"..mode, m, s, 1); n = n + 1 + else + assert(false) + end @@ -1439,11 +1470,10 @@ index 0000000..e8e522a + +------------------------------------------------------------------------------ + -diff --git a/dynasm/dasm_riscv32.lua b/dynasm/dasm_riscv32.lua -new file mode 100644 -index 0000000..654eb76 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv32.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv32.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv32.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 32 module. @@ -1457,11 +1487,10 @@ index 0000000..654eb76 + +riscv32 = true -- Using a global is an ugly, but effective solution. 
+return require("dasm_riscv") -diff --git a/dynasm/dasm_riscv64.lua b/dynasm/dasm_riscv64.lua -new file mode 100644 -index 0000000..10cdfe2 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv64.lua +=================================================================== --- /dev/null -+++ b/dynasm/dasm_riscv64.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/dynasm/dasm_riscv64.lua @@ -0,0 +1,12 @@ +------------------------------------------------------------------------------ +-- DynASM RISC-V 64 module. @@ -1475,31 +1504,23 @@ index 0000000..10cdfe2 + +riscv64 = true -- Using a global is an ugly, but effective solution. +return require("dasm_riscv") -diff --git a/src/Makefile b/src/Makefile -index 323baf2..1fd84fc 100644 ---- a/src/Makefile -+++ b/src/Makefile -@@ -53,6 +53,7 @@ CCOPT_arm= - CCOPT_arm64= - CCOPT_ppc= - CCOPT_mips= -+CCOPT_riscv64= - # - CCDEBUG= - # Uncomment the next line to generate debug information: -@@ -267,6 +268,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(TARGET_TESTARCH))) - else +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/Makefile +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/Makefile ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/Makefile +@@ -268,6 +269,12 @@ ifneq (,$(findstring LJ_TARGET_MIPS ,$(T TARGET_LJARCH= mips endif -+else + else +ifneq (,$(findstring LJ_TARGET_RISCV32 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv32 +else +ifneq (,$(findstring LJ_TARGET_RISCV64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= riscv64 - else ++else $(error Unsupported target architecture) endif + endif @@ -275,6 +282,8 @@ endif endif endif @@ -1509,7 +1530,7 @@ index 323baf2..1fd84fc 100644 ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) TARGET_SYS= PS3 -@@ -464,6 +473,12 @@ ifeq (ppc,$(TARGET_LJARCH)) +@@ -469,6 +478,12 @@ ifeq (ppc,$(TARGET_LJARCH)) DASM_AFLAGS+= -D PPE -D TOC endif endif @@ -1522,11 +1543,11 @@ index 323baf2..1fd84fc 100644 endif 
endif -diff --git a/src/host/buildvm.c b/src/host/buildvm.c -index 9ee47ad..9ebfb8f 100644 ---- a/src/host/buildvm.c -+++ b/src/host/buildvm.c -@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/host/buildvm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm.c +@@ -67,6 +67,8 @@ static int collect_reloc(BuildCtx *ctx, #include "../dynasm/dasm_ppc.h" #elif LJ_TARGET_MIPS #include "../dynasm/dasm_mips.h" @@ -1535,17 +1556,41 @@ index 9ee47ad..9ebfb8f 100644 #else #error "No support for this architecture (yet)" #endif -diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c -index 7baa011..b2a7daf 100644 ---- a/src/host/buildvm_asm.c -+++ b/src/host/buildvm_asm.c -@@ -156,6 +156,15 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm_asm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/host/buildvm_asm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/host/buildvm_asm.c +@@ -97,9 +97,15 @@ static void emit_asm_words(BuildCtx *ctx + #if LJ_TARGET_ARM64 && LJ_BE + ins = lj_bswap(ins); /* ARM64 instructions are always little-endian. 
*/ + #endif +- if ((i & 15) == 0) ++ if ((i & 15) == 0) { ++#if LJ_TARGET_RISCV64 ++ while (ins == 0xffffffffu) { i += 4; ins = *(uint32_t *)(p+i); } ++#endif + fprintf(ctx->fp, "\t.long 0x%08x", ins); +- else ++ } else ++#if LJ_TARGET_RISCV64 ++ if (ins != 0xffffffffu) ++#endif + fprintf(ctx->fp, ",0x%08x", ins); + if ((i & 15) == 12) putc('\n', ctx->fp); + } +@@ -156,6 +162,21 @@ static void emit_asm_wordreloc(BuildCtx "Error: unsupported opcode %08x for %s symbol relocation.\n", ins, sym); exit(1); -+#elif LJ_TARGET_RISCV32 || LJ_TARGET_RISCV64 -+ if ((ins & 0x7f) == 0x6fu) { -+ fprintf(ctx->fp, "\tjal %s\n", sym); ++#elif LJ_TARGET_RISCV64 ++ if (ins == 0x7fffffffu) { ++ fprintf(ctx->fp, "\tcall %s\n", sym); ++ } else if ((ins & 0x7f) == 0x17u) { ++ fprintf(ctx->fp, "\tauipc x%d, %s\n", (ins >> 7) & 31, sym); ++ } else if ((ins & 0x7f) == 0x67u) { ++ fprintf(ctx->fp, "\tjalr x%d, x%d, %s\n", (ins >> 7) & 31, (ins >> 15) & 31, sym); ++ } else if ((ins & 0x7f) == 0x6fu) { ++ fprintf(ctx->fp, "\tjal x%d, %s\n", (ins >> 7) & 31, sym); + } else { + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", @@ -1555,21 +1600,21 @@ index 7baa011..b2a7daf 100644 #else #error "missing relocation support for this architecture" #endif -@@ -272,6 +281,9 @@ void emit_asm(BuildCtx *ctx) +@@ -249,6 +270,9 @@ void emit_asm(BuildCtx *ctx) #if LJ_TARGET_MIPS - fprintf(ctx->fp, ".set nomips16\n.abicalls\n.set noreorder\n.set nomacro\n"); + fprintf(ctx->fp, "\t.set nomips16\n\t.abicalls\n\t.set noreorder\n\t.set nomacro\n"); #endif +#if LJ_TARGET_RISCV64 + fprintf(ctx->fp, ".option arch, -c\n.option norelax\n"); +#endif + emit_asm_align(ctx, 4); - for (i = rel = 0; i < ctx->nsym; i++) { - int32_t ofs = ctx->sym[i].ofs; -diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua -index 90fe9da..88a6045 100644 ---- a/src/jit/bcsave.lua -+++ b/src/jit/bcsave.lua -@@ -97,6 +97,7 @@ local map_arch = { + #if LJ_TARGET_PS3 +Index: 
luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/bcsave.lua +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/jit/bcsave.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/bcsave.lua +@@ -101,6 +101,7 @@ local map_arch = { mips64el = { e = "le", b = 64, m = 8, f = 0x80000007, }, mips64r6 = { e = "be", b = 64, m = 8, f = 0xa0000407, }, mips64r6el = { e = "le", b = 64, m = 8, f = 0xa0000407, }, @@ -1577,12 +1622,11 @@ index 90fe9da..88a6045 100644 } local map_os = { -diff --git a/src/jit/dis_riscv.lua b/src/jit/dis_riscv.lua -new file mode 100644 -index 0000000..82b41aa +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv.lua +=================================================================== --- /dev/null -+++ b/src/jit/dis_riscv.lua -@@ -0,0 +1,772 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv.lua +@@ -0,0 +1,793 @@ +------------------------------------------------------------------------------ +-- LuaJIT RISC-V disassembler module. 
+-- @@ -1613,13 +1657,13 @@ index 0000000..82b41aa + +local map_quad0 = { + shift = 13, mask = 7, -+ [0] = "c.addi4spnAW", "c.fldNMh", "c.lwAMn", "c.flwNMn", -+ false, "c.fsdNMh", "c.swAMn", "c.fswNMn" ++ [0] = "c.addi4spnZW", "c.fldNMh", "c.lwZMn", "c.flwNMn", ++ false, "c.fsdNMh", "c.swZMn", "c.fswNMn" +} + +local map_sub2quad1 = { + shift = 5, mask = 3, -+ [0] = "c.subMA", "c.xorMA", "c.orMA", "c.andMA" ++ [0] = "c.subMZ", "c.xorMZ", "c.orMZ", "c.andMZ" +} + +local map_sub1quad1 = { @@ -1921,7 +1965,7 @@ index 0000000..82b41aa + +local map_pri = { + [3] = map_load, [7] = map_fload, [15] = map_fence, [19] = map_ali, -+ [23] = "auipcDU", [27] = map_addi_shift, ++ [23] = "auipcDA", [27] = map_addi_shift, + [35] = map_store, [39] = map_fstore, [47] = map_aext, [51] = map_al, + [55] = "luiDU", [59] = map_arithw_shiftw, [67] = map_fmadd, [71] = map_fmsub, + [75] = map_fnmsub, [99] = map_branch, [79] = map_fnmadd, [83] = map_fext, @@ -2164,7 +2208,7 @@ index 0000000..82b41aa + operands[#operands] = x + x = temp + end -+ elseif p == "A" then ++ elseif p == "Z" then + x = map_gpr[8 + band(rshift(op, 2), 7)] + elseif p == "N" then + x = map_fgpr[8 + band(rshift(op, 2), 7)] @@ -2174,7 +2218,7 @@ index 0000000..82b41aa + x = map_gpr[band(rshift(op, 2), 31)] + elseif p == "W" then + local uimm = parse_W(op) -+ x = format("%s,%d", "x2", uimm) ++ x = format("%s,%d", "sp", uimm) + elseif p == "x" then + x = parse_x(op) + elseif p == "h" then @@ -2184,7 +2228,7 @@ index 0000000..82b41aa + operands[#operands] = format("%d(%s)", uimm, last) + elseif p == "X" then + local imm = parse_X(op) -+ x = format("%s,%d", "x2", imm) ++ x = format("%s,%d", "sp", imm) + elseif p == "O" then + x = format("(%s)", map_gpr[band(rshift(op, 15), 31)]) + elseif p == "H" then @@ -2197,7 +2241,15 @@ index 0000000..82b41aa + x = arshift(op, 20) + --different for jalr + if(name == "jalr") then -+ operands[#operands] = format("%d(%s)", x, last) ++ local reg = map_gpr[band(rshift(op, 15), 31)] ++ 
if(ctx.reltab[reg] == nil) then ++ operands[#operands] = format("%d(%s)", x, last) ++ else ++ local target = ctx.reltab[reg] + x ++ operands[#operands] = format("%d(%s) #0x%08x", x, last, target) ++ ctx.rel = target ++ ctx.reltab[reg] = nil --assume no reuses of the register ++ end + x = nil --not to add additional operand + end + elseif p == "i" then @@ -2215,8 +2267,14 @@ index 0000000..82b41aa + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3), + lshift(part3, 2)) + operands[#operands] = format("%d(%s)", uimm, last) ++ elseif p == "A" then ++ local value, dest = band(rshift(op, 12), 0xfffff), map_gpr[band(rshift(op, 7), 31)] ++ ctx.reltab[dest] = ctx.addr + ctx.pos + lshift(value, 12) ++ x = format("0x%x", value) + elseif p == "B" then -+ x = parse_B(op) ++ x = ctx.addr + ctx.pos + parse_B(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "U" then + local value = band(rshift(op, 12), 0xfffff) + x = string.format("0x%x", value) @@ -2226,11 +2284,15 @@ index 0000000..82b41aa + local part3 = band(rshift(op, 5), 3) --4:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 3)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "q" then -+ x = parse_q(op) ++ x = ctx.addr + ctx.pos + parse_q(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "J" then -+ x = parse_J(op) ++ x = ctx.addr + ctx.pos + parse_J(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "K" then + local value = parse_K(op) + x = string.format("0x%x", value) @@ -2240,28 +2302,30 @@ index 0000000..82b41aa + local part3 = band(rshift(op, 4), 7) --4:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 5), + lshift(part3, 2)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "1" then + local part1 = band(rshift(op, 12), 1) --5 + local part2 = band(rshift(op, 2), 31) --4:0 + local uimm = bor(lshift(0, 31), lshift(part1, 5), part2) + x = 
string.format("0x%x", uimm) + elseif p == "T" then -+ x = parse_T(op) ++ x = ctx.addr + ctx.pos + parse_T(op) ++ ctx.rel = x ++ x = format("0x%08x", x) + elseif p == "t" then + local part1 = band(rshift(op, 7), 7) --8:6 + local part2 = band(rshift(op, 10), 7) --5:3 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 3)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "u" then + local part1 = band(rshift(op, 7), 3) --7:6 + local part2 = band(rshift(op, 9), 15) --5:2 + local uimm = bor(lshift(0, 31), lshift(part1, 6), lshift(part2, 2)) -+ x = format("%d(%s)", uimm, "x2") ++ x = format("%d(%s)", uimm, "sp") + elseif p == "V" then + x = map_fgpr[band(rshift(op, 2), 31)] + elseif p == "0" then --PSEUDOINSTRUCTIONS -+ if (last == "x0" or last == 0) then ++ if (last == "zero" or last == 0) then + local n = #operands + operands[n] = nil + last = operands[n-1] @@ -2289,15 +2353,15 @@ index 0000000..82b41aa + local value = string.sub(operands[#operands], 1, 1) + local reg = string.sub(operands[#operands], 3, #(operands[#operands]) - 1) + if(value == "0" and -+ (operands[#operands - 1] == "x1" or operands[#operands - 1] == "x0")) then -+ if(operands[#operands - 1] == "x0") then ++ (operands[#operands - 1] == "ra" or operands[#operands - 1] == "zero")) then ++ if(operands[#operands - 1] == "zero") then + name = altname + end + operands[#operands] = nil + operands[#operands] = reg + end + elseif (p == "2" and alias_done == false) then -+ if (last == "x0" or last == 0) then ++ if (last == "zero" or last == 0) then + local a1, a2 = match(altname, "([^|]*)|(.*)") + name = a2 + operands[#operands] = nil @@ -2335,6 +2399,7 @@ index 0000000..82b41aa + ctx.get = get_le + ctx.map_pri = map_pri + ctx.map_compr = map_compr ++ ctx.reltab = {} + return ctx +end + @@ -2355,11 +2420,10 @@ index 0000000..82b41aa + disass = disass, + regname = regname +} -diff --git a/src/jit/dis_riscv64.lua b/src/jit/dis_riscv64.lua -new file mode 100644 
-index 0000000..ff038d1 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv64.lua +=================================================================== --- /dev/null -+++ b/src/jit/dis_riscv64.lua ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/jit/dis_riscv64.lua @@ -0,0 +1,16 @@ +---------------------------------------------------------------------------- +-- LuaJIT RISC-V 64 disassembler wrapper module. @@ -2378,15 +2442,15 @@ index 0000000..ff038d1 + regname = dis_riscv.regname +} \ No newline at end of file -diff --git a/src/lib_jit.c b/src/lib_jit.c -index 2867d42..b2ccfa2 100644 ---- a/src/lib_jit.c -+++ b/src/lib_jit.c -@@ -648,6 +648,75 @@ JIT_PARAMDEF(JIT_PARAMINIT) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lib_jit.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lib_jit.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lib_jit.c +@@ -631,6 +631,81 @@ JIT_PARAMDEF(JIT_PARAMINIT) #include #endif -+#if LJ_TARGET_RISCV64 ++#if LJ_TARGET_RISCV64 && LJ_TARGET_POSIX +#include +#include +static sigjmp_buf sigbuf = {0}; @@ -2397,11 +2461,11 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_compressed() +{ -+#if defined(__riscv_compressed) -+ // Don't bother checking for RVC -- would crash before getting here. ++#if defined(__riscv_c) || defined(__riscv_compressed) ++ /* Don't bother checking for RVC -- would crash before getting here. */ + return 1; +#elif defined(__GNUC__) -+ // c.nop; c.nop; ++ /* c.nop; c.nop; */ + __asm__(".4byte 0x00010001"); + return 1; +#else @@ -2411,9 +2475,12 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_zba() +{ -+#if defined(__GNUC__) -+ // Don't bother verifying the result, just check if the instruction exists. -+ // add.uw zero, zero, zero ++#if defined(__riscv_b) || defined(__riscv_zba) ++ /* Don't bother checking for Zba -- would crash before getting here. 
*/ ++ return 1; ++#elif defined(__GNUC__) ++ /* Don't bother verifying the result, just check if the instruction exists. */ ++ /* add.uw zero, zero, zero */ + __asm__(".4byte 0x0800003b"); + return 1; +#else @@ -2423,9 +2490,12 @@ index 2867d42..b2ccfa2 100644 + +static int riscv_zbb() +{ -+#if defined(__GNUC__) ++#if defined(__riscv_b) || defined(__riscv_zbb) ++ /* Don't bother checking for Zbb -- would crash before getting here. */ ++ return 1; ++#elif defined(__GNUC__) + register int t asm ("a0"); -+ // addi a0, zero, 255; sext.b a0, a0; ++ /* addi a0, zero, 255; sext.b a0, a0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x60451513"); + return t < 0; +#else @@ -2437,11 +2507,11 @@ index 2867d42..b2ccfa2 100644 +{ +#if defined(__GNUC__) + register int t asm ("a0"); -+ // C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". -+ // Therefore assume XThead* are present if XTheadBb is present. -+ // addi a0, zero, 255; th.ext a0, a0, 7, 0; ++ /* C906 & C910 & C908 all have "xtheadc", XTheadBb subset "xtheadc". */ ++ /* Therefore assume XThead* are present if XTheadBb is present. */ ++ /* addi a0, zero, 255; th.ext a0, a0, 7, 0; */ + __asm__("addi a0, zero, 255\n\t.4byte 0x1c05250b"); -+ return t == -1; // In case of collision with other vendor extensions. ++ return t == -1; /* In case of collision with other vendor extensions. */ +#else + return 0; +#endif @@ -2458,13 +2528,13 @@ index 2867d42..b2ccfa2 100644 /* Arch-dependent CPU feature detection. */ static uint32_t jit_cpudetect(void) { -@@ -719,6 +788,22 @@ static uint32_t jit_cpudetect(void) +@@ -702,6 +777,22 @@ static uint32_t jit_cpudetect(void) } #endif +#elif LJ_TARGET_RISCV64 +#if LJ_HASJIT -+ // SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. ++ /* SIGILL-based detection of RVC, Zba, Zbb and XThead. Welcome to the future. 
*/ + struct sigaction old = {0}, act = {0}; + act.sa_handler = detect_sigill; + sigaction(SIGILL, &act, &old); @@ -2474,17 +2544,30 @@ index 2867d42..b2ccfa2 100644 + flags |= riscv_probe(riscv_xthead, JIT_F_RVXThead); + sigaction(SIGILL, &old, NULL); + -+ // Detect V/P? -+ // V have no hardware available, P not ratified yet. ++ /* Detect V/P? */ ++ /* V have no hardware available, P not ratified yet. */ +#endif + #else #error "Missing CPU detection for this architecture" #endif -diff --git a/src/lj_arch.h b/src/lj_arch.h -index bddd757..453414f 100644 ---- a/src/lj_arch.h -+++ b/src/lj_arch.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_alloc.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_alloc.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_alloc.c +@@ -365,7 +365,7 @@ static void *CALL_MREMAP_(void *ptr, siz + #define CALL_MREMAP(addr, osz, nsz, mv) CALL_MREMAP_((addr), (osz), (nsz), (mv)) + #define CALL_MREMAP_NOMOVE 0 + #define CALL_MREMAP_MAYMOVE 1 +-#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64) ++#if LJ_64 && (!LJ_GC64 || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64) + #define CALL_MREMAP_MV CALL_MREMAP_NOMOVE + #else + #define CALL_MREMAP_MV CALL_MREMAP_MAYMOVE +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_arch.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_arch.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_arch.h @@ -31,6 +31,10 @@ #define LUAJIT_ARCH_mips32 6 #define LUAJIT_ARCH_MIPS64 7 @@ -2505,9 +2588,9 @@ index bddd757..453414f 100644 +#elif defined(__riscv) && __riscv_xlen == 64 +#define LUAJIT_TARGET LUAJIT_ARCH_RISCV64 #else - #error "No support for this architecture (yet)" + #error "Architecture not supported (in this version), see: https://luajit.org/status.html#architectures" #endif -@@ -435,6 +443,32 @@ +@@ -439,6 +447,30 @@ #define LJ_ARCH_VERSION 10 #endif @@ 
-2522,7 +2605,7 @@ index bddd757..453414f 100644 +#define LJ_ARCH_ENDIAN LUAJIT_LE /* Forget about BE for now */ +#define LJ_TARGET_RISCV64 1 +#define LJ_TARGET_GC64 1 -+#define LJ_TARGET_EHRETREG 0 // TODO ++#define LJ_TARGET_EHRETREG 10 +#define LJ_TARGET_EHRAREG 1 +#define LJ_TARGET_JUMPRANGE 30 /* JAL +-2^20 = +-1MB,\ + AUIPC+JALR +-2^31 = +-2GB, leave 1 bit to avoid AUIPC corner case */ @@ -2530,8 +2613,6 @@ index bddd757..453414f 100644 +#define LJ_TARGET_MASKROT 1 +#define LJ_TARGET_UNIFYROT 2 /* Want only IR_BROR, no ROLI */ +#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL -+// for now -+#define LUAJIT_NO_UNWIND 1 + +#else +#error "No support for RISC-V 64 Soft-float/Single-float" @@ -2540,9 +2621,9 @@ index bddd757..453414f 100644 #else #error "No target architecture defined" #endif -@@ -518,6 +552,13 @@ - /* MIPS32ON64 aka n32 ABI support might be desirable, but difficult. */ +@@ -531,6 +563,13 @@ #error "Only n64 ABI supported for MIPS64" + #undef LJ_TARGET_MIPS #endif +#elif LJ_TARGET_RISCV +#if !defined(__riscv_float_abi_double) @@ -2554,11 +2635,11 @@ index bddd757..453414f 100644 #endif #endif -diff --git a/src/lj_asm.c b/src/lj_asm.c -index 6f5e0c4..d881ea1 100644 ---- a/src/lj_asm.c -+++ b/src/lj_asm.c -@@ -185,6 +185,8 @@ IRFLDEF(FLOFS) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_asm.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm.c +@@ -227,6 +227,8 @@ static Reg rset_pickrandom(ASMState *as, #include "lj_emit_ppc.h" #elif LJ_TARGET_MIPS #include "lj_emit_mips.h" @@ -2567,7 +2648,7 @@ index 6f5e0c4..d881ea1 100644 #else #error "Missing instruction emitter for target CPU" #endif -@@ -1662,6 +1664,8 @@ static void asm_loop(ASMState *as) +@@ -1708,6 +1710,8 @@ static void asm_loop(ASMState *as) #include "lj_asm_ppc.h" #elif LJ_TARGET_MIPS #include "lj_asm_mips.h" @@ -2576,12 +2657,11 @@ index 
6f5e0c4..d881ea1 100644 #else #error "Missing assembler for target CPU" #endif -diff --git a/src/lj_asm_riscv64.h b/src/lj_asm_riscv64.h -new file mode 100644 -index 0000000..e6d68e1 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm_riscv64.h +=================================================================== --- /dev/null -+++ b/src/lj_asm_riscv64.h -@@ -0,0 +1,1969 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_asm_riscv64.h +@@ -0,0 +1,1976 @@ +/* +** RISC-V IR assembler (SSA IR -> machine code). +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h @@ -2863,16 +2943,18 @@ index 0000000..e6d68e1 +{ + IRRef lref = ir->op1, rref = ir->op2; + IRIns *irm; -+ if (lref != rref && ++ if ((as->flags & JIT_F_OPT_FMA) && ++ lref != rref && + ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) && + ra_noreg(irm->r)) || + (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) && + (rref = lref, riscvi = riscvir, ra_noreg(irm->r))))) { + Reg dest = ra_dest(as, ir, RSET_FPR); + Reg add = ra_hintalloc(as, rref, dest, RSET_FPR); -+ Reg left = ra_alloc2(as, irm, rset_exclude(rset_exclude(RSET_FPR, dest), add)); ++ Reg left = ra_alloc2(as, irm, ++ rset_exclude(rset_exclude(RSET_FPR, dest), add)); + Reg right = (left >> 8); left &= 255; -+ emit_ds1s2s3(as, riscvi, (dest & 0x1f), (left & 0x1f), (right & 0x1f), (add & 0x1f)); ++ emit_ds1s2s3(as, riscvi, dest, left, right, add); + return 1; + } + return 0; @@ -3163,7 +3245,7 @@ index 0000000..e6d68e1 + asm_gencall(as, ci, args); + /* Store the result to the spill slot or temp slots. */ + Reg tmp = ra_releasetmp(as, ASMREF_TMP1); -+ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, ofs); ++ emit_opk(as, RISCVI_ADDI, tmp, RID_SP, tmp, ofs); +} + +/* -- Memory references --------------------------------------------------- */ @@ -3210,7 +3292,7 @@ index 0000000..e6d68e1 + } + } + /* g->tmptv holds the TValue(s). 
*/ -+ emit_opk(as, RISCVI_ADDI, dest, RID_GL, offsetof(global_State, tmptv)); ++ emit_opk(as, RISCVI_ADDI, dest, RID_GL, dest, offsetof(global_State, tmptv)); +} + +static void asm_aref(ASMState *as, IRIns *ir) @@ -3230,7 +3312,7 @@ index 0000000..e6d68e1 + } + base = ra_alloc1(as, ir->op1, RSET_GPR); + idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); -+ emit_sh3add(as, dest, base, idx); ++ emit_sh3add(as, dest, base, idx, RID_TMP); +} + +/* Inlined hash lookup. Specialized for key type and for const keys. @@ -3347,12 +3429,12 @@ index 0000000..e6d68e1 + emit_lso(as, RISCVI_LW, tmp1, key, (int32_t)offsetof(GCstr, sid)); + } else { /* Must match with hash*() in lj_tab.c. */ + emit_ds1s2(as, RISCVI_SUBW, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, (-HASH_ROT3)&0x1f); ++ emit_roti(as, RISCVI_RORIW, tmp2, tmp2, dest, (-HASH_ROT3)&0x1f); + emit_ds1s2(as, RISCVI_XOR, tmp1, tmp1, tmp2); -+ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, (-HASH_ROT2-HASH_ROT1)&0x1f); ++ emit_roti(as, RISCVI_RORIW, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&0x1f); + emit_ds1s2(as, RISCVI_SUBW, tmp2, tmp2, dest); + emit_ds1s2(as, RISCVI_XOR, tmp2, tmp2, tmp1); -+ emit_roti(as, RISCVI_RORIW, dest, tmp1, (-HASH_ROT1)&0x1f); ++ emit_roti(as, RISCVI_RORIW, dest, tmp1, RID_TMP, (-HASH_ROT1)&0x1f); + if (irt_isnum(kt)) { + emit_dsshamt(as, RISCVI_SLLIW, tmp1, tmp1, 1); + emit_dsshamt(as, RISCVI_SRAI, tmp1, tmp1, 32); // hi @@ -3379,7 +3461,6 @@ index 0000000..e6d68e1 + Reg node = ra_alloc1(as, ir->op1, RSET_GPR); + RegSet allow = rset_exclude(RSET_GPR, node); + Reg idx = node; -+ Reg key = ra_scratch(as, allow); + int64_t k; + lj_assertA(ofs % sizeof(Node) == 0, "unaligned HREFK slot"); + if (bigofs) { @@ -3397,9 +3478,8 @@ index 0000000..e6d68e1 + } else { + k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); + } -+ key = ra_scratch(as, allow); -+ asm_guard(as, RISCVI_BNE, key, ra_allock(as, k, allow)); -+ emit_lso(as, RISCVI_LD, key, idx, kofs); ++ 
asm_guard(as, RISCVI_BNE, RID_TMP, ra_allock(as, k, allow)); ++ emit_lso(as, RISCVI_LD, RID_TMP, idx, kofs); + if (bigofs) + emit_ds1s2(as, RISCVI_ADD, dest, node, ra_allock(as, ofs, allow)); +} @@ -3407,22 +3487,29 @@ index 0000000..e6d68e1 +static void asm_uref(ASMState *as, IRIns *ir) +{ + Reg dest = ra_dest(as, ir, RSET_GPR); -+ if (irref_isk(ir->op1)) { ++ int guarded = (irt_t(ir->t) & (IRT_GUARD|IRT_TYPE)) == (IRT_GUARD|IRT_PGC); ++ if (irref_isk(ir->op1) && !guarded) { + GCfunc *fn = ir_kfunc(IR(ir->op1)); + MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; + emit_lsptr(as, RISCVI_LD, dest, v, RSET_GPR); + } else { -+ Reg uv = ra_scratch(as, RSET_GPR); -+ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); -+ if (ir->o == IR_UREFC) { -+ asm_guard(as, RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_dsi(as, RISCVI_ADDI, dest, uv, (int32_t)offsetof(GCupval, tv)); -+ emit_lso(as, RISCVI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed)); ++ if (guarded) ++ asm_guard(as, ir->o == IR_UREFC ? RISCVI_BEQ : RISCVI_BNE, RID_TMP, RID_ZERO); ++ if (ir->o == IR_UREFC) ++ emit_dsi(as, RISCVI_ADDI, dest, dest, (int32_t)offsetof(GCupval, tv)); ++ else ++ emit_lso(as, RISCVI_LD, dest, dest, (int32_t)offsetof(GCupval, v)); ++ if (guarded) ++ emit_lso(as, RISCVI_LBU, RID_TMP, dest, (int32_t)offsetof(GCupval, closed)); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ GCobj *o = gcref(fn->l.uvptr[(ir->op2 >> 8)]); ++ emit_loada(as, dest, o); + } else { -+ emit_lso(as, RISCVI_LD, dest, uv, (int32_t)offsetof(GCupval, v)); ++ emit_lso(as, RISCVI_LD, dest, ra_alloc1(as, ir->op1, RSET_GPR), ++ (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } -+ emit_lso(as, RISCVI_LD, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + -+ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); + } +} + @@ -3494,9 +3581,9 @@ index 0000000..e6d68e1 + } + } + ofs = field_ofs[ir->op2]; ++ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + } + rset_clear(allow, 
idx); -+ lj_assertA(!irt_isfp(ir->t), "bad FP FLOAD"); + emit_lso(as, riscvi, dest, idx, ofs); +} + @@ -3907,8 +3994,7 @@ index 0000000..e6d68e1 + if (ir->o == IR_SUBOV) k = (int)(~(unsigned int)k+1u); + if (checki12(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ + left = ra_alloc1(as, ir->op1, RSET_GPR); -+ asm_guard(as, k >= 0 ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_ds1s2(as, RISCVI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left); ++ asm_guard(as, k >= 0 ? RISCVI_BLT : RISCVI_BGE, dest, dest == left ? RID_TMP : left); + emit_dsi(as, RISCVI_ADDI, dest, left, k); + if (dest == left) emit_mv(as, RID_TMP, left); + return; @@ -3962,7 +4048,7 @@ index 0000000..e6d68e1 +{ + Reg dest = ra_dest(as, ir, RSET_GPR); + Reg left = ra_alloc1(as, ir->op1, RSET_GPR); -+ RegSet allow = rset_exclude(RSET_GPR, dest); ++ RegSet allow = rset_exclude(rset_exclude(RSET_GPR, dest), left); + if (as->flags & JIT_F_RVZbb) { + if (!irt_is64(ir->t)) + emit_dsshamt(as, RISCVI_SRAI, dest, dest, 32); @@ -4066,7 +4152,7 @@ index 0000000..e6d68e1 + emit_dsshamt(as, riscvik, dest, left, shift); + break; + case RISCVI_RORI: case RISCVI_RORIW: -+ emit_roti(as, riscvik, dest, left, shift); ++ emit_roti(as, riscvik, dest, left, RID_TMP, shift); + break; + default: + lj_assertA(0, "bad shift instruction"); @@ -4081,7 +4167,7 @@ index 0000000..e6d68e1 + break; + case RISCVI_ROR: case RISCVI_ROL: + case RISCVI_RORW: case RISCVI_ROLW: -+ emit_rot(as, riscvi, dest, left, right); ++ emit_rot(as, riscvi, dest, left, right, RID_TMP); + break; + default: + lj_assertA(0, "bad shift instruction"); @@ -4132,10 +4218,12 @@ index 0000000..e6d68e1 + } else { + emit_ds1s2(as, RISCVI_OR, dest, dest, RID_TMP); + if (dest != right) { -+ emit_andn(as, RID_TMP, right, RID_TMP, RID_TMP); ++ emit_ds1s2(as, RISCVI_AND, RID_TMP, right, RID_TMP); ++ emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP); + emit_ds1s2(as, RISCVI_AND, dest, left, RID_TMP); + } else { -+ emit_andn(as, RID_TMP, left, RID_TMP, RID_TMP); ++ 
emit_ds1s2(as, RISCVI_AND, RID_TMP, left, RID_TMP); ++ emit_ds(as, RISCVI_NOT, RID_TMP, RID_TMP); + emit_ds1s2(as, RISCVI_AND, dest, right, RID_TMP); + } + emit_dsi(as, RISCVI_ADDI, RID_TMP, RID_TMP, -1); @@ -4208,9 +4296,8 @@ index 0000000..e6d68e1 + } + } + right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); -+ asm_guard(as, ((op^(op>>1))&1) ? RISCVI_BNE : RISCVI_BEQ, RID_TMP, RID_ZERO); -+ emit_ds1s2(as, (op&4) ? RISCVI_SLTU : RISCVI_SLT, -+ RID_TMP, (op&2) ? right : left, (op&2) ? left : right); ++ asm_guard(as, ((op&4) ? RISCVI_BGEU : RISCVI_BGE) ^ RISCVF_FUNCT3((op^(op>>1))&1), ++ (op&2) ? right : left, (op&2) ? left : right); +} + +static void asm_comp(ASMState *as, IRIns *ir) @@ -4412,7 +4499,7 @@ index 0000000..e6d68e1 +} + +/* Coalesce BASE register for a side trace. */ -+static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++static Reg asm_head_side_base(ASMState *as, IRIns *irp) +{ + IRIns *ir = IR(REF_BASE); + Reg r = ir->r; @@ -4421,15 +4508,15 @@ index 0000000..e6d68e1 + if (rset_test(as->modset, r) || irt_ismarked(ir->t)) + ir->r = RID_INIT; /* No inheritance for modified BASE register. */ + if (irp->r == r) { -+ rset_clear(allow, r); /* Mark same BASE register as coalesced. */ ++ return r; /* Same BASE register already coalesced. */ + } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { -+ rset_clear(allow, irp->r); + emit_mv(as, r, irp->r); /* Move from coalesced parent reg. */ ++ return irp->r; + } else { + emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ + } + } -+ return allow; ++ return RID_NONE; +} + +/* -- Tail of trace ------------------------------------------------------- */ @@ -4489,7 +4576,7 @@ index 0000000..e6d68e1 + } + if (nslots > as->evenspill) /* Leave room for args in stack slots. */ + as->evenspill = nslots; -+ return REGSP_HINT(RID_RET); ++ return REGSP_HINT(irt_isfp(ir->t) ? 
RID_FPRET : RID_RET); +} + +static void asm_setup_target(ASMState *as) @@ -4544,18 +4631,18 @@ index 0000000..e6d68e1 + lj_assertJ(checki32(delta), "jump target out of range"); + p[0] = RISCVI_AUIPC | RISCVF_D(RID_TMP) | RISCVF_IMMU(RISCVF_HI(delta)); + p[1] = RISCVI_JALR | RISCVF_S1(RID_TMP) | RISCVF_IMMI(RISCVF_LO(delta)); -+ if (!cstart) cstart = p + 2; ++ if (!cstart) cstart = p; + } + } + } + if (cstart) lj_mcode_sync(cstart, px+1); + lj_mcode_patch(J, mcarea, 1); +} -diff --git a/src/lj_ccall.c b/src/lj_ccall.c -index 25f54de..e108555 100644 ---- a/src/lj_ccall.c -+++ b/src/lj_ccall.c -@@ -574,6 +574,97 @@ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccall.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.c +@@ -575,6 +575,97 @@ goto done; \ } @@ -4653,7 +4740,7 @@ index 25f54de..e108555 100644 #else #error "Missing calling convention definitions for this architecture" #endif -@@ -889,6 +980,51 @@ static void ccall_copy_struct(CCallState *cc, CType *ctr, void *dp, void *sp, +@@ -891,6 +982,51 @@ static void ccall_copy_struct(CCallState #endif @@ -4705,7 +4792,7 @@ index 25f54de..e108555 100644 /* -- Common C call handling ---------------------------------------------- */ /* Infer the destination CTypeID for a vararg argument. */ -@@ -935,6 +1071,10 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -937,6 +1073,10 @@ static int ccall_set_args(lua_State *L, #endif #endif @@ -4716,7 +4803,7 @@ index 25f54de..e108555 100644 /* Clear unused regs to get some determinism in case of misdeclaration. 
*/ memset(cc->gpr, 0, sizeof(cc->gpr)); #if CCALL_NUM_FPR -@@ -1060,7 +1200,11 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -1077,7 +1217,11 @@ static int ccall_set_args(lua_State *L, if (isfp && d->size == sizeof(float)) ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ #endif @@ -4729,7 +4816,7 @@ index 25f54de..e108555 100644 if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) #if LJ_TARGET_MIPS64 || (isfp && nsp == 0) -@@ -1090,13 +1234,21 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, +@@ -1107,6 +1251,14 @@ static int ccall_set_args(lua_State *L, CTSize i = (sz >> 2) - 1; do { ((uint64_t *)dp)[i] = ((uint32_t *)dp)[i]; } while (i--); } @@ -4744,19 +4831,20 @@ index 25f54de..e108555 100644 #else UNUSED(isfp); #endif - } - if (fid) lj_err_caller(L, LJ_ERR_FFI_NUMARG); /* Too few arguments. */ +@@ -1116,7 +1268,7 @@ static int ccall_set_args(lua_State *L, + if ((int32_t)nsp < 0) nsp = 0; + #endif -#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) +#if LJ_TARGET_X64 || (LJ_TARGET_PPC && !LJ_ABI_SOFTFP) || LJ_TARGET_RISCV64 cc->nfpr = nfpr; /* Required for vararg functions. 
*/ #endif - cc->nsp = nsp; -diff --git a/src/lj_ccall.h b/src/lj_ccall.h -index 0b3c524..aa51d5f 100644 ---- a/src/lj_ccall.h -+++ b/src/lj_ccall.h -@@ -126,6 +126,21 @@ typedef union FPRArg { + cc->nsp = (nsp + CTSIZE_PTR-1) & ~(CTSIZE_PTR-1); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccall.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccall.h +@@ -129,6 +129,21 @@ typedef union FPRArg { struct { LJ_ENDIAN_LOHI(float f; , float g;) }; } FPRArg; @@ -4778,7 +4866,7 @@ index 0b3c524..aa51d5f 100644 #else #error "Missing calling convention definitions for this architecture" #endif -@@ -168,7 +183,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { +@@ -175,7 +190,7 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) uint8_t resx87; /* Result on x87 stack: 1:float, 2:double. */ #elif LJ_TARGET_ARM64 void *retp; /* Aggregate return pointer in x8. */ @@ -4787,11 +4875,11 @@ index 0b3c524..aa51d5f 100644 uint8_t nfpr; /* Number of arguments in FPRs. */ #endif #if LJ_32 -diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c -index 43e4430..94a9207 100644 ---- a/src/lj_ccallback.c -+++ b/src/lj_ccallback.c -@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccallback.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_ccallback.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_ccallback.c +@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs #define CALLBACK_MCODE_HEAD 52 @@ -4802,7 +4890,7 @@ index 43e4430..94a9207 100644 #else /* Missing support for this architecture. 
*/ -@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_State *g, uint32_t *page) +@@ -238,6 +242,39 @@ static void *callback_mcode_init(global_ } return p; } @@ -4842,14 +4930,10 @@ index 43e4430..94a9207 100644 #else /* Missing support for this architecture. */ #define callback_mcode_init(g, p) (p) -@@ -512,6 +549,31 @@ void lj_ccallback_mcode_free(CTState *cts) - } - #endif +@@ -516,6 +553,31 @@ void lj_ccallback_mcode_free(CTState *ct + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; -+#define CALLBACK_HANDLE_RET \ -+ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ -+ ((float *)dp)[1] = *(float *)dp; -+ +#elif LJ_TARGET_RISCV64 + +#define CALLBACK_HANDLE_REGARG \ @@ -4871,10 +4955,14 @@ index 43e4430..94a9207 100644 + } \ + } + - #define CALLBACK_HANDLE_RET \ - if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ - ((float *)dp)[1] = *(float *)dp; -@@ -662,7 +724,7 @@ static void callback_conv_result(CTState *cts, lua_State *L, TValue *o) ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ((float *)dp)[1] = *(float *)dp; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -662,7 +724,7 @@ static void callback_conv_result(CTState *(int32_t *)dp = ctr->size == 1 ? (int32_t)*(int8_t *)dp : (int32_t)*(int16_t *)dp; } @@ -4883,97 +4971,16 @@ index 43e4430..94a9207 100644 /* Always sign-extend results to 64 bits. Even a soft-fp 'float'. 
*/ if (ctr->size <= 4 && (LJ_ABI_SOFTFP || ctype_isinteger_or_bool(ctr->info))) -diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c -index ded382a..32688a0 100644 ---- a/src/lj_dispatch.c -+++ b/src/lj_dispatch.c -@@ -56,6 +56,15 @@ static const ASMFunction dispatch_got[] = { - #undef GOTFUNC - #endif - -+#if LJ_TARGET_RISCV64 -+#include -+#define GOTFUNC(name) (ASMFunction)name, -+static const ASMFunction dispatch_got[] = { -+ GOTDEF(GOTFUNC) -+}; -+#undef GOTFUNC -+#endif -+ - /* Initialize instruction dispatch table and hot counters. */ - void lj_dispatch_init(GG_State *GG) - { -@@ -76,7 +85,7 @@ void lj_dispatch_init(GG_State *GG) - GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); - for (i = 0; i < GG_NUM_ASMFF; i++) - GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); --#if LJ_TARGET_MIPS -+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 - memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); - #endif - } -diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h -index 52762ee..51f10ed 100644 ---- a/src/lj_dispatch.h -+++ b/src/lj_dispatch.h -@@ -66,6 +66,35 @@ GOTDEF(GOTENUM) - }; - #endif - -+#if LJ_TARGET_RISCV64 -+/* Need our own global offset table to wrap RISC-V PIC intern / extern calls */ -+ -+#if LJ_HASJIT -+#define JITGOTDEF(_) _(lj_err_trace) _(lj_trace_exit) _(lj_trace_hot) -+#else -+#define JITGOTDEF(_) -+#endif -+#if LJ_HASFFI -+#define FFIGOTDEF(_) \ -+ _(lj_meta_equal_cd) _(lj_ccallback_enter) _(lj_ccallback_leave) -+#else -+#define FFIGOTDEF(_) -+#endif -+ -+#define GOTDEF(_) \ -+ _(floor) _(ceil) _(trunc) _(log) _(log10) _(exp) _(sin) _(cos) _(tan) \ -+ _(asin) _(acos) _(atan) _(sinh) _(cosh) _(tanh) _(frexp) _(modf) _(atan2) \ -+ _(pow) _(fmod) _(ldexp) \ -+ JITGOTDEF(_) FFIGOTDEF(_) -+ -+enum { -+#define GOTENUM(name) LJ_GOT_##name, -+GOTDEF(GOTENUM) -+#undef GOTENUM -+ LJ_GOT__MAX -+}; -+#endif -+ - /* Type of hot counter. Must match the code in the assembler VM. */ - /* 16 bits are sufficient. 
Only 0.0015% overhead with maximum slot penalty. */ - typedef uint16_t HotCount; -@@ -93,7 +122,7 @@ typedef struct GG_State { - /* Make g reachable via K12 encoded DISPATCH-relative addressing. */ - uint8_t align1[(16-sizeof(global_State))&15]; - #endif --#if LJ_TARGET_MIPS -+#if LJ_TARGET_MIPS || LJ_TARGET_RISCV64 - ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ - #endif - #if LJ_HASJIT -diff --git a/src/lj_emit_riscv.h b/src/lj_emit_riscv.h -new file mode 100644 -index 0000000..c7273d5 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_emit_riscv.h +=================================================================== --- /dev/null -+++ b/src/lj_emit_riscv.h -@@ -0,0 +1,516 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_emit_riscv.h +@@ -0,0 +1,519 @@ +/* +** RISC-V instruction emitter. +** Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h +*/ + -+#include "lj_target.h" -+#include +static intptr_t get_k64val(ASMState *as, IRRef ref) +{ + IRIns *ir = IR(ref); @@ -5068,7 +5075,8 @@ index 0000000..c7273d5 + } +} + -+static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, int32_t shamt) ++static void emit_roti(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg tmp, ++ int32_t shamt) +{ + if (as->flags & JIT_F_RVZbb || as->flags & JIT_F_RVXThead) { + if (as->flags & JIT_F_RVXThead) switch (riscvi) { @@ -5093,13 +5101,13 @@ index 0000000..c7273d5 + lj_assertA(0, "invalid roti op"); + return; + } -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_dsshamt(as, bi, rd, rs1, (shwid - shamt)&shmsk); -+ emit_dsshamt(as, ai, RID_TMP, rs1, shamt&shmsk); ++ emit_dsshamt(as, ai, tmp, rs1, shamt&shmsk); + } +} + -+static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2) ++static void emit_rot(ASMState *as, RISCVIns riscvi, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { + emit_ds1s2(as, riscvi, rd, rs1, rs2); @@ -5123,15 +5131,15 @@ index 
0000000..c7273d5 + return; + } + if (rd == rs2) { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); -+ emit_ds1s2(as, sbi, RID_TMP, rs1, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds2(as, RISCVI_NEG, RID_TMP, rs2); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); + } else { -+ emit_ds1s2(as, RISCVI_OR, rd, rd, RID_TMP); ++ emit_ds1s2(as, RISCVI_OR, rd, rd, tmp); + emit_ds1s2(as, sai, rd, rs1, rs2); -+ emit_ds1s2(as, sbi, RID_TMP, rs1, RID_TMP); -+ emit_ds2(as, RISCVI_NEG, RID_TMP, rs2); ++ emit_ds1s2(as, sbi, tmp, rs1, tmp); ++ emit_ds2(as, RISCVI_NEG, tmp, rs2); + } + } +} @@ -5209,6 +5217,7 @@ index 0000000..c7273d5 + } +} + ++/* +static void emit_andn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { @@ -5218,7 +5227,9 @@ index 0000000..c7273d5 + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} ++*/ + ++/* +static void emit_orn(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp) +{ + if (as->flags & JIT_F_RVZbb) { @@ -5228,6 +5239,7 @@ index 0000000..c7273d5 + emit_ds(as, RISCVI_NOT, tmp, rs2); + } +} ++*/ + +static void emit_xnor(ASMState *as, Reg rd, Reg rs1, Reg rs2) +{ @@ -5239,7 +5251,7 @@ index 0000000..c7273d5 + } +} + -+static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, unsigned int shamt) ++static void emit_shxadd(ASMState *as, Reg rd, Reg rs1, Reg rs2, Reg tmp, unsigned int shamt) +{ + if (as->flags & JIT_F_RVZba) { + switch (shamt) { @@ -5251,14 +5263,14 @@ index 0000000..c7273d5 + } else if (as->flags & JIT_F_RVXThead) { + emit_dsi(as, RISCVI_TH_ADDSL|RISCVF_IMMI(shamt<<5), rd, rs1, rs2); + } else { -+ emit_ds1s2(as, RISCVI_ADD, rd, rs1, RID_TMP); -+ emit_dsshamt(as, RISCVI_SLLI, RID_TMP, rs2, 3); ++ emit_ds1s2(as, RISCVI_ADD, rd, rs1, tmp); ++ emit_dsshamt(as, RISCVI_SLLI, tmp, rs2, shamt); + } +} + -+#define emit_sh1add(as, rd, rs1, rs2) emit_shxadd(as, rd, rs1, rs2, 1) -+#define emit_sh2add(as, rd, rs1, rs2) emit_shxadd(as, rd, 
rs1, rs2, 2) -+#define emit_sh3add(as, rd, rs1, rs2) emit_shxadd(as, rd, rs1, rs2, 3) ++#define emit_sh1add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 1) ++#define emit_sh2add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 2) ++#define emit_sh3add(as, rd, rs1, rs2, tmp) emit_shxadd(as, rd, rs1, rs2, tmp, 3) + +static void emit_loadk12(ASMState *as, Reg rd, int32_t i) +{ @@ -5440,7 +5452,7 @@ index 0000000..c7273d5 + +/* Emit an arithmetic operation with a constant operand. */ +static void emit_opk(ASMState *as, RISCVIns riscvi, Reg dest, Reg src, -+ intptr_t k) ++ Reg tmp, intptr_t k) +{ + if (checki12(k)) emit_dsi(as, riscvi, dest, src, k); + else { @@ -5451,8 +5463,8 @@ index 0000000..c7273d5 + case RISCVI_ANDI: riscvi = RISCVI_AND; break; + default: lj_assertA(0, "NYI arithmetic RISCVIns"); return; + } -+ emit_ds1s2(as, riscvi, dest, src, RID_TMP); -+ emit_loadu64(as, RID_TMP, (uintptr_t)k); ++ emit_ds1s2(as, riscvi, dest, src, tmp); ++ emit_loadu64(as, tmp, (uintptr_t)k); + } +} + @@ -5478,16 +5490,16 @@ index 0000000..c7273d5 +static void emit_addptr(ASMState *as, Reg r, int32_t ofs) +{ + if (ofs) -+ emit_opk(as, RISCVI_ADDI, r, r, ofs); ++ emit_opk(as, RISCVI_ADDI, r, r, RID_TMP, ofs); +} + + +#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) -diff --git a/src/lj_frame.h b/src/lj_frame.h -index aa1dc11..dace63d 100644 ---- a/src/lj_frame.h -+++ b/src/lj_frame.h -@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. 
*/ +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_frame.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_frame.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_frame.h +@@ -264,6 +264,15 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CAL #endif #define CFRAME_OFS_MULTRES 0 #define CFRAME_SHIFT_MULTRES 3 @@ -5503,10 +5515,50 @@ index aa1dc11..dace63d 100644 #else #error "Missing CFRAME_* definitions for this architecture" #endif -diff --git a/src/lj_jit.h b/src/lj_jit.h -index 7f08173..5f91644 100644 ---- a/src/lj_jit.h -+++ b/src/lj_jit.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_gdbjit.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_gdbjit.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_RISCV64 ++ DW_REG_SP = 2, ++ DW_REG_RA = 1, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_RISCV64 ++ .machine = 243, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,16 @@ static void LJ_FASTCALL gdbjit_ehframe(G + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_RISCV64 ++ { ++ int i; ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|i); DUV(27-i+7); } ++ DB(DW_CFA_offset|9); DUV(17); ++ DB(DW_CFA_offset|8); DUV(18); ++ for (i = 27; i >= 18; i--) { DB(DW_CFA_offset|32|i); DUV(27-i+19); } ++ DB(DW_CFA_offset|32|9); DUV(29); ++ DB(DW_CFA_offset|32|8); DUV(30); ++ } + #else + #error "Unsupported target architecture" + #endif +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_jit.h 
+=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_jit.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_jit.h @@ -67,6 +67,15 @@ #endif #endif @@ -5523,10 +5575,45 @@ index 7f08173..5f91644 100644 #else #define JIT_F_CPUSTRING "" -diff --git a/src/lj_target.h b/src/lj_target.h -index 1971692..5423a2e 100644 ---- a/src/lj_target.h -+++ b/src/lj_target.h +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_mcode.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_mcode.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_mcode.c +@@ -38,6 +38,12 @@ + void sys_icache_invalidate(void *start, size_t len); + #endif + ++#if LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#include ++#include ++#include ++#endif ++ + /* Synchronize data/instruction cache. */ + void lj_mcode_sync(void *start, void *end) + { +@@ -52,6 +58,17 @@ void lj_mcode_sync(void *start, void *en + sys_icache_invalidate(start, (char *)end-(char *)start); + #elif LJ_TARGET_PPC + lj_vm_cachesync(start, end); ++#elif LJ_TARGET_RISCV64 && LJ_TARGET_LINUX ++#if (defined(__GNUC__) || defined(__clang__)) ++ __asm__ volatile("fence rw, rw"); ++#else ++ lj_vm_fence_rw_rw(); ++#endif ++#ifdef __GLIBC__ ++ __riscv_flush_icache(start, end, 0); ++#else ++ syscall(__NR_riscv_flush_icache, start, end, 0UL); ++#endif + #elif defined(__GNUC__) || defined(__clang__) + __clear_cache(start, end); + #else +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_target.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target.h @@ -55,7 +55,7 @@ typedef uint32_t RegSP; /* Bitset for registers. 32 registers suffice for most architectures. ** Note that one set holds bits for both GPRs and FPRs. 
@@ -5534,18 +5621,9 @@ index 1971692..5423a2e 100644 -#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 +#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 typedef uint64_t RegSet; - #else - typedef uint32_t RegSet; -@@ -69,7 +69,7 @@ typedef uint32_t RegSet; - #define rset_set(rs, r) (rs |= RID2RSET(r)) - #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) - #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) --#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 -+#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_RISCV64 - #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) - #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) - #else -@@ -144,6 +144,8 @@ typedef uint32_t RegCost; + #define RSET_BITS 6 + #define rset_picktop_(rs) ((Reg)lj_fls64(rs)) +@@ -143,6 +143,8 @@ typedef uint32_t RegCost; #include "lj_target_ppc.h" #elif LJ_TARGET_MIPS #include "lj_target_mips.h" @@ -5554,11 +5632,10 @@ index 1971692..5423a2e 100644 #else #error "Missing include for target CPU" #endif -diff --git a/src/lj_target_riscv.h b/src/lj_target_riscv.h -new file mode 100644 -index 0000000..5089493 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target_riscv.h +=================================================================== --- /dev/null -+++ b/src/lj_target_riscv.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_target_riscv.h @@ -0,0 +1,513 @@ +/* +** Definitions for RISC-V CPUs. 
@@ -6073,11 +6150,25 @@ index 0000000..5089493 +} RISCVRM; + +#endif -diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c -index 4fa79ae..842cfba 100644 ---- a/src/lj_vmmath.c -+++ b/src/lj_vmmath.c -@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double y, int op) +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vm.h +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_vm.h ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vm.h +@@ -37,6 +37,9 @@ LJ_ASMF int lj_vm_cpuid(uint32_t f, uint + #if LJ_TARGET_PPC + void lj_vm_cachesync(void *start, void *end); + #endif ++#if LJ_TARGET_RISCV64 ++void lj_vm_fence_rw_rw(); ++#endif + LJ_ASMF double lj_vm_foldarith(double x, double y, int op); + #if LJ_HASJIT + LJ_ASMF double lj_vm_foldfpm(double x, int op); +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vmmath.c +=================================================================== +--- luajit-5.1.2.1.0+git.1707061634.0d313b2.orig/src/lj_vmmath.c ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/lj_vmmath.c +@@ -69,7 +69,8 @@ double lj_vm_foldarith(double x, double /* -- Helper functions for generated machine code ------------------------- */ @@ -6087,17 +6178,16 @@ index 4fa79ae..842cfba 100644 int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) { uint32_t y, ua, ub; -diff --git a/src/vm_riscv64.dasc b/src/vm_riscv64.dasc -new file mode 100644 -index 0000000..c329123 +Index: luajit-5.1.2.1.0+git.1707061634.0d313b2/src/vm_riscv64.dasc +=================================================================== --- /dev/null -+++ b/src/vm_riscv64.dasc -@@ -0,0 +1,4677 @@ ++++ luajit-5.1.2.1.0+git.1707061634.0d313b2/src/vm_riscv64.dasc +@@ -0,0 +1,4866 @@ +|// Low-level VM code for RISC-V 64 CPUs. +|// Bytecode interpreter, fast functions and helper functions. -+|// Copyright (C) 2005-2022 Mike Pall. See Copyright Notice in luajit.h ++|// Copyright (C) 2005-2023 Mike Pall. 
See Copyright Notice in luajit.h +|// -+|// Contributed by Raymond Wong from PLCT Lab, ISCAS. ++|// Contributed by gns from PLCT Lab, ISCAS. +|// Sponsored by PLCT Lab, ISCAS. +| +|.arch riscv64 @@ -6524,19 +6614,19 @@ index 0000000..c329123 +|.endmacro +| +|// Assumes J is relative to GL. Some J members might be out of range though. -+#define GG_G2GOT (GG_OFS(got) - GG_OFS(g)) +#define GL_J(field) (GG_G2J + (int)offsetof(jit_State, field)) -+#define GL_GOT(name) (GG_G2GOT + sizeof(void*)*LJ_GOT_##name) +| +#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) +| -+|.macro load_got, func -+| ld CFUNCADDR, GL_GOT(func)(GL) ++|.macro call_intern, curfunc, func ++|->curfunc .. _pcrel_ .. func: ++| auipc CFUNCADDR, extern %pcrel_hi(func) ++| jalr CFUNCADDR, extern %pcrel_lo(lj_ .. curfunc .. _pcrel_ .. func) ++|.endmacro ++|.macro call_extern, func ++| call extern func ++| empty +|.endmacro -+|// JAL should be enough for *most* internal jumps. -+|.macro call_intern, func; jalr CFUNCADDR; .endmacro -+|.macro call_extern; jalr CFUNCADDR; .endmacro -+|.macro jmp_extern; jr CFUNCADDR; .endmacro +| +|// Set current VM state. Uses TMP0. +|.macro li_vmstate, st; li TMP0, ~LJ_VMST_..st; .endmacro @@ -6575,11 +6665,18 @@ index 0000000..c329123 +|.macro cleartp, dst, reg; slli dst, reg, 17; srli dst, dst, 17; .endmacro +| +|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. -+|.macro settp, dst, tp; -+| cleartp dst ++|.macro settp_a, dst; cleartp dst; .endmacro ++|.macro settp_a, dst, src; cleartp dst, src; .endmacro ++|.macro settp_b, dst, tp; +| slli x31, tp, 47 +| or dst, dst, x31 +|.endmacro ++|.macro settp_b, dst, src, tp; ++| slli x31, tp, 47 ++| or dst, src, x31 ++|.endmacro ++|.macro settp, dst, tp; settp_a dst; settp_b dst, tp; .endmacro ++|.macro settp, dst, src, tp; settp_a dst, src; settp_b dst, dst, tp; .endmacro +| +|// Extract (negative) type tag. 
+|.macro gettp, dst, src; srai dst, src, 47; .endmacro @@ -6719,7 +6816,7 @@ index 0000000..c329123 + | mv MULTRES, RD + | srliw CARG2, TMP2, 3 + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_leave_unw, lj_state_growstack // (lua_State *L, int n) + | lw TMP2, SAVE_NRES(sp) + | ld BASE, L->top // Need the (realloced) L->top in BASE. + | mv RD, MULTRES @@ -6780,7 +6877,7 @@ index 0000000..c329123 + | // L->base = new base, L->top = top + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_growstack_l, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | ld RC, L->top + | ld LFUNC:RB, FRAME_FUNC(BASE) @@ -6984,7 +7081,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // (lua_State *L, TValue *o, TValue *k) ++ | call_intern vmeta_tgetv, lj_meta_tget + | // Returns TValue * (finished) or NULL (metamethod). + | beqz CRET1, >3 + | ld TMP0, 0(CRET1) @@ -7004,7 +7102,7 @@ index 0000000..c329123 + | j ->vm_call_dispatch_f + | + |->vmeta_tgetr: -+ | jal extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | call_intern vmeta_tgetr, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | mv TMP1, TISNIL + | bxeqz CRET1, ->BC_TGETR_Z @@ -7041,7 +7139,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // (lua_State *L, TValue *o, TValue *k) ++ | call_intern vmeta_tsetv, lj_meta_tset + | // Returns TValue * (finished) or NULL (metamethod). 
+ | ld TMP2, 0(RA) + | beqz CRET1, >3 @@ -7066,7 +7165,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // (lua_State *L, GCtab *t, int32_t key) ++ | call_intern vmeta_tsetr, lj_tab_setinth + | // Returns TValue *. + | j ->BC_TSETR_Z + | @@ -7081,7 +7181,8 @@ index 0000000..c329123 + | mv CARG1, L + | decode_OP1 CARG4, INS + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // (lua_State *L, TValue *o1, *o2, int op) ++ | call_intern vmeta_comp, lj_meta_comp + | // Returns 0/1 or TValue * (metamethod). + |3: + | sltiu TMP1, CRET1, 2 @@ -7129,7 +7230,8 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // (lua_State *L, GCobj *o1, *o2, int ne) ++ | call_intern vmeta_equal, lj_meta_equal + | // Returns 0/1 or TValue * (metamethod). + | j <3 + | @@ -7140,7 +7242,7 @@ index 0000000..c329123 + | mv CARG2, INS + | sd BASE, L->base + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | call_intern vmeta_equal_cd, lj_meta_equal_cd // (lua_State *L, BCIns op) + | // Returns 0/1 or TValue * (metamethod). 
+ | j <3 + |.endif @@ -7152,7 +7254,8 @@ index 0000000..c329123 + | srliw CARG2, RA, 3 + | srliw CARG3, RD, 3 + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | // (lua_State *L, TValue *o, BCReg tp) ++ | call_intern vmeta_istype, lj_meta_istype + | j ->cont_nop + | + |//-- Arithmetic metamethods --------------------------------------------- @@ -7168,7 +7271,8 @@ index 0000000..c329123 + | mv CARG3, RB + | mv CARG4, RC + | decode_OP1 CARG5, INS -+ | jal extern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | call_intern vmeta_arith, lj_meta_arith + | // Returns NULL (finished) or TValue * (metamethod). + | bxeqz CRET1, ->cont_nop + | @@ -7191,7 +7295,7 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | sd PC, SAVE_PC(sp) -+ | jal extern lj_meta_len // (lua_State *L, TValue *o) ++ | call_intern vmeta_len, lj_meta_len // (lua_State *L, TValue *o) + | // Returns NULL (retry) or TValue * (metamethod base). +#if LJ_52 + | bxnez CRET1, ->vmeta_binop // Binop call for compatibility. @@ -7211,7 +7315,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | add CARG3, BASE, RC + | mv MULTRES, NARGS8:RC -+ | jal extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_call, lj_meta_call + | ld LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. + | cleartp LFUNC:RB @@ -7225,7 +7330,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | add CARG3, RA, RC + | mv MULTRES, NARGS8:RC -+ | jal extern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | // (lua_State *L, TValue *func, TValue *top) ++ | call_intern vmeta_callt, lj_meta_call + | ld RB, FRAME_FUNC(RA) // Guaranteed to be a function here. + | ld TMP1, FRAME_PC(BASE) + | addi NARGS8:RC, MULTRES, 8 // Got one more argument now. 
@@ -7240,7 +7346,7 @@ index 0000000..c329123 + | mv CARG2, RA + | sd PC, SAVE_PC(sp) + | mv MULTRES, INS -+ | jal extern lj_meta_for // (lua_State *L, TValue *base) ++ | call_intern vmeta_for, lj_meta_for // (lua_State *L, TValue *base) + |.if JIT + | decode_OP1 TMP0, MULTRES + | li TMP1, BC_JFORI @@ -7330,9 +7436,8 @@ index 0000000..c329123 + | + |.ffunc_1 type + | gettp TMP0, CARG1 -+ | sltu TMP2, TISNUM, TMP0 + | not TMP3, TMP0 -+ | bnez TMP2, >1 ++ | bltu TISNUM, TMP0, >1 + | li TMP3, ~LJ_TISNUM + |1: + | slli TMP3, TMP3, 3 @@ -7375,8 +7480,7 @@ index 0000000..c329123 + | beq RC, TMP0, >5 + | bnez NODE:TMP2, <3 + |4: -+ | mv CARG1, RB -+ | settp CARG1, TMP3 ++ | settp CARG1, RB, TMP3 + | j ->fff_restv // Not found, keep default result. + |5: + | bxne CARG1, TISNIL, ->fff_restv @@ -7419,7 +7523,7 @@ index 0000000..c329123 + | addi CARG3, BASE, 8 + | bxnez TMP0, ->fff_fallback + | mv CARG1, L -+ | jal extern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | call_intern ff_rawget, lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) + | // Returns cTValue *. + | ld CARG1, 0(CRET1) + | j ->fff_restv @@ -7452,7 +7556,7 @@ index 0000000..c329123 + | ffgccheck + | mv CARG1, L + | mv CARG2, BASE -+ | jal extern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | call_intern ff_tostring, lj_strfmt_number // (lua_State *L, cTValue *o) + | // Returns GCstr *. + | li TMP1, LJ_TSTR + |// ld BASE, L->base @@ -7468,7 +7572,7 @@ index 0000000..c329123 + | sd TISNIL, 0(TMP0) // Set missing 2nd arg to nil. + | addi CARG2, BASE, 8 + | addi CARG3, BASE, -16 -+ | jal extern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | call_intern ff_next, lj_tab_next // (GCtab *t, cTValue *key, TValue *o) + | // Returns 1=found, 0=end, -1=error. 
+ |// addi RA, BASE, -16 + | li RD, (2+1)*8 @@ -7510,7 +7614,7 @@ index 0000000..c329123 + | sltu TMP3, TMP2, TMP0 + | addi RA, BASE, -16 + | zext.w TMP0, TMP2 -+ | settp TMP0, TISNUM ++ | settp_b TMP0, TISNUM + | sd TMP0, 0(RA) + | beqz TMP3, >2 // Not in array part? + | slli TMP3, TMP2, 3 @@ -7527,7 +7631,7 @@ index 0000000..c329123 + | li RD, (0+1)*8 + | bxeqz TMP0, ->fff_res + | mv CARG2, TMP2 -+ | jal extern lj_tab_getinth // (GCtab *t, int32_t key) ++ | call_intern ff_ipairs_aux, lj_tab_getinth // (GCtab *t, int32_t key) + | // Returns cTValue * or NULL. + | li RD, (0+1)*8 + | bxeqz CRET1, ->fff_res @@ -7555,6 +7659,9 @@ index 0000000..c329123 + |//-- Base library: catch errors ---------------------------------------- + | + |.ffunc pcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:RC, NARGS8:RC, -8 + | lbu TMP3, GL->hookmask + | mv TMP2, BASE @@ -7575,6 +7682,9 @@ index 0000000..c329123 + | j ->vm_call_dispatch + | + |.ffunc xpcall ++ | ld TMP1, L->maxstack ++ | add TMP2, BASE, NARGS8:RC ++ | bxltu TMP1, TMP2, ->fff_fallback + | addi NARGS8:TMP0, NARGS8:RC, -16 + | ld CARG1, 0(BASE) + | ld CARG2, 8(BASE) @@ -7707,13 +7817,19 @@ index 0000000..c329123 + |.else + | mv CARG1, L + | mv CARG2, L:RA -+ | jal extern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ | // (lua_State *L, lua_State *co) ++ | call_intern ff_coroutine_wrap_aux, lj_ffh_coroutine_wrap_err + |.endif + | + |9: // Handle stack expansion on return from yield. + | mv CARG1, L + | srliw CARG2, RD, 3 -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | // (lua_State *L, int n) ++ |.if resume ++ | call_intern ff_coroutine_resume, lj_state_growstack ++ |.else ++ | call_intern ff_coroutine_wrap_aux, lj_state_growstack ++ |.endif + | mv CRET1, x0 + | j <4 + |.endmacro @@ -7803,15 +7919,13 @@ index 0000000..c329123 + | + |.macro math_extern, func + | .ffunc_n math_ .. 
func -+ | load_got func -+ | call_extern ++ | call_extern func + | j ->fff_resn + |.endmacro + | + |.macro math_extern2, func + | .ffunc_nn math_ .. func -+ | load_got func -+ | call_extern ++ | call_extern func + | j ->fff_resn + |.endmacro + | @@ -7827,9 +7941,8 @@ index 0000000..c329123 + | ld CARG1, 0(BASE) + | fld FARG1, 0(BASE) + | bxne NARGS8:RC, TMP1, ->fff_fallback // Need exactly 1 argument. -+ | load_got log + | checknum CARG1, ->fff_fallback -+ | call_extern ++ | call_extern log + | j ->fff_resn + | + | math_extern log10 @@ -7850,17 +7963,15 @@ index 0000000..c329123 + |.ffunc_2 math_ldexp + | checknum CARG1, ->fff_fallback + | checkint CARG2, ->fff_fallback -+ | load_got ldexp // (double x, int exp) + | fld FARG1, 0(BASE) + | lw CARG1, 8(BASE) -+ | call_extern ++ | call_extern ldexp // (double x, int exp) + | j ->fff_resn + | + |.ffunc_n math_frexp -+ | load_got frexp + | ld PC, FRAME_PC(BASE) + | addi CARG1, GL, offsetof(global_State, tmptv) -+ | call_extern ++ | call_extern frexp + | lw TMP1, GL->tmptv + | fcvt.d.w FARG2, TMP1 + | fsd FRET1, -16(BASE) @@ -7869,10 +7980,9 @@ index 0000000..c329123 + | j ->fff_res + | + |.ffunc_n math_modf -+ | load_got modf + | addi CARG1, BASE, -16 + | ld PC, FRAME_PC(BASE) -+ | call_extern ++ | call_extern modf + | fsd FRET1, -8(BASE) + | li RD, (2+1)*8 + | j ->fff_res @@ -7900,7 +8010,7 @@ index 0000000..c329123 + | or CARG1, CARG1, CARG2 + | addi RA, RA, 8 + | zext.w CARG1, CARG1 -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | j <1 + |3: // Convert intermediate result to number and continue below. + | fcvt.d.w FARG1, CARG1 @@ -7945,7 +8055,7 @@ index 0000000..c329123 + | lbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). 
+ | addiw RD, RD, 1 + | slliw RD, RD, 3 // RD = ((str->len != 0)+1)*8 -+ | settp TMP2, TISNUM ++ | settp_b TMP2, TISNUM + | sd TMP2, -16(BASE) + | j ->fff_res + | @@ -7968,7 +8078,8 @@ index 0000000..c329123 + | sd BASE, L->base + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // (lua_State *L, const char *str, size_t l) ++ | call_intern fff_newstr, lj_str_new + | // Returns GCstr *. + | ld BASE, L->base + |->fff_resstr: @@ -8035,9 +8146,9 @@ index 0000000..c329123 + | sd BASE, L->base + | sd TMP0, SBUF:CARG1->w + | sd PC, SAVE_PC(sp) -+ | jal extern lj_buf_putstr_ .. name ++ | call_intern ff_string_ .. name, lj_buf_putstr_ .. name + |// mv SBUF:CARG1, SBUF:CRET1 -+ | jal extern lj_buf_tostr ++ | call_intern ff_string_ .. name, lj_buf_tostr + | ld BASE, L->base + | j ->fff_resstr + |.endmacro @@ -8207,7 +8318,7 @@ index 0000000..c329123 + |5: // Grow stack for fallback handler. + | li CARG2, LUA_MINSTACK + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern vm_call_tail, lj_state_growstack // (lua_State *L, int n) + | ld BASE, L->base + | mv CRET1, x0 // Set zero-flag to force retry. + | j <1 @@ -8220,14 +8331,14 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) // Redundant (but a defined value). + | mv CARG1, L + | sd TMP0, L->top -+ | jal extern lj_gc_step // (lua_State *L) ++ | call_intern fff_gc_step, lj_gc_step // (lua_State *L) + | ld BASE, L->base -+ |// mv ra, MULTRES ++ | mv ra, MULTRES // Help return address predictor. + | ld TMP0, L->top + | ld CFUNC:RB, FRAME_FUNC(BASE) + | cleartp CFUNC:RB + | sub NARGS8:RC, TMP0, BASE -+ | jr MULTRES ++ | ret + | + |//----------------------------------------------------------------------- + |//-- Special dispatch targets ------------------------------------------- @@ -8275,7 +8386,7 @@ index 0000000..c329123 + | sd BASE, L->base + | mv CARG1, L + | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. 
-+ | jal extern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ | call_intern vm_inshook, lj_dispatch_ins // (lua_State *L, const BCIns *pc) + |3: + | ld BASE, L->base + |4: // Re-dispatch to static ins. @@ -8307,7 +8418,7 @@ index 0000000..c329123 + | slli TMP1, TMP1, 3 + | add TMP1, BASE, TMP1 + | sd TMP1, L->top -+ | jal extern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | call_intern vm_hotloop, lj_trace_hot // (jit_State *J, const BCIns *pc) + | j <3 + |.endif + | @@ -8329,7 +8440,7 @@ index 0000000..c329123 + | sub RA, RA, BASE + | sd TMP0, L->top + | mv CARG1, L -+ | jal extern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | call_intern vm_hotcall, lj_dispatch_call // (lua_State *L, const BCIns *pc) + | // Returns ASMFunction. + | ld BASE, L->base + | ld TMP0, L->top @@ -8380,7 +8491,8 @@ index 0000000..c329123 + | sd L, (offsetof(jit_State, L)-2047)(CARG2) + | sd BASE, L->base + | mv CARG2, PC -+ | jal extern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | // (jit_State *J, const BCIns *pc) ++ | call_intern cont_stitch, lj_dispatch_stitch + | ld BASE, L->base + | j ->cont_nop + | @@ -8396,7 +8508,8 @@ index 0000000..c329123 + | mv CARG2, PC + | sd BASE, L->base + | sw MULTRES, TMPD(sp) -+ | jal extern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // (lua_State *L, const BCIns *pc) ++ | call_intern vm_profhook, lj_dispatch_profile + | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. + | addi PC, PC, -4 + | ld BASE, L->base @@ -8458,7 +8571,7 @@ index 0000000..c329123 + | sw TMP1, (offsetof(jit_State, exitno)-2047)(CARG2) // Store exit number. + | sd x0, GL->jit_base + | mv CARG2, sp -+ | jal extern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | call_intern vm_exit_handler, lj_trace_exit // (jit_State *J, ExitState *ex) + | // Returns MULTRES (unscaled) or negated error code. 
+ | ld TMP1, L->cframe + | ld BASE, L->base @@ -8476,7 +8589,8 @@ index 0000000..c329123 + | sd BASE, L->base + |1: + | ld LFUNC:RB, FRAME_FUNC(BASE) -+ | bltz CRET1, >9 // Check for error from exit. ++ | sltiu TMP0, CRET1, -LUA_ERRERR // Check for error from exit. ++ | beqz TMP0, >9 + | lui TMP3, 0x43380 // TOBIT = Hiword of 2^52 + 2^51 (double). + | slli MULTRES, CRET1, 3 + | cleartp LFUNC:RB @@ -8490,16 +8604,19 @@ index 0000000..c329123 + | fmv.d.x TOBIT, TMP3 + | // Modified copy of ins_next which handles function header dispatch, too. + | lw INS, 0(PC) -+ | addi PC, PC, 4 ++ | addi PC, PC, 4 ++ | addiw CRET1, CRET1, 17 // Static dispatch? + | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 + | sw TISNIL, GL->vmstate ++ | decode_RD8a RD, INS ++ | beqz CRET1, >5 + | decode_OP8 TMP1, INS -+ | sltiu TMP2, TMP1, BC_FUNCF*8 + | add TMP0, DISPATCH, TMP1 -+ | decode_RD8 RD, INS ++ | sltiu TMP2, TMP1, BC_FUNCF*8 + | ld TMP3, 0(TMP0) -+ | decode_RA8 RA, INS -+ | beqz TMP2, >2 ++ | decode_RA8 RA, INS ++ | beqz TMP2, >2 ++ | decode_RD8b RD + | jr TMP3 + |2: + | sltiu TMP2, TMP1, (BC_FUNCC+2)*8 // Fast function? @@ -8521,10 +8638,25 @@ index 0000000..c329123 + | add RA, RA, BASE + | jr TMP3 + | ++ |5: // Dispatch to static entry of original ins replaced by BC_JLOOP. ++ | ld TMP0, GL_J(trace)(GL) ++ | decode_RD8b RD ++ | add TMP0, TMP0, RD ++ | ld TRACE:TMP2, 0(TMP0) ++ | lw INS, TRACE:TMP2->startins ++ | decode_OP8 TMP1, INS ++ | add TMP0, DISPATCH, TMP1 ++ | decode_RD8a RD, INS ++ | ld TMP3, GG_DISP2STATIC(TMP0) ++ | decode_RA8a RA, INS ++ | decode_RD8b RD ++ | decode_RA8b RA ++ | jr TMP3 ++ | + |9: // Rethrow error from the right C frame. 
+ | negw CARG2, CRET1 + | mv CARG1, L -+ | jal extern lj_err_trace // (lua_State *L, int errcode) ++ | call_intern vm_exit_interp, lj_err_trace // (lua_State *L, int errcode) + |.endif + | + |//----------------------------------------------------------------------- @@ -8613,6 +8745,13 @@ index 0000000..c329123 + |//-- Miscellaneous functions -------------------------------------------- + |//----------------------------------------------------------------------- + | ++ |// void lj_vm_fence_rw_rw() ++ |->vm_fence_rw_rw: ++ |.if JIT or FFI ++ | .long 0x0330000f ++ | ret ++ |.endif ++ | + |.define NEXT_TAB, TAB:CARG1 + |.define NEXT_IDX, CARG2 + |.define NEXT_ASIZE, CARG3 @@ -8688,7 +8827,6 @@ index 0000000..c329123 + | addxi DISPATCH, x7, GG_G2DISP + | srli x5, x5, 12 + | sw x5, CTSTATE->cb.slot -+ | load_got lj_ccallback_enter + | sd CARG1, CTSTATE->cb.gpr[0] + | fsd FARG1, CTSTATE->cb.fpr[0] + | sd CARG2, CTSTATE->cb.gpr[1] @@ -8710,7 +8848,7 @@ index 0000000..c329123 + | sd x0, SAVE_PC(sp) // Any value outside of bytecode is ok. + | mv CARG1, CTSTATE + | mv CARG2, sp -+ | call_intern lj_ccallback_enter // (CTState *cts, void *cf) ++ | call_intern vm_ffi_callback, lj_ccallback_enter // (CTState *cts, void *cf) + | // Returns lua_State *. + | ld BASE, L:CRET1->base + | ld RC, L:CRET1->top @@ -8730,14 +8868,14 @@ index 0000000..c329123 + | + |->cont_ffi_callback: // Return from FFI callback. 
+ |.if FFI -+ | load_got lj_ccallback_leave + | ld CTSTATE, GL->ctype_state + | sd BASE, L->base + | sd RB, L->top + | sd L, CTSTATE->L + | mv CARG1, CTSTATE + | mv CARG2, RA -+ | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | // (CTState *cts, TValue *o) ++ | call_intern cont_ffi_callback, lj_ccallback_leave + | fld FRET1, CTSTATE->cb.fpr[0] + | ld CRET1, CTSTATE->cb.gpr[0] + | fld FRET2, CTSTATE->cb.fpr[1] @@ -8755,7 +8893,6 @@ index 0000000..c329123 + | mv TMP2, sp + | sub sp, sp, TMP1 + | sd ra, -8(TMP2) -+ | slliw CARG2, CARG2, 3 + | sd x18, -16(TMP2) + | sd CCSTATE, -24(TMP2) + | mv x18, TMP2 @@ -9206,7 +9343,7 @@ index 0000000..c329123 + | negw TMP0, TMP0 + | bxeq TMP0, TMP1, ->vmeta_unm // Meta handler deals with -2^31. + | zext.w TMP0, TMP0 -+ | settp TMP0, TISNUM ++ | settp_b TMP0, TISNUM + | j >2 + |1: + | sltiu TMP3, CARG3, LJ_TISNUM @@ -9226,9 +9363,9 @@ index 0000000..c329123 + | addi TMP2, TMP1, -LJ_TSTR + | cleartp STR:CARG1, TMP0 + | bnez TMP2, >2 -+ | lw CARG1, STR:CARG1->len ++ | lwu CARG1, STR:CARG1->len + |1: -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | sd CARG1, 0(RA) + | ins_next + |2: @@ -9240,7 +9377,7 @@ index 0000000..c329123 + |3: +#endif + |->BC_LEN_Z: -+ | jal extern lj_tab_len // (GCtab *t) ++ | call_intern BC_LEN, lj_tab_len // (GCtab *t) + | // Returns uint32_t (but less than 2^31). + | j <1 +#if LJ_52 @@ -9339,7 +9476,7 @@ index 0000000..c329123 + | bxne CRET1, TMP2, ->vmeta_arith // 63-32bit not all 0 or 1: overflow. + |.endif + | zext.w CRET1, CRET1 -+ | settp CRET1, TISNUM ++ | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. 
@@ -9352,7 +9489,7 @@ index 0000000..c329123 + | ins_arithfp, fpins, TMP0, TMP1 + |.endmacro + | -+ |.macro ins_arithmod, fpins ++ |.macro ins_arithmod, fpins, BC + | ins_arithpre + | ins_arithead TMP0, TMP1, CARG1, CARG2 + | bne TMP0, TISNUM, >1 @@ -9361,9 +9498,9 @@ index 0000000..c329123 + | sext.w CARG2, CARG2 + | add RA, BASE, RA + | bxeqz CARG2, ->vmeta_arith -+ | jal extern lj_vm_modi ++ | call_intern BC, lj_vm_modi + | zext.w CRET1, CRET1 -+ | settp CRET1, TISNUM ++ | settp_b CRET1, TISNUM + | sd CRET1, 0(RA) + | ins_next + |1: // Check for two numbers. @@ -9382,8 +9519,14 @@ index 0000000..c329123 + case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: + | ins_arithdiv fdiv.d + break; -+ case BC_MODVN: case BC_MODNV: case BC_MODVV: -+ | ins_arithmod fpmod ++ case BC_MODVN: ++ | ins_arithmod fpmod, BC_MODVN ++ break; ++ case BC_MODNV: ++ | ins_arithmod fpmod, BC_MODNV ++ break; ++ case BC_MODVV: ++ | ins_arithmod fpmod, BC_MODVV + break; + case BC_POW: + | ins_arithpre @@ -9395,11 +9538,10 @@ index 0000000..c329123 + | sltiu TMP1, TMP1, LJ_TISNUM + | and TMP0, TMP0, TMP1 + | add RA, BASE, RA -+ | load_got pow + | bxeqz TMP0, ->vmeta_arith + | fld FARG1, 0(RB) + | fld FARG2, 0(RC) -+ | call_extern ++ | call_extern pow + | ins_next1 + | fsd FRET1, 0(RA) + | ins_next2 @@ -9417,7 +9559,7 @@ index 0000000..c329123 + | srliw CARG3, CARG3, 3 + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | call_intern BC_CAT, lj_meta_cat // (lua_State *L, TValue *top, int left) + | // Returns NULL (finished) or TValue * (metamethod). + | ld BASE, L->base + | bxnez CRET1, ->vmeta_binop @@ -9458,7 +9600,7 @@ index 0000000..c329123 + | add RA, BASE, RA + | zext.w RD, RD + | ins_next1 -+ | settp RD, TISNUM ++ | settp_b RD, TISNUM + | sd RD, 0(RA) + | ins_next2 + break; @@ -9539,7 +9681,7 @@ index 0000000..c329123 + | beqz TMP3, <1 + | // Crossed a write barrier. Move the barrier forward. 
+ | mv CARG1, GL -+ | jal extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | call_intern BC_USETV, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETS: @@ -9568,7 +9710,7 @@ index 0000000..c329123 + | beqz TMP0, <1 + | // Crossed a write barrier. Move the barrier forward. + | mv CARG1, GL -+ | jal extern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | call_intern BC_USETS, lj_gc_barrieruv // (global_State *g, TValue *tv) + | j <1 + break; + case BC_USETN: @@ -9604,7 +9746,7 @@ index 0000000..c329123 + | mv CARG1, L + | beqz TMP2, >1 + | add CARG2, BASE, RA -+ | jal extern lj_func_closeuv // (lua_State *L, TValue *level) ++ | call_intern BC_UCLO, lj_func_closeuv // (lua_State *L, TValue *level) + | ld BASE, L->base + |1: + | ins_next @@ -9620,7 +9762,7 @@ index 0000000..c329123 + | cleartp CARG3 + | mv CARG1, L + | // (lua_State *L, GCproto *pt, GCfuncL *parent) -+ | jal extern lj_func_newL_gc ++ | call_intern BC_FNEW, lj_func_newL_gc + | // Returns GCfuncL *. + | li TMP0, LJ_TFUNC + | ld BASE, L->base @@ -9653,15 +9795,15 @@ index 0000000..c329123 + | not TMP4, TMP4 + | and CARG2, CARG2, TMP4 + | or CARG2, CARG2, TMP0 -+ | // (lua_State *L, int32_t asize, uint32_t hbits) + | mv CARG1, L -+ | jal extern lj_tab_new ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | call_intern BC_TNEW, lj_tab_new + | // Returns Table *. + } else { + | sub TMP1, KBASE, RD + | mv CARG1, L -+ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 -+ | jal extern lj_tab_dup // (lua_State *L, Table *kt) ++ | ld CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | call_intern BC_TDUP, lj_tab_dup // (lua_State *L, Table *kt) + | // Returns Table *. 
+ } + | li TMP0, LJ_TTAB @@ -9674,7 +9816,11 @@ index 0000000..c329123 + |5: + | mv MULTRES, RD + | mv CARG1, L -+ | jal extern lj_gc_step_fixtop // (lua_State *L) ++ if (op == BC_TNEW) { ++ | call_intern BC_TNEW, lj_gc_step_fixtop // (lua_State *L) ++ } else { ++ | call_intern BC_TDUP, lj_gc_step_fixtop // (lua_State *L) ++ } + | mv RD, MULTRES + | j <1 + break; @@ -9711,7 +9857,7 @@ index 0000000..c329123 + | bne TMP3, TISNUM, >5 // Integer key? + | sext.w TMP2, TMP2 + | ld TMP1, TAB:RB->array -+ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? (keys = [0, asize-1]) ++ | bxgeu TMP2, TMP0, ->vmeta_tgetv // Integer key and in array part? + | slliw TMP2, TMP2, 3 + | add TMP2, TMP1, TMP2 + | ld CRET1, 0(TMP2) @@ -9939,7 +10085,8 @@ index 0000000..c329123 + | mv CARG2, TAB:RB + | sd PC, SAVE_PC(sp) + | mv CARG1, L -+ | jal extern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // (lua_State *L, GCtab *t, TValue *k) ++ | call_intern BC_TSETS, lj_tab_newkey + | // Returns TValue *. + | ld BASE, L->base + | fsd FTMP0, 0(CRET1) @@ -10047,7 +10194,8 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | mv BASE, RD + | mv CARG1, L -+ | jal extern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // (lua_State *L, GCtab *t, int nasize) ++ | call_intern BC_TSETM, lj_tab_reasize + | // Must not reallocate the stack. + | mv RD, BASE + | ld BASE, L->base // Reload BASE for lack of a saved register. 
@@ -10317,7 +10465,7 @@ index 0000000..c329123 + | sd PC, SAVE_PC(sp) + | srliw CARG2, TMP1, 3 + | mv CARG1, L -+ | jal extern lj_state_growstack // (lua_State *L, int n) ++ | call_intern BC_VARG, lj_state_growstack // (lua_State *L, int n) + | mv RC, BASE + | ld BASE, L->base + | add RA, BASE, RA @@ -10486,7 +10634,7 @@ index 0000000..c329123 + | or CARG3, CARG3, TMP1 + | or CARG2, CARG3, TMP3 // CARG2=1: overflow; CARG2=0: continue + | zext.w CARG1, CARG1 -+ | settp CARG1, TISNUM ++ | settp_b CARG1, TISNUM + | sd CARG1, FORL_IDX*8(RA) + } + |1: @@ -10669,6 +10817,7 @@ index 0000000..c329123 + | settp LFUNC:RB, TMP0 + | add TMP0, RA, RC + | sd LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. ++ | addi TMP2, TMP2, -8 + | addi TMP3, RC, 16+FRAME_VARG + | ld KBASE, -4+PC2PROTO(k)(PC) + | sd TMP3, 8(TMP1) // Store delta + FRAME_VARG. @@ -10768,5 +10917,135 @@ index 0000000..c329123 +/* Emit pseudo frame-info for all assembler functions. */ +static void emit_asm_debug(BuildCtx *ctx) +{ -+ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 3\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n" /* offset ra */, ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 3\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.4byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 3\n" ++ ".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ fprintf(ctx->fp, "\t.section .eh_frame,\"a\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe1:\n" ++ "\t.4byte .LECIE1-.LSCIE1\n" ++ ".LSCIE1:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zPR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 6\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.4byte lj_err_unwind_dwarf-.\n" ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE1:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE2:\n" ++ "\t.4byte .LEFDE2-.LASFDE2\n" ++ ".LASFDE2:\n" ++ "\t.4byte .LASFDE2-.Lframe1\n" ++ "\t.4byte .Lbegin-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x81\n\t.uleb128 2*6\n", /* offset ra */ ++ fcofs, CFRAME_SIZE); ++ for (i = 27; i >= 18; i--) /* offset x27-x18 (s11-s2) */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(27-i+7)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89\n\t.uleb128 2*17\n" /* offset x9 (s1) */ ++ "\t.byte 0x88\n\t.uleb128 2*18\n" /* offset x8 (s0/fp) */); ++ for (i = 27; i >= 18; i--) /* offset f31-f18 */ ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(27-i+19)); ++ fprintf(ctx->fp, ++ "\t.byte 0x89+32\n\t.uleb128 2*29\n" /* offset f9 (fs1) */ ++ "\t.byte 0x88+32\n\t.uleb128 2*30\n" /* offset f8 (fs0) */ ++ "\t.align 2\n" ++ ".LEFDE2:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".Lframe2:\n" ++ "\t.4byte .LECIE2-.LSCIE2\n" ++ ".LSCIE2:\n" ++ "\t.4byte 0\n" ++ "\t.byte 0x1\n" ++ "\t.string \"zR\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 1\n" /* Return address is in ra. 
*/ ++ "\t.uleb128 1\n" /* augmentation length */ ++ "\t.byte 0x1b\n" ++ "\t.byte 0xc\n\t.uleb128 2\n\t.uleb128 0\n" /* def_cfa sp 0 */ ++ "\t.align 2\n" ++ ".LECIE2:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE3:\n" ++ "\t.4byte .LEFDE3-.LASFDE3\n" ++ ".LASFDE3:\n" ++ "\t.4byte .LASFDE3- .Lframe2\n" ++ "\t.4byte lj_vm_ffi_call-.\n" ++ "\t.4byte %d\n" ++ "\t.uleb128 0\n" /* augmentation length */ ++ "\t.byte 0x81\n\t.uleb128 2*1\n" /* offset ra */ ++ "\t.byte 0x92\n\t.uleb128 2*2\n" /* offset x18 */ ++ "\t.byte 0xd\n\t.uleb128 0x12\n" ++ "\t.align 2\n" ++ ".LEFDE3:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#endif ++ break; ++ default: ++ break; ++ } +} diff --git a/luajit-0d313b24.patch b/luajit-0d313b24.patch new file mode 100644 index 0000000000000000000000000000000000000000..7863a08b238f90a9d568bf494bb690447b14e9b0 --- /dev/null +++ b/luajit-0d313b24.patch @@ -0,0 +1,143 @@ +diff --git a/src/lj_api.c b/src/lj_api.c +index d4048d79..1ad71678 100644 +--- a/src/lj_api.c ++++ b/src/lj_api.c +@@ -1052,6 +1052,7 @@ LUA_API int lua_setmetatable(lua_State *L, int idx) + /* Flush cache, since traces specialize to basemt. But not during __gc. */ + if (lj_trace_flushall(L)) + lj_err_caller(L, LJ_ERR_NOGCMM); ++ o = index2adr(L, idx); /* Stack may have been reallocated. */ + if (tvisbool(o)) { + /* NOBARRIER: basemt is a GC root. */ + setgcref(basemt_it(g, LJ_TTRUE), obj2gco(mt)); +diff --git a/src/lj_ffrecord.c b/src/lj_ffrecord.c +index 30dc6bfc..03d0e6ec 100644 +--- a/src/lj_ffrecord.c ++++ b/src/lj_ffrecord.c +@@ -98,6 +98,14 @@ static ptrdiff_t results_wanted(jit_State *J) + return -1; + } + ++static TValue *rec_stop_stitch_cp(lua_State *L, lua_CFunction dummy, void *ud) ++{ ++ jit_State *J = (jit_State *)ud; ++ lj_record_stop(J, LJ_TRLINK_STITCH, 0); ++ UNUSED(L); UNUSED(dummy); ++ return NULL; ++} ++ + /* Trace stitching: add continuation below frame to start a new trace. 
*/ + static void recff_stitch(jit_State *J) + { +@@ -108,10 +116,7 @@ static void recff_stitch(jit_State *J) + TValue *nframe = base + 1 + LJ_FR2; + const BCIns *pc = frame_pc(base-1); + TValue *pframe = frame_prevl(base-1); +- +- /* Check for this now. Throwing in lj_record_stop messes up the stack. */ +- if (J->cur.nsnap >= (MSize)J->param[JIT_P_maxsnap]) +- lj_trace_err(J, LJ_TRERR_SNAPOV); ++ int errcode; + + /* Move func + args up in Lua stack and insert continuation. */ + memmove(&base[1], &base[-1-LJ_FR2], sizeof(TValue)*nslot); +@@ -136,13 +141,19 @@ static void recff_stitch(jit_State *J) + J->baseslot += 2 + LJ_FR2; + J->framedepth++; + +- lj_record_stop(J, LJ_TRLINK_STITCH, 0); ++ errcode = lj_vm_cpcall(L, NULL, J, rec_stop_stitch_cp); + + /* Undo Lua stack changes. */ + memmove(&base[-1-LJ_FR2], &base[1], sizeof(TValue)*nslot); + setframe_pc(base-1, pc); + L->base -= 2 + LJ_FR2; + L->top -= 2 + LJ_FR2; ++ ++ if (errcode) { ++ if (errcode == LUA_ERRRUN) ++ copyTV(L, L->top-1, L->top + (1 + LJ_FR2)); ++ lj_err_throw(L, errcode); /* Propagate errors. */ ++ } + } + + /* Fallback handler for fast functions that are not recorded (yet). */ +diff --git a/src/lj_gc.c b/src/lj_gc.c +index c3a0c258..eebc751b 100644 +--- a/src/lj_gc.c ++++ b/src/lj_gc.c +@@ -108,6 +108,9 @@ static void gc_mark_start(global_State *g) + gc_markobj(g, tabref(mainthread(g)->env)); + gc_marktv(g, &g->registrytv); + gc_mark_gcroot(g); ++#if LJ_HASFFI ++ if (ctype_ctsG(g)) gc_markobj(g, ctype_ctsG(g)->finalizer); ++#endif + g->gc.state = GCSpropagate; + } + +diff --git a/src/lj_record.c b/src/lj_record.c +index b7af5896..48bbbb20 100644 +--- a/src/lj_record.c ++++ b/src/lj_record.c +@@ -903,6 +903,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + { + TValue *frame = J->L->base - 1; + ptrdiff_t i; ++ BCReg baseadj = 0; + for (i = 0; i < gotresults; i++) + (void)getslot(J, rbase+i); /* Ensure all results have a reference. 
*/ + while (frame_ispcall(frame)) { /* Immediately resolve pcall() returns. */ +@@ -911,6 +912,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + lj_trace_err(J, LJ_TRERR_NYIRETL); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); + gotresults++; ++ baseadj += cbase; + rbase += cbase; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; +@@ -935,6 +937,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + if (--J->framedepth < 0) /* NYI: return of vararg func to lower frame. */ + lj_trace_err(J, LJ_TRERR_NYIRETL); + lj_assertJ(J->baseslot > 1+LJ_FR2, "bad baseslot for return"); ++ baseadj += cbase; + rbase += cbase; + J->baseslot -= (BCReg)cbase; + J->base -= cbase; +@@ -948,7 +951,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + if ((pt->flags & PROTO_NOJIT)) + lj_trace_err(J, LJ_TRERR_CJITOFF); + if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) { +- if (check_downrec_unroll(J, pt)) { ++ if (!J->cur.root && check_downrec_unroll(J, pt)) { + J->maxslot = (BCReg)(rbase + gotresults); + lj_snap_purge(J); + lj_record_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno); /* Down-rec. */ +@@ -970,6 +973,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + lj_trace_err(J, LJ_TRERR_LLEAVE); + } else if (J->needsnap) { /* Tailcalled to ff with side-effects. */ + lj_trace_err(J, LJ_TRERR_NYIRETL); /* No way to insert snapshot here. */ ++ } else if (1 + pt->framesize >= LJ_MAX_JSLOTS) { ++ lj_trace_err(J, LJ_TRERR_STACKOV); + } else { /* Return to lower frame. Guard for the target we return to. */ + TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO); + TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame)); +@@ -1003,7 +1008,8 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + BCReg bslot = bc_b(*(frame_contpc(frame)-1)); + TRef tr = gotresults ? J->base[cbase+rbase] : TREF_NIL; + if (bslot != J->maxslot) { /* Concatenate the remainder. 
*/ +- TValue *b = J->L->base, save; /* Simulate lower frame and result. */ ++ /* Simulate lower frame and result. */ ++ TValue *b = J->L->base - baseadj, save; + /* Can't handle MM_concat + CALLT + fast func side-effects. */ + if (J->postproc != LJ_POST_NONE) + lj_trace_err(J, LJ_TRERR_NYIRETL); +@@ -1016,7 +1022,7 @@ void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults) + J->L->base = b - cbase; + tr = rec_cat(J, bslot, cbase-(2<L->base + cbase; /* Undo. */ +- J->L->base = b; ++ J->L->base = b + baseadj; + copyTV(J->L, b-(2</dev/null - SED_PC= sed -e "s|^prefix=.*|prefix=$(PREFIX)|" \ - -e "s|^multilib=.*|multilib=$(MULTILIB)|" -+ifneq ($(INSTALL_DEFINC),$(INSTALL_INC)) -+ SED_PC+= -e "s|^includedir=.*|includedir=$(INSTALL_INC)|" -+endif - - FILE_T= luajit - FILE_A= libluajit.a -@@ -121,7 +125,7 @@ install: $(INSTALL_DEP) - $(RM) $(INSTALL_DYN) $(INSTALL_SHORT1) $(INSTALL_SHORT2) - cd src && test -f $(FILE_SO) && \ - $(INSTALL_X) $(FILE_SO) $(INSTALL_DYN) && \ -- $(LDCONFIG) $(INSTALL_LIB) && \ -+ ( $(LDCONFIG) $(INSTALL_LIB) || : ) && \ - $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT1) && \ - $(SYMLINK) $(INSTALL_SONAME) $(INSTALL_SHORT2) || : - cd etc && $(INSTALL_F) $(FILE_MAN) $(INSTALL_MAN) -diff --git a/README b/README -index 2b9ae9d2..1faef255 100644 ---- a/README -+++ b/README -@@ -3,9 +3,9 @@ README for LuaJIT 2.1.0-beta3 - - LuaJIT is a Just-In-Time (JIT) compiler for the Lua programming language. - --Project Homepage: http://luajit.org/ -+Project Homepage: https://luajit.org/ - --LuaJIT is Copyright (C) 2005-2017 Mike Pall. -+LuaJIT is Copyright (C) 2005-2022 Mike Pall. - LuaJIT is free software, released under the MIT license. - See full Copyright Notice in the COPYRIGHT file or in luajit.h. - -diff --git a/doc/bluequad-print.css b/doc/bluequad-print.css -index 62e1c165..a49d309f 100644 ---- a/doc/bluequad-print.css -+++ b/doc/bluequad-print.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. 
-+/* Copyright (C) 2004-2022 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. -diff --git a/doc/bluequad.css b/doc/bluequad.css -index be2c4bf2..4c1a9082 100644 ---- a/doc/bluequad.css -+++ b/doc/bluequad.css -@@ -1,4 +1,4 @@ --/* Copyright (C) 2004-2017 Mike Pall. -+/* Copyright (C) 2004-2022 Mike Pall. - * - * You are welcome to use the general ideas of this design for your own sites. - * But please do not steal the stylesheet, the layout or the color scheme. -diff --git a/doc/changes.html b/doc/changes.html -deleted file mode 100644 -index a66a8d95..00000000 ---- a/doc/changes.html -+++ /dev/null -@@ -1,883 +0,0 @@ -- -- -- --LuaJIT Change History -- -- -- -- -- -- -- -- -- --
--Lua --
-- -- --
--

--This is a list of changes between the released versions of LuaJIT.
--The current stable version is LuaJIT 2.0.5.
--

--

--Please check the --» Online Change History --to see whether newer versions are available. --

-- --
--

LuaJIT 2.1.0-beta3 — 2017-05-01

--
    --
  • Rewrite memory block allocator.
  • --
  • Add various extension from Lua 5.2/5.3.
  • --
  • Remove old Lua 5.0 compatibility defines.
  • --
  • Set arg table before evaluating LUA_INIT and -e chunks.
  • --
  • Fix FOLD rules for math.abs() and FP negation.
  • --
  • Fix soft-float math.abs() and negation.
  • --
  • Fix formatting of some small denormals at low precision.
  • --
  • LJ_GC64: Add JIT compiler support.
  • --
  • x64/LJ_GC64: Add JIT compiler backend.
  • --
  • x86/x64: Generate BMI2 shifts and rotates, if available.
  • --
  • Windows/x86: Add full exception interoperability.
  • --
  • ARM64: Add big-endian support.
  • --
  • ARM64: Add JIT compiler backend.
  • --
  • MIPS: Fix TSETR barrier.
  • --
  • MIPS: Support MIPS16 interlinking.
  • --
  • MIPS soft-float: Fix code generation for HREF.
  • --
  • MIPS64: Add MIPS64 hard-float JIT compiler backend.
  • --
  • MIPS64: Add MIPS64 hard-float/soft-float support to interpreter.
  • --
  • FFI: Compile bitfield loads/stores.
  • --
  • Various fixes common with the 2.0 branch.
  • --
-- --

LuaJIT 2.1.0-beta2 — 2016-03-03

--
    --
  • Enable trace stitching.
  • --
  • Use internal implementation for converting FP numbers to strings.
  • --
  • Parse Unicode escape '\u{XX...}' in string literals.
  • --
  • Add MIPS soft-float support.
  • --
  • Switch MIPS port to dual-number mode.
  • --
  • x86/x64: Add support for AES-NI, AVX and AVX2 to DynASM.
  • --
  • FFI: Add ssize_t declaration.
  • --
  • FFI: Parse #line NN and #NN.
  • --
  • Various minor fixes.
  • --
-- --

LuaJIT 2.1.0-beta1 — 2015-08-25

--

--This is a brief summary of the major changes in LuaJIT 2.1 compared to 2.0. --Please take a look at the commit history for more details. --

--
    --
  • Changes to the VM core: --
      --
    • Add low-overhead profiler (-jp).
    • --
    • Add LJ_GC64 mode: 64 bit GC object references (really: 47 bit). Interpreter-only for now.
    • --
    • Add LJ_FR2 mode: Two-slot frame info. Required by LJ_GC64 mode.
    • --
    • Add table.new() and table.clear().
    • --
    • Parse binary number literals (0bxxx).
    • --
  • --
  • Improvements to the JIT compiler: --
      --
    • Add trace stitching (disabled for now).
    • --
    • Compile various builtins: string.char(), string.reverse(), string.lower(), string.upper(), string.rep(), string.format(), table.concat(), bit.tohex(), getfenv(0), debug.getmetatable().
    • --
    • Compile string.find() for fixed string searches (no patterns).
    • --
    • Compile BC_TSETM, e.g. {1,2,3,f()}.
    • --
    • Compile string concatenations (BC_CAT).
    • --
    • Compile __concat metamethod.
    • --
    • Various minor optimizations.
    • --
  • --
  • Internal Changes: --
      --
    • Add support for embedding LuaJIT bytecode for builtins.
    • --
    • Replace various builtins with embedded bytecode.
    • --
    • Refactor string buffers and string formatting.
    • --
    • Remove obsolete non-truncating number to integer conversions.
    • --
  • --
  • Ports: --
      --
    • Add Xbox One port (LJ_GC64 mode).
    • --
    • ARM64: Add port of the interpreter (LJ_GC64 mode).
    • --
    • x64: Add separate port of the interpreter to LJ_GC64 mode.
    • --
    • x86/x64: Drop internal x87 math functions. Use libm functions.
    • --
    • x86: Remove x87 support from interpreter. SSE2 is mandatory now.
    • --
    • PPC/e500: Drop support for this architecture.
    • --
  • --
  • FFI library: --
      --
    • FFI: Add 64 bit bitwise operations.
    • --
    • FFI: Compile VLA/VLS and large cdata allocations with default initialization.
    • --
    • FFI: Compile conversions from functions to function pointers.
    • --
    • FFI: Compile lightuserdata to void * conversion.
    • --
    • FFI: Compile ffi.gc(cdata, nil), too.
    • --
    • FFI: Add ffi.typeinfo().
    • --
  • --
--
-- --
--

LuaJIT 2.0.5 — 2017-05-01

--
    --
  • Add workaround for MSVC 2015 stdio changes.
  • --
  • Limit mcode alloc probing, depending on the available pool size.
  • --
  • Fix overly restrictive range calculation in mcode allocation.
  • --
  • Fix out-of-scope goto handling in parser.
  • --
  • Remove internal __mode = "K" and replace with safe check.
  • --
  • Add "proto" field to jit.util.funcinfo().
  • --
  • Fix GC step size calculation.
  • --
  • Initialize uv->immutable for upvalues of loaded chunks.
  • --
  • Fix for cdata vs. non-cdata arithmetics/comparisons.
  • --
  • Drop leftover regs in 'for' iterator assignment, too.
  • --
  • Fix PHI remarking in SINK pass.
  • --
  • Don't try to record outermost pcall() return to lower frame.
  • --
  • Add guard for obscure aliasing between open upvalues and SSA slots.
  • --
  • Remove assumption that lj_math_random_step() doesn't clobber FPRs.
  • --
  • Fix handling of non-numeric strings in arithmetic coercions.
  • --
  • Fix recording of select(n, ...) with off-trace varargs
  • --
  • Fix install for cross-builds.
  • --
  • Don't allocate unused 2nd result register in JIT compiler backend.
  • --
  • Drop marks from replayed instructions when sinking.
  • --
  • Fix unsinking check.
  • --
  • Properly handle OOM in trace_save().
  • --
  • Limit number of arguments given to io.lines() and fp:lines().
  • --
  • Fix narrowing of TOBIT.
  • --
  • OSX: Fix build with recent XCode.
  • --
  • x86/x64: Don't spill an explicit REF_BASE in the IR.
  • --
  • x86/x64: Fix instruction length decoder.
  • --
  • x86/x64: Search for exit jumps with instruction length decoder.
  • --
  • ARM: Fix BLX encoding for Thumb interworking calls.
  • --
  • MIPS: Don't use RID_GP as a scratch register.
  • --
  • MIPS: Fix emitted code for U32 to float conversion.
  • --
  • MIPS: Backport workaround for compact unwind tables.
  • --
  • MIPS: Fix cross-endian jit.bcsave.
  • --
  • MIPS: Fix BC_ISNEXT fallback path.
  • --
  • MIPS: Fix use of ffgccheck delay slots in interpreter.
  • --
  • FFI: Fix FOLD rules for int64_t comparisons.
  • --
  • FFI: Fix SPLIT pass for CONV i64.u64.
  • --
  • FFI: Fix ipairs() recording.
  • --
  • FFI: Don't propagate qualifiers into subtypes of complex.
  • --
-- --

LuaJIT 2.0.4 — 2015-05-14

--
    --
  • Fix stack check in narrowing optimization.
  • --
  • Fix Lua/C API typecheck error for special indexes.
  • --
  • Fix string to number conversion.
  • --
  • Fix lexer error for chunks without tokens.
  • --
  • Don't compile IR_RETF after CALLT to ff with-side effects.
  • --
  • Fix BC_UCLO/BC_JMP join optimization in Lua parser.
  • --
  • Fix corner case in string to number conversion.
  • --
  • Gracefully handle lua_error() for a suspended coroutine.
  • --
  • Avoid error messages when building with Clang.
  • --
  • Fix snapshot #0 handling for traces with a stack check on entry.
  • --
  • Fix fused constant loads under high register pressure.
  • --
  • Invalidate backpropagation cache after DCE.
  • --
  • Fix ABC elimination.
  • --
  • Fix debug info for main chunk of stripped bytecode.
  • --
  • Fix FOLD rule for string.sub(s, ...) == k.
  • --
  • Fix FOLD rule for STRREF of SNEW.
  • --
  • Fix frame traversal while searching for error function.
  • --
  • Prevent GC estimate miscalculation due to buffer growth.
  • --
  • Prevent adding side traces for stack checks.
  • --
  • Fix top slot calculation for snapshots with continuations.
  • --
  • Fix check for reuse of SCEV results in FORL.
  • --
  • Add PS Vita port.
  • --
  • Fix compatibility issues with Illumos.
  • --
  • Fix DragonFly build (unsupported).
  • --
  • OpenBSD/x86: Better executable memory allocation for W^X mode.
  • --
  • x86: Fix argument checks for ipairs() iterator.
  • --
  • x86: lj_math_random_step() clobbers XMM regs on OSX Clang.
  • --
  • x86: Fix code generation for unused result of math.random().
  • --
  • x64: Allow building with LUAJIT_USE_SYSMALLOC and LUAJIT_USE_VALGRIND.
  • --
  • x86/x64: Fix argument check for bit shifts.
  • --
  • x86/x64: Fix code generation for fused test/arith ops.
  • --
  • ARM: Fix write barrier check in BC_USETS.
  • --
  • PPC: Fix red zone overflow in machine code generation.
  • --
  • PPC: Don't use mcrxr on PPE.
  • --
  • Various archs: Fix excess stack growth in interpreter.
  • --
  • FFI: Fix FOLD rule for TOBIT + CONV num.u32.
  • --
  • FFI: Prevent DSE across ffi.string().
  • --
  • FFI: No meta fallback when indexing pointer to incomplete struct.
  • --
  • FFI: Fix initialization of unions of subtypes.
  • --
  • FFI: Fix cdata vs. non-cdata arithmetic and comparisons.
  • --
  • FFI: Fix __index/__newindex metamethod resolution for ctypes.
  • --
  • FFI: Fix compilation of reference field access.
  • --
  • FFI: Fix frame traversal for backtraces with FFI callbacks.
  • --
  • FFI: Fix recording of indexing a struct pointer ctype object itself.
  • --
  • FFI: Allow non-scalar cdata to be compared for equality by address.
  • --
  • FFI: Fix pseudo type conversions for type punning.
  • --
-- --

LuaJIT 2.0.3 — 2014-03-12

--
    --
  • Add PS4 port.
  • --
  • Add support for multilib distro builds.
  • --
  • Fix OSX build.
  • --
  • Fix MinGW build.
  • --
  • Fix Xbox 360 build.
  • --
  • Improve ULOAD forwarding for open upvalues.
  • --
  • Fix GC steps threshold handling when called by JIT-compiled code.
  • --
  • Fix argument checks for math.deg() and math.rad().
  • --
  • Fix jit.flush(func|true).
  • --
  • Respect jit.off(func) when returning to a function, too.
  • --
  • Fix compilation of string.byte(s, nil, n).
  • --
  • Fix line number for relocated bytecode after closure fixup
  • --
  • Fix frame traversal for backtraces.
  • --
  • Fix ABC elimination.
  • --
  • Fix handling of redundant PHIs.
  • --
  • Fix snapshot restore for exit to function header.
  • --
  • Fix type punning alias analysis for constified pointers
  • --
  • Fix call unroll checks in the presence of metamethod frames.
  • --
  • Fix initial maxslot for down-recursive traces.
  • --
  • Prevent BASE register coalescing if parent uses IR_RETF.
  • --
  • Don't purge modified function from stack slots in BC_RET.
  • --
  • Fix recording of BC_VARG.
  • --
  • Don't access dangling reference to reallocated IR.
  • --
  • Fix frame depth display for bytecode dump in -jdump.
  • --
  • ARM: Fix register allocation when rematerializing FPRs.
  • --
  • x64: Fix store to upvalue for lightuserdata values.
  • --
  • FFI: Add missing GC steps for callback argument conversions.
  • --
  • FFI: Properly unload loaded DLLs.
  • --
  • FFI: Fix argument checks for ffi.string().
  • --
  • FFI/x64: Fix passing of vector arguments to calls.
  • --
  • FFI: Rehash finalizer table after GC cycle, if needed.
  • --
  • FFI: Fix cts->L for cdata unsinking in snapshot restore.
  • --
-- --

LuaJIT 2.0.2 — 2013-06-03

--
    --
  • Fix memory access check for fast string interning.
  • --
  • Fix MSVC intrinsics for older versions.
  • --
  • Add missing GC steps for io.* functions.
  • --
  • Fix spurious red zone overflows in machine code generation.
  • --
  • Fix jump-range constrained mcode allocation.
  • --
  • Inhibit DSE for implicit loads via calls.
  • --
  • Fix builtin string to number conversion for overflow digits.
  • --
  • Fix optional argument handling while recording builtins.
  • --
  • Fix optional argument handling in table.concat().
  • --
  • Add partial support for building with MingW64 GCC 4.8-SEH.
  • --
  • Add missing PHI barrier to string.sub(str, a, b) == kstr FOLD rule.
  • --
  • Fix compatibility issues with Illumos.
  • --
  • ARM: Fix cache flush/sync for exit stubs of JIT-compiled code.
  • --
  • MIPS: Fix cache flush/sync for JIT-compiled code jump area.
  • --
  • PPC: Add plt suffix for external calls from assembler code.
  • --
  • FFI: Fix snapshot substitution in SPLIT pass.
  • --
  • FFI/x86: Fix register allocation for 64 bit comparisons.
  • --
  • FFI: Fix tailcall in lowest frame to C function with bool result.
  • --
  • FFI: Ignore long type specifier in ffi.istype().
  • --
  • FFI: Fix calling conventions for 32 bit OSX and iOS simulator (struct returns).
  • --
  • FFI: Fix calling conventions for ARM hard-float EABI (nested structs).
  • --
  • FFI: Improve error messages for arithmetic and comparison operators.
  • --
  • FFI: Insert no-op type conversion for pointer to integer cast.
  • --
  • FFI: Fix unroll limit for ffi.fill().
  • --
  • FFI: Must sink XBAR together with XSTOREs.
  • --
  • FFI: Preserve intermediate string for const char * conversion.
  • --
-- --

LuaJIT 2.0.1 — 2013-02-19

--
    --
  • Don't clear frame for out-of-memory error.
  • --
  • Leave hook when resume catches error thrown from hook.
  • --
  • Add missing GC steps for template table creation.
  • --
  • Fix discharge order of comparisons in Lua parser.
  • --
  • Improve buffer handling for io.read().
  • --
  • OSX: Add support for Mach-O object files to -b option.
  • --
  • Fix PS3 port.
  • --
  • Fix/enable Xbox 360 port.
  • --
  • x86/x64: Always mark ref for shift count as non-weak.
  • --
  • x64: Don't fuse implicitly 32-to-64 extended operands.
  • --
  • ARM: Fix armhf call argument handling.
  • --
  • ARM: Fix code generation for integer math.min/math.max.
  • --
  • PPC/e500: Fix lj_vm_floor() for Inf/NaN.
  • --
  • FFI: Change priority of table initializer variants for structs.
  • --
  • FFI: Fix code generation for bool call result check on x86/x64.
  • --
  • FFI: Load FFI library on-demand for bytecode with cdata literals.
  • --
  • FFI: Fix handling of qualified transparent structs/unions.
  • --
-- --

LuaJIT 2.0.0 — 2012-11-08

--
    --
  • Correctness and completeness: --
      --
    • Fix Android/x86 build.
    • --
    • Fix recording of equality comparisons with __eq metamethods.
    • --
    • Fix detection of immutable upvalues.
    • --
    • Replace error with PANIC for callbacks from JIT-compiled code.
    • --
    • Fix builtin string to number conversion for INT_MIN.
    • --
    • Don't create unneeded array part for template tables.
    • --
    • Fix CONV.num.int sinking.
    • --
    • Don't propagate implicitly widened number to index metamethods.
    • --
    • ARM: Fix ordered comparisons of number vs. non-number.
    • --
    • FFI: Fix code generation for replay of sunk float fields.
    • --
    • FFI: Fix signedness of bool.
    • --
    • FFI: Fix recording of bool call result check on x86/x64.
    • --
    • FFI: Fix stack-adjustment for __thiscall callbacks.
    • --
  • --
-- --

LuaJIT 2.0.0-beta11 — 2012-10-16

--
    --
  • New features: --
      --
    • Use ARM VFP instructions, if available (build-time detection).
    • --
    • Add support for ARM hard-float EABI (armhf).
    • --
    • Add PS3 port.
    • --
    • Add many features from Lua 5.2, e.g. goto/labels. -- Refer to this list.
    • --
    • FFI: Add parameterized C types.
    • --
    • FFI: Add support for copy constructors.
    • --
    • FFI: Equality comparisons never raise an error (treat as unequal instead).
    • --
    • FFI: Box all accessed or returned enums.
    • --
    • FFI: Check for __new metamethod when calling a constructor.
    • --
    • FFI: Handle __pairs/__ipairs metamethods for cdata objects.
    • --
    • FFI: Convert io.* file handle to FILE * pointer (but as a void *).
    • --
    • FFI: Detect and support type punning through unions.
    • --
    • FFI: Improve various error messages.
    • --
  • --
  • Build-system reorganization: --
      --
    • Reorganize directory layout:
      -- lib/*src/jit/*
      -- src/buildvm_*.dascsrc/vm_*.dasc
      -- src/buildvm_*.h → removed
      -- src/buildvm*src/host/*
    • --
    • Add minified Lua interpreter plus Lua BitOp (minilua) to run DynASM.
    • --
    • Change DynASM bit operations to use Lua BitOp
    • --
    • Translate only vm_*.dasc for detected target architecture.
    • --
    • Improve target detection for msvcbuild.bat.
    • --
    • Fix build issues on Cygwin and MinGW with optional MSys.
    • --
    • Handle cross-compiles with FPU/no-FPU or hard-fp/soft-fp ABI mismatch.
    • --
    • Remove some library functions for no-JIT/no-FFI builds.
    • --
    • Add uninstall target to top-level Makefile.
    • --
  • --
  • Correctness and completeness: --
      --
    • Preserve snapshot #0 PC for all traces.
    • --
    • Fix argument checks for coroutine.create().
    • --
    • Command line prints version and JIT status to stdout, not stderr.
    • --
    • Fix userdata __gc separations at Lua state close.
    • --
    • Fix TDUP to HLOAD forwarding for LJ_DUALNUM builds.
    • --
    • Fix buffer check in bytecode writer.
    • --
    • Make os.date() thread-safe.
    • --
    • Add missing declarations for MSVC intrinsics.
    • --
    • Fix dispatch table modifications for return hooks.
    • --
    • Workaround for MSVC conversion bug (doubleuint32_tint32_t).
    • --
    • Fix FOLD rule (i-j)-i => 0-j.
    • --
    • Never use DWARF unwinder on Windows.
    • --
    • Fix shrinking of direct mapped blocks in builtin allocator.
    • --
    • Limit recursion depth in string.match() et al.
    • --
    • Fix late despecialization of ITERN after loop has been entered.
    • --
    • Fix 'f' and 'L' options for debug.getinfo() and lua_getinfo().
    • --
    • Fix package.searchpath().
    • --
    • OSX: Change dylib names to be consistent with other platforms.
    • --
    • Android: Workaround for broken sprintf("%g", -0.0).
    • --
    • x86: Remove support for ancient CPUs without CMOV (before Pentium Pro).
    • --
    • x86: Fix register allocation for calls returning register pair.
    • --
    • x86/x64: Fix fusion of unsigned byte comparisons with swapped operands.
    • --
    • ARM: Fix tonumber() argument check.
    • --
    • ARM: Fix modulo operator and math.floor()/math.ceil() for inf/nan.
    • --
    • ARM: Invoke SPLIT pass for leftover IR_TOBIT.
    • --
    • ARM: Fix BASE register coalescing.
    • --
    • PPC: Fix interpreter state setup in callbacks.
    • --
    • PPC: Fix string.sub() range check.
    • --
    • MIPS: Support generation of MIPS/MIPSEL bytecode object files.
    • --
    • MIPS: Fix calls to floor()/ceil()/trunc().
    • --
    • ARM/PPC: Detect more target architecture variants.
    • --
    • ARM/PPC/e500/MIPS: Fix tailcalls from fast functions, esp. tostring().
    • --
    • ARM/PPC/MIPS: Fix rematerialization of FP constants.
    • --
    • FFI: Don't call FreeLibrary() on our own EXE/DLL.
    • --
    • FFI: Resolve metamethods for constructors, too.
    • --
    • FFI: Properly disable callbacks on iOS (would require executable memory).
    • --
    • FFI: Fix cdecl string parsing during recording.
    • --
    • FFI: Show address pointed to for tostring(ref), too.
    • --
    • FFI: Fix alignment of C call argument/return structure.
    • --
    • FFI: Initialize all fields of standard types.
    • --
    • FFI: Fix callback handling when new C types are declared in callback.
    • --
    • FFI: Fix recording of constructors for pointers.
    • --
    • FFI: Always resolve metamethods for pointers to structs.
    • --
    • FFI: Correctly propagate alignment when interning nested types.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Add allocation sinking and store sinking optimization.
    • --
    • Constify immutable upvalues.
    • --
    • Add builtin string to integer or FP number conversion. Improves cross-platform consistency and correctness.
    • --
    • Create string hash slots in template tables for non-const values, too. Avoids later table resizes.
    • --
    • Eliminate HREFK guard for template table references.
    • --
    • Add various new FOLD rules.
    • --
    • Don't use stack unwinding for lua_yield() (slow on x64).
    • --
    • ARM, PPC, MIPS: Improve XLOAD operand fusion and register hinting.
    • --
    • PPC, MIPS: Compile math.sqrt() to sqrt instruction, if available.
    • --
    • FFI: Fold KPTR + constant offset in SPLIT pass.
    • --
    • FFI: Optimize/inline ffi.copy() and ffi.fill().
    • --
    • FFI: Compile and optimize array/struct copies.
    • --
    • FFI: Compile ffi.typeof(cdata|ctype), ffi.sizeof(), ffi.alignof(), ffi.offsetof() and ffi.gc().
    • --
  • --
-- --

LuaJIT 2.0.0-beta10 — 2012-05-09

--
    --
  • New features: --
      --
    • The MIPS port of LuaJIT is complete. It requires a CPU conforming to the --MIPS32 R1 architecture with hardware FPU. O32 hard-fp ABI, --little-endian or big-endian.
    • --
    • Auto-detect target arch via cross-compiler. No need for --TARGET=arch anymore.
    • --
    • Make DynASM compatible with Lua 5.2.
    • --
    • From Lua 5.2: Try __tostring metamethod on non-string error --messages.
    • --
  • --
  • Correctness and completeness: --
      --
    • Fix parsing of hex literals with exponents.
    • --
    • Fix bytecode dump for certain number constants.
    • --
    • Fix argument type in error message for relative arguments.
    • --
    • Fix argument error handling on Lua stacks without a frame.
    • --
    • Add missing mcode limit check in assembler backend.
    • --
    • Fix compilation on OpenBSD.
    • --
    • Avoid recursive GC steps after GC-triggered trace exit.
    • --
    • Replace <unwind.h> definitions with our own.
    • --
    • Fix OSX build issues. Bump minimum required OSX version to 10.4.
    • --
    • Fix discharge order of comparisons in Lua parser.
    • --
    • Ensure running __gc of userdata created in __gc --at state close.
    • --
    • Limit number of userdata __gc separations at state close.
    • --
    • Fix bytecode JMP slot range when optimizing --and/or with constant LHS.
    • --
    • Fix DSE of USTORE.
    • --
    • Make lua_concat() work from C hook with partial frame.
    • --
    • Add required PHIs for implicit conversions, e.g. via XREF --forwarding.
    • --
    • Add more comparison variants to Valgrind suppressions file.
    • --
    • Disable loading bytecode with an extra header (BOM or #!).
    • --
    • Fix PHI stack slot syncing.
    • --
    • ARM: Reorder type/value tests to silence Valgrind.
    • --
    • ARM: Fix register allocation for ldrd-optimized --HREFK.
    • --
    • ARM: Fix conditional branch fixup for OBAR.
    • --
    • ARM: Invoke SPLIT pass for double args in FFI call.
    • --
    • ARM: Handle all CALL* ops with double results in --SPLIT pass.
    • --
    • ARM: Fix rejoin of POW in SPLIT pass.
    • --
    • ARM: Fix compilation of math.sinh, math.cosh, --math.tanh.
    • --
    • ARM, PPC: Avoid pointless arg clearing in BC_IFUNCF.
    • --
    • PPC: Fix resume after yield from hook.
    • --
    • PPC: Fix argument checking for rawget().
    • --
    • PPC: Fix fusion of floating-point XLOAD/XSTORE.
    • --
    • PPC: Fix HREFK code generation for huge tables.
    • --
    • PPC: Use builtin D-Cache/I-Cache sync code.
    • --
  • --
  • FFI library: --
      --
    • Ignore empty statements in ffi.cdef().
    • --
    • Ignore number parsing errors while skipping definitions.
    • --
    • Don't touch frame in callbacks with tailcalls to fast functions.
    • --
    • Fix library unloading on POSIX systems.
    • --
    • Finalize cdata before userdata when closing the state.
    • --
    • Change ffi.load() library name resolution for Cygwin.
    • --
    • Fix resolving of function name redirects on Windows/x86.
    • --
    • Fix symbol resolving error messages on Windows.
    • --
    • Fix blacklisting of C functions calling callbacks.
    • --
    • Fix result type of pointer difference.
    • --
    • Use correct PC in FFI metamethod error message.
    • --
    • Allow 'typedef _Bool int BOOL;' for the Windows API.
    • --
    • Don't record test for bool result of call, if ignored.
    • --
  • --
-- --

LuaJIT 2.0.0-beta9 — 2011-12-14

--
    --
  • New features: --
      --
    • PPC port of LuaJIT is complete. Default is the dual-number port --(usually faster). Single-number port selectable via src/Makefile --at build time.
    • --
    • Add FFI callback support.
    • --
    • Extend -b to generate .c, .h or .obj/.o --files with embedded bytecode.
    • --
    • Allow loading embedded bytecode with require().
    • --
    • From Lua 5.2: Change to '\z' escape. Reject undefined escape --sequences.
    • --
  • --
  • Correctness and completeness: --
      --
    • Fix OSX 10.7 build. Fix install_name and versioning on OSX.
    • --
    • Fix iOS build.
    • --
    • Install dis_arm.lua, too.
    • --
    • Mark installed shared library as executable.
    • --
    • Add debug option to msvcbuild.bat and improve error handling.
    • --
    • Fix data-flow analysis for iterators.
    • --
    • Fix forced unwinding triggered by external unwinder.
    • --
    • Record missing for loop slot loads (return to lower frame).
    • --
    • Always use ANSI variants of Windows system functions.
    • --
    • Fix GC barrier for multi-result table constructor (TSETM).
    • --
    • Fix/add various FOLD rules.
    • --
    • Add potential PHI for number conversions due to type instability.
    • --
    • Do not eliminate PHIs only referenced from other PHIs.
    • --
    • Correctly anchor implicit number to string conversions in Lua/C API.
    • --
    • Fix various stack limit checks.
    • --
    • x64: Use thread-safe exceptions for external unwinding (GCC platforms).
    • --
    • x64: Fix result type of cdata index conversions.
    • --
    • x64: Fix math.random() and bit.bswap() code generation.
    • --
    • x64: Fix lightuserdata comparisons.
    • --
    • x64: Always extend stack-passed arguments to pointer size.
    • --
    • ARM: Many fixes to code generation backend.
    • --
    • PPC/e500: Fix dispatch for binop metamethods.
    • --
    • PPC/e500: Save/restore condition registers when entering/leaving the VM.
    • --
    • PPC/e500: Fix write barrier in stores of strings to upvalues.
    • --
  • --
  • FFI library: --
      --
    • Fix C comment parsing.
    • --
    • Fix snapshot optimization for cdata comparisons.
    • --
    • Fix recording of const/enum lookups in namespaces.
    • --
    • Fix call argument and return handling for I8/U8/I16/U16 types.
    • --
    • Fix unfused loads of float fields.
    • --
    • Fix ffi.string() recording.
    • --
    • Save GetLastError() around ffi.load() and symbol --resolving, too.
    • --
    • Improve ld script detection in ffi.load().
    • --
    • Record loads/stores to external variables in namespaces.
    • --
    • Compile calls to stdcall, fastcall and vararg functions.
    • --
    • Treat function ctypes like pointers in comparisons.
    • --
    • Resolve __call metamethod for pointers, too.
    • --
    • Record C function calls with bool return values.
    • --
    • Record ffi.errno().
    • --
    • x86: Fix number to uint32_t conversion rounding.
    • --
    • x86: Fix 64 bit arithmetic in assembler backend.
    • --
    • x64: Fix struct-by-value calling conventions.
    • --
    • ARM: Ensure invocation of SPLIT pass for float conversions.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Display trace types with -jv and -jdump.
    • --
    • Record isolated calls. But prefer recording loops over calls.
    • --
    • Specialize to prototype for non-monomorphic functions. Solves the --trace-explosion problem for closure-heavy programming styles.
    • --
    • Always generate a portable vmdef.lua. Easier for distros.
    • --
  • --
-- --

LuaJIT 2.0.0-beta8 — 2011-06-23

--
    --
  • New features: --
      --
    • Soft-float ARM port of LuaJIT is complete.
    • --
    • Add support for bytecode loading/saving and -b command line --option.
    • --
    • From Lua 5.2: __len metamethod for tables --(disabled by default).
    • --
  • --
  • Correctness and completeness: --
      --
    • ARM: Misc. fixes for interpreter.
    • --
    • x86/x64: Fix bit.* argument checking in interpreter.
    • --
    • Catch early out-of-memory in memory allocator initialization.
    • --
    • Fix data-flow analysis for paths leading to an upvalue close.
    • --
    • Fix check for missing arguments in string.format().
    • --
    • Fix Solaris/x86 build (note: not a supported target).
    • --
    • Fix recording of loops with unstable directions in side traces.
    • --
    • x86/x64: Fix fusion of comparisons with u8/u16 --XLOAD.
    • --
    • x86/x64: Fix register allocation for variable shifts.
    • --
  • --
  • FFI library: --
      --
    • Add ffi.errno(). Save errno/GetLastError() --around allocations etc.
    • --
    • Fix __gc for VLA/VLS cdata objects.
    • --
    • Fix recording of casts from 32 bit cdata pointers to integers.
    • --
    • tonumber(cdata) returns nil for non-numbers.
    • --
    • Show address pointed to for tostring(pointer).
    • --
    • Print NULL pointers as "cdata<... *>: NULL".
    • --
    • Support __tostring metamethod for pointers to structs, too.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • More tuning for loop unrolling heuristics.
    • --
    • Flatten and compress in-memory debug info (saves ~70%).
    • --
  • --
-- --

LuaJIT 2.0.0-beta7 — 2011-05-05

--
    --
  • New features: --
      --
    • ARM port of the LuaJIT interpreter is complete.
    • --
    • FFI library: Add ffi.gc(), ffi.metatype(), --ffi.istype().
    • --
    • FFI library: Resolve ld script redirection in ffi.load().
    • --
    • From Lua 5.2: package.searchpath(), fp:read("*L"), --load(string).
    • --
    • From Lua 5.2, disabled by default: empty statement, --table.unpack(), modified coroutine.running().
    • --
  • --
  • Correctness and completeness: --
      --
    • FFI library: numerous fixes.
    • --
    • Fix type mismatches in store-to-load forwarding.
    • --
    • Fix error handling within metamethods.
    • --
    • Fix table.maxn().
    • --
    • Improve accuracy of x^-k on x64.
    • --
    • Fix code generation for Intel Atom in x64 mode.
    • --
    • Fix narrowing of POW.
    • --
    • Fix recording of retried fast functions.
    • --
    • Fix code generation for bit.bnot() and multiplies.
    • --
    • Fix error location within cpcall frames.
    • --
    • Add workaround for old libgcc unwind bug.
    • --
    • Fix lua_yield() and getmetatable(lightuserdata) on x64.
    • --
    • Misc. fixes for PPC/e500 interpreter.
    • --
    • Fix stack slot updates for down-recursion.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Add dual-number mode (int/double) for the VM. Enabled for ARM.
    • --
    • Improve narrowing of arithmetic operators and for loops.
    • --
    • Tune loop unrolling heuristics and increase trace recorder limits.
    • --
    • Eliminate dead slots in snapshots using bytecode data-flow analysis.
    • --
    • Avoid phantom stores to proxy tables.
    • --
    • Optimize lookups in empty proxy tables.
    • --
    • Improve bytecode optimization of and/or operators.
    • --
  • --
-- --

LuaJIT 2.0.0-beta6 — 2011-02-11

--
    --
  • New features: --
      --
    • PowerPC/e500v2 port of the LuaJIT interpreter is complete.
    • --
    • Various minor features from Lua 5.2: Hex escapes in literals, --'\*' escape, reversible string.format("%q",s), --"%g" pattern, table.sort checks callbacks, --os.exit(status|true|false[,close]).
    • --
    • Lua 5.2 __pairs and __ipairs metamethods --(disabled by default).
    • --
    • Initial release of the FFI library.
    • --
  • --
  • Correctness and completeness: --
      --
    • Fix string.format() for non-finite numbers.
    • --
    • Fix memory leak when compiled to use the built-in allocator.
    • --
    • x86/x64: Fix unnecessary resize in TSETM bytecode.
    • --
    • Fix various GC issues with traces and jit.flush().
    • --
    • x64: Fix fusion of indexes for array references.
    • --
    • x86/x64: Fix stack overflow handling for coroutine results.
    • --
    • Enable low-2GB memory allocation on FreeBSD/x64.
    • --
    • Fix collectgarbage("count") result if more than 2GB is in use.
    • --
    • Fix parsing of hex floats.
    • --
    • x86/x64: Fix loop branch inversion with trailing --HREF+NE/EQ.
    • --
    • Add jit.os string.
    • --
    • coroutine.create() permits running C functions, too.
    • --
    • Fix OSX build to work with newer ld64 versions.
    • --
    • Fix bytecode optimization of and/or operators.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Emit specialized bytecode for pairs()/next().
    • --
    • Improve bytecode coalescing of nil constants.
    • --
    • Compile calls to vararg functions.
    • --
    • Compile select().
    • --
    • Improve alias analysis, esp. for loads from allocations.
    • --
    • Tuning of various compiler heuristics.
    • --
    • Refactor and extend IR conversion instructions.
    • --
    • x86/x64: Various backend enhancements related to the FFI.
    • --
    • Add SPLIT pass to split 64 bit IR instructions for 32 bit CPUs.
    • --
  • --
-- --

LuaJIT 2.0.0-beta5 — 2010-08-24

--
    --
  • Correctness and completeness: --
      --
    • Fix trace exit dispatch to function headers.
    • --
    • Fix Windows and OSX builds with LUAJIT_DISABLE_JIT.
    • --
    • Reorganize and fix placement of generated machine code on x64.
    • --
    • Fix TNEW in x64 interpreter.
    • --
    • Do not eliminate PHIs for values only referenced from side exits.
    • --
    • OS-independent canonicalization of strings for non-finite numbers.
    • --
    • Fix string.char() range check on x64.
    • --
    • Fix tostring() resolving within print().
    • --
    • Fix error handling for next().
    • --
    • Fix passing of constant arguments to external calls on x64.
    • --
    • Fix interpreter argument check for two-argument SSE math functions.
    • --
    • Fix C frame chain corruption caused by lua_cpcall().
    • --
    • Fix return from pcall() within active hook.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Replace on-trace GC frame syncing with interpreter exit.
    • --
    • Improve hash lookup specialization by not removing dead keys during GC.
    • --
    • Turn traces into true GC objects.
    • --
    • Avoid starting a GC cycle immediately after library init.
    • --
    • Add weak guards to improve dead-code elimination.
    • --
    • Speed up string interning.
    • --
  • --
-- --

LuaJIT 2.0.0-beta4 — 2010-03-28

--
    --
  • Correctness and completeness: --
      --
    • Fix precondition for on-trace creation of table keys.
    • --
    • Fix {f()} on x64 when table is resized.
    • --
    • Fix folding of ordered comparisons with same references.
    • --
    • Fix snapshot restores for multi-result bytecodes.
    • --
    • Fix potential hang when recording bytecode with nested closures.
    • --
    • Fix recording of getmetatable(), tonumber() and bad argument types.
    • --
    • Fix SLOAD fusion across returns to lower frames.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Add array bounds check elimination. -Oabc is enabled by default.
    • --
    • More tuning for x64, e.g. smaller table objects.
    • --
  • --
-- --

LuaJIT 2.0.0-beta3 — 2010-03-07

--
    --
  • LuaJIT x64 port: --
      --
    • Port integrated memory allocator to Linux/x64, Windows/x64 and OSX/x64.
    • --
    • Port interpreter and JIT compiler to x64.
    • --
    • Port DynASM to x64.
    • --
    • Many 32/64 bit cleanups in the VM.
    • --
    • Allow building the interpreter with either x87 or SSE2 arithmetics.
    • --
    • Add external unwinding and C++ exception interop (default on x64).
    • --
  • --
  • Correctness and completeness: --
      --
    • Fix constructor bytecode generation for certain conditional values.
    • --
    • Fix some cases of ordered string comparisons.
    • --
    • Fix lua_tocfunction().
    • --
    • Fix cutoff register in JMP bytecode for some conditional expressions.
    • --
    • Fix PHI marking algorithm for references from variant slots.
    • --
    • Fix package.cpath for non-default PREFIX.
    • --
    • Fix DWARF2 frame unwind information for interpreter on OSX.
    • --
    • Drive the GC forward on string allocations in the parser.
    • --
    • Implement call/return hooks (zero-cost if disabled).
    • --
    • Implement yield from C hooks.
    • --
    • Disable JIT compiler on older non-SSE2 CPUs instead of aborting.
    • --
  • --
  • Structural and performance enhancements: --
      --
    • Compile recursive code (tail-, up- and down-recursion).
    • --
    • Improve heuristics for bytecode penalties and blacklisting.
    • --
    • Split CALL/FUNC recording and clean up fast function call semantics.
    • --
    • Major redesign of internal function call handling.
    • --
    • Improve FOR loop const specialization and integerness checks.
    • --
    • Switch to pre-initialized stacks. Avoid frame-clearing.
    • --
    • Colocation of prototypes and related data: bytecode, constants, debug info.
    • --
    • Cleanup parser and streamline bytecode generation.
    • --
    • Add support for weak IR references to register allocator.
    • --
    • Switch to compressed, extensible snapshots.
    • --
    • Compile returns to frames below the start frame.
    • --
    • Improve alias analysis of upvalues using a disambiguation hash value.
    • --
    • Compile floor/ceil/trunc to SSE2 helper calls or SSE4.1 instructions.
    • --
    • Add generic C call handling to IR and backend.
    • --
    • Improve KNUM fuse vs. load heuristics.
    • --
    • Compile various io.*() functions.
    • --
    • Compile math.sinh(), math.cosh(), math.tanh() --and math.random().
    • --
  • --
-- --

LuaJIT 2.0.0-beta2 — 2009-11-09

--
    --
  • Reorganize build system. Build static+shared library on POSIX.
  • --
  • Allow C++ exception conversion on all platforms --using a wrapper function.
  • --
  • Automatically catch C++ exceptions and rethrow Lua error --(DWARF2 only).
  • --
  • Check for the correct x87 FPU precision at strategic points.
  • --
  • Always use wrappers for libm functions.
  • --
  • Resurrect metamethod name strings before copying them.
  • --
  • Mark current trace, even if compiler is idle.
  • --
  • Ensure FILE metatable is created only once.
  • --
  • Fix type comparisons when different integer types are involved.
  • --
  • Fix getmetatable() recording.
  • --
  • Fix TDUP with dead keys in template table.
  • --
  • jit.flush(tr) returns status. --Prevent manual flush of a trace that's still linked.
  • --
  • Improve register allocation heuristics for invariant references.
  • --
  • Compile the push/pop variants of table.insert() and --table.remove().
  • --
  • Compatibility with MSVC link /debug.
  • --
  • Fix lua_iscfunction().
  • --
  • Fix math.random() when compiled with -fpic (OSX).
  • --
  • Fix table.maxn().
  • --
  • Bump MACOSX_DEPLOYMENT_TARGET to 10.4.
  • --
  • luaL_check*() and luaL_opt*() now support --negative arguments, too.
    --This matches the behavior of Lua 5.1, but not the specification.
  • --
-- --

LuaJIT 2.0.0-beta1 — 2009-10-31

--
    --
  • This is the first public release of LuaJIT 2.0.
  • --
  • The whole VM has been rewritten from the ground up, so there's --no point in listing differences over earlier versions.
  • --
--
--
--
-- -- -- -diff --git a/doc/contact.html b/doc/contact.html -index fe4751c0..6d609286 100644 ---- a/doc/contact.html -+++ b/doc/contact.html -@@ -1,17 +1,16 @@ -- -+ - - - Contact -- -- -- -+ -+ - - - - - -
--Lua -+Lua -
- -
-

- If you want to report bugs, propose fixes or suggest enhancements, - please use the --GitHub issue tracker. -+» GitHub issue tracker. -

-

- Please send general questions to the --» LuaJIT mailing list. -+» LuaJIT mailing list. -

-

- You can also send any questions you have directly to me: -@@ -90,10 +84,17 @@ xD("fyZKB8xv\"FJytmz8.KAB0u52D") -

- - -+

-+Note: I cannot reply to GMail, Google Workspace, Outlook or Office365 -+mail addresses, since they prefer to mindlessly filter out mails sent -+from small domains using independent mail servers, such as mine. If you -+don't like that, please complain to Google or Microsoft, not me. -+

-+ -

Copyright

-

- All documentation is --Copyright © 2005-2017 Mike Pall. -+Copyright © 2005-2022 Mike Pall. -

- - -@@ -101,7 +102,7 @@ Copyright © 2005-2017 Mike Pall. -
-