From 4e1d0c47e7afb87894c8fc1674f5808fe44f9b9d Mon Sep 17 00:00:00 2001 From: swcompiler Date: Tue, 25 Mar 2025 09:44:08 +0800 Subject: [PATCH] Add sw64 support --- add-sw64-support.patch | 10241 +++++++++++++++++++++++++++++++++++++++ luajit.spec | 14 +- 2 files changed, 10253 insertions(+), 2 deletions(-) create mode 100644 add-sw64-support.patch diff --git a/add-sw64-support.patch b/add-sw64-support.patch new file mode 100644 index 0000000..98c750d --- /dev/null +++ b/add-sw64-support.patch @@ -0,0 +1,10241 @@ +diff --git a/Makefile b/Makefile +index 0f93308..86714ae 100644 +--- a/Makefile ++++ b/Makefile +@@ -88,7 +88,7 @@ FILES_INC= lua.h lualib.h lauxlib.h luaconf.h lua.hpp luajit.h + FILES_JITLIB= bc.lua bcsave.lua dump.lua p.lua v.lua zone.lua \ + dis_x86.lua dis_x64.lua dis_arm.lua dis_arm64.lua \ + dis_arm64be.lua dis_ppc.lua dis_mips.lua dis_mipsel.lua \ +- dis_mips64.lua dis_mips64el.lua vmdef.lua ++ dis_mips64.lua dis_mips64el.lua vmdef.lua dis_sw64.lua + + ifeq (,$(findstring Windows,$(OS))) + HOST_SYS:= $(shell uname -s) +diff --git a/dynasm/dasm_sw64.h b/dynasm/dasm_sw64.h +new file mode 100644 +index 0000000..319f79b +--- /dev/null ++++ b/dynasm/dasm_sw64.h +@@ -0,0 +1,419 @@ ++/* ++** DynASM SW64 encoding engine. ++** Copyright (C) 2023 Mike Pall. All rights reserved. ++** Released under the MIT license. See dynasm.lua for full copyright notice. ++*/ ++ ++#include ++#include ++#include ++#include ++ ++#define DASM_ARCH "sw64" ++ ++#ifndef DASM_EXTERN ++#define DASM_EXTERN(a, b, c, d) 0 ++#endif ++ ++/* Action definitions. */ ++enum { ++ DASM_STOP, DASM_SECTION, DASM_ESC, DASM_REL_EXT, ++ /* The following actions need a buffer position. */ ++ DASM_ALIGN, DASM_REL_LG, DASM_LABEL_LG, ++ /* The following actions also have an argument. */ ++ DASM_REL_PC, DASM_LABEL_PC, DASM_IMM, DASM_IMMS, ++ DASM__MAX ++}; ++ ++/* Maximum number of section buffer positions for a single dasm_put() call. */ ++#define DASM_MAXSECPOS 25 ++ ++/* DynASM encoder status codes. 
Action list offset or number are or'ed in. */ ++#define DASM_S_OK 0x00000000 ++#define DASM_S_NOMEM 0x01000000 ++#define DASM_S_PHASE 0x02000000 ++#define DASM_S_MATCH_SEC 0x03000000 ++#define DASM_S_RANGE_I 0x11000000 ++#define DASM_S_RANGE_SEC 0x12000000 ++#define DASM_S_RANGE_LG 0x13000000 ++#define DASM_S_RANGE_PC 0x14000000 ++#define DASM_S_RANGE_REL 0x15000000 ++#define DASM_S_UNDEF_LG 0x21000000 ++#define DASM_S_UNDEF_PC 0x22000000 ++ ++/* Macros to convert positions (8 bit section + 24 bit index). */ ++#define DASM_POS2IDX(pos) ((pos)&0x00ffffff) ++#define DASM_POS2BIAS(pos) ((pos)&0xff000000) ++#define DASM_SEC2POS(sec) ((sec) << 24) ++#define DASM_POS2SEC(pos) ((pos) >> 24) ++#define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) ++ ++/* Action list type. */ ++typedef const unsigned int *dasm_ActList; ++ ++/* Per-section structure. */ ++typedef struct dasm_Section { ++ int *rbuf; /* Biased buffer pointer (negative section bias). */ ++ int *buf; /* True buffer pointer. */ ++ size_t bsize; /* Buffer size in bytes. */ ++ int pos; /* Biased buffer position. */ ++ int epos; /* End of biased buffer position - max single put. */ ++ int ofs; /* Byte offset into section. */ ++} dasm_Section; ++ ++/* Core structure holding the DynASM encoding state. */ ++struct dasm_State { ++ size_t psize; /* Allocated size of this structure. */ ++ dasm_ActList actionlist; /* Current actionlist pointer. */ ++ int *lglabels; /* Local/global chain/pos ptrs. */ ++ size_t lgsize; ++ int *pclabels; /* PC label chains/pos ptrs. */ ++ size_t pcsize; ++ void **globals; /* Array of globals (bias -10). */ ++ dasm_Section *section; /* Pointer to active section. */ ++ size_t codesize; /* Total size of all code sections. */ ++ int maxsection; /* 0 <= sectionidx < maxsection. */ ++ int status; /* Status code. */ ++ dasm_Section sections[1]; /* All sections. Alloc-extended. */ ++}; ++ ++/* The size of the core structure depends on the max. number of sections. 
*/ ++#define DASM_PSZ(ms) (sizeof(dasm_State) + (ms - 1) * sizeof(dasm_Section)) ++ ++ ++/* Initialize DynASM state. */ ++void dasm_init(Dst_DECL, int maxsection) ++{ ++ dasm_State *D; ++ size_t psz = 0; ++ int i; ++ Dst_REF = NULL; ++ DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); ++ D = Dst_REF; ++ D->psize = psz; ++ D->lglabels = NULL; ++ D->lgsize = 0; ++ D->pclabels = NULL; ++ D->pcsize = 0; ++ D->globals = NULL; ++ D->maxsection = maxsection; ++ for (i = 0; i < maxsection; i++) { ++ D->sections[i].buf = NULL; /* Need this for pass3. */ ++ D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); ++ D->sections[i].bsize = 0; ++ D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ ++ } ++} ++ ++/* Free DynASM state. */ ++void dasm_free(Dst_DECL) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ for (i = 0; i < D->maxsection; i++) ++ if (D->sections[i].buf) ++ DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); ++ if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); ++ if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); ++ DASM_M_FREE(Dst, D, D->psize); ++} ++ ++/* Setup global label array. Must be called before dasm_setup(). */ ++void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) ++{ ++ dasm_State *D = Dst_REF; ++ D->globals = gl - 10; /* Negative bias to compensate for locals. */ ++ DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10 + maxgl) * sizeof(int)); ++} ++ ++/* Grow PC label array. Can be called after dasm_setup(), too. */ ++void dasm_growpc(Dst_DECL, unsigned int maxpc) ++{ ++ dasm_State *D = Dst_REF; ++ size_t osz = D->pcsize; ++ DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc * sizeof(int)); ++ memset((void *)(((unsigned char *)D->pclabels) + osz), 0, D->pcsize - osz); ++} ++ ++/* Setup encoder. 
*/ ++void dasm_setup(Dst_DECL, const void *actionlist) ++{ ++ dasm_State *D = Dst_REF; ++ int i; ++ D->actionlist = (dasm_ActList)actionlist; ++ D->status = DASM_S_OK; ++ D->section = &D->sections[0]; ++ memset((void *)D->lglabels, 0, D->lgsize); ++ if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); ++ for (i = 0; i < D->maxsection; i++) { ++ D->sections[i].pos = DASM_SEC2POS(i); ++ D->sections[i].ofs = 0; ++ } ++} ++ ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) { \ ++ D->status = DASM_S_##st|(p-D->actionlist-1); return; } } while (0) ++#define CKPL(kind, st) \ ++ do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ ++ D->status = DASM_S_RANGE_##st|(p-D->actionlist-1); return; } } while (0) ++#else ++#define CK(x, st) ((void)0) ++#define CKPL(kind, st) ((void)0) ++#endif ++ ++/* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ ++void dasm_put(Dst_DECL, int start, ...) ++{ ++ va_list ap; ++ dasm_State *D = Dst_REF; ++ dasm_ActList p = D->actionlist + start; ++ dasm_Section *sec = D->section; ++ int pos = sec->pos, ofs = sec->ofs; ++ int *b; ++ ++ if (pos >= sec->epos) { ++ DASM_M_GROW(Dst, int, sec->buf, sec->bsize, ++ sec->bsize + 2 * DASM_MAXSECPOS * sizeof(int)); ++ sec->rbuf = sec->buf - DASM_POS2BIAS(pos); ++ sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); ++ } ++ ++ b = sec->rbuf; ++ b[pos++] = start; ++ ++ va_start(ap, start); ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ if (action >= DASM__MAX) { ++ ofs += 4; ++ } else { ++ int *pl, n = action >= DASM_REL_PC ? 
va_arg(ap, int) : 0; ++ switch (action) { ++ case DASM_STOP: goto stop; ++ case DASM_SECTION: ++ n = (ins & 255); CK(n < D->maxsection, RANGE_SEC); ++ D->section = &D->sections[n]; goto stop; ++ case DASM_ESC: p++; ofs += 4; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs += (ins & 255); b[pos++] = ofs; break; ++ case DASM_REL_LG: ++ n = (ins & 2047) - 10; pl = D->lglabels + n; ++ /* Bkwd rel or global. */ ++ if (n >= 0) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } ++ pl += 10; n = *pl; ++ if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ ++ goto linkrel; ++ case DASM_REL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putrel: ++ n = *pl; ++ if (n < 0) { /* Label exists. Get label pos and store it. */ ++ b[pos] = -n; ++ } else { ++ linkrel: ++ b[pos] = n; /* Else link to rel chain, anchored at label. */ ++ *pl = pos; ++ } ++ pos++; ++ break; ++ case DASM_LABEL_LG: ++ pl = D->lglabels + (ins & 2047) - 10; CKPL(lg, LG); goto putlabel; ++ case DASM_LABEL_PC: ++ pl = D->pclabels + n; CKPL(pc, PC); ++ putlabel: ++ n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; ++ } ++ *pl = -pos; /* Label exists now. */ ++ b[pos++] = ofs; /* Store pass1 offset estimate. */ ++ break; ++ case DASM_IMM: case DASM_IMMS: ++#ifdef DASM_CHECKS ++ CK((n & ((1 << ((ins >> 10) & 31)) - 1)) == 0, RANGE_I); ++#endif ++ n >>= ((ins >> 10) & 31); ++#ifdef DASM_CHECKS ++ if (ins & 0x8000) ++ CK(((n + (1<<(((ins>>5)&31)-1)))>>((ins>>5)&31)) == 0, RANGE_I); ++ else ++ CK((n >> ((ins >> 5) & 31)) == 0, RANGE_I); ++#endif ++ b[pos++] = n; ++ break; ++ } ++ } ++ } ++stop: ++ va_end(ap); ++ sec->pos = pos; ++ sec->ofs = ofs; ++} ++#undef CK ++ ++/* Pass 2: Link sections, shrink aligns, fix label offsets. 
*/ ++int dasm_link(Dst_DECL, size_t *szp) ++{ ++ dasm_State *D = Dst_REF; ++ int secnum; ++ int ofs = 0; ++ ++#ifdef DASM_CHECKS ++ *szp = 0; ++ if (D->status != DASM_S_OK) return D->status; ++ { ++ int pc; ++ for (pc = 0; pc * sizeof(int) < D->pcsize; pc++) ++ if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; ++ } ++#endif ++ ++ { /* Handle globals not defined in this translation unit. */ ++ int idx; ++ for (idx = 20; idx * sizeof(int) < D->lgsize; idx++) { ++ int n = D->lglabels[idx]; ++ /* Undefined label: Collapse rel chain and replace with marker (< 0). */ ++ while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } ++ } ++ } ++ ++ /* Combine all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->rbuf; ++ int pos = DASM_SEC2POS(secnum); ++ int lastpos = sec->pos; ++ ++ while (pos != lastpos) { ++ dasm_ActList p = D->actionlist + b[pos++]; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: p++; break; ++ case DASM_REL_EXT: break; ++ case DASM_ALIGN: ofs -= (b[pos++] + ofs) & (ins & 255); break; ++ case DASM_REL_LG: case DASM_REL_PC: pos++; break; ++ case DASM_LABEL_LG: case DASM_LABEL_PC: b[pos++] += ofs; break; ++ case DASM_IMM: case DASM_IMMS: pos++; break; ++ } ++ } ++ stop: (void)0; ++ } ++ ofs += sec->ofs; /* Next section starts right after current section. */ ++ } ++ ++ D->codesize = ofs; /* Total size of all code sections */ ++ *szp = ofs; ++ return DASM_S_OK; ++} ++ ++#ifdef DASM_CHECKS ++#define CK(x, st) \ ++ do { if (!(x)) return DASM_S_##st|(p-D->actionlist-1); } while (0) ++#else ++#define CK(x, st) ((void)0) ++#endif ++ ++/* Pass 3: Encode sections. 
*/ ++int dasm_encode(Dst_DECL, void *buffer) ++{ ++ dasm_State *D = Dst_REF; ++ char *base = (char *)buffer; ++ unsigned int *cp = (unsigned int *)buffer; ++ int secnum; ++ ++ /* Encode all code sections. No support for data sections (yet). */ ++ for (secnum = 0; secnum < D->maxsection; secnum++) { ++ dasm_Section *sec = D->sections + secnum; ++ int *b = sec->buf; ++ int *endb = sec->rbuf + sec->pos; ++ ++ while (b != endb) { ++ dasm_ActList p = D->actionlist + *b++; ++ while (1) { ++ unsigned int ins = *p++; ++ unsigned int action = (ins >> 16) - 0xff00; ++ int n = (action >= DASM_ALIGN && action < DASM__MAX) ? *b++ : 0; ++ switch (action) { ++ case DASM_STOP: case DASM_SECTION: goto stop; ++ case DASM_ESC: *cp++ = *p++; break; ++ case DASM_REL_EXT: ++ n = DASM_EXTERN(Dst, (unsigned char *)cp, (ins & 2047), 1); ++ goto patchrel; ++ case DASM_ALIGN: ++ ins &= 255; while ((((char *)cp - base) & ins)) *cp++ = 0x60000000; ++ break; ++ case DASM_REL_LG: ++ CK(n >= 0, UNDEF_LG); ++ case DASM_REL_PC: ++ CK(n >= 0, UNDEF_PC); ++ n = *DASM_POS2PTR(D, n); ++ if (ins & 2048) ++ n = n - (int)((char *)cp - base); ++ else ++ n = (n + (int)(size_t)base) & 0x0fffffff; ++ patchrel: ++ CK((n & 3) == 0 && ((n + ((ins & 2048) ? 0x00020000 : 0)) >> ++ ((ins & 2048) ? 18 : 28)) == 0, ++ RANGE_REL); ++ cp[-1] |= ((n >> 2) & ((ins & 2048) ? 0x001fffff : 0x03ffffff)); ++ break; ++ case DASM_LABEL_LG: ++ ins &= 2047; if (ins >= 20) D->globals[ins-10] = (void *)(base + n); ++ break; ++ case DASM_LABEL_PC: break; ++ case DASM_IMMS: ++ cp[-1] |= ((n>>3) & 4); n &= 0x1f; ++ /* fallthrough */ ++ case DASM_IMM: ++ cp[-1] |= (n & ((1 << ((ins >> 5) & 31)) - 1)) << (ins & 31); ++ break; ++ default: *cp++ = ins; break; ++ } ++ } ++ stop: (void)0; ++ } ++ } ++ ++ if (base + D->codesize != (char *)cp) /* Check for phase errors. */ ++ return DASM_S_PHASE; ++ return DASM_S_OK; ++} ++#undef CK ++ ++/* Get PC label offset. 
*/ ++int dasm_getpclabel(Dst_DECL, unsigned int pc) ++{ ++ dasm_State *D = Dst_REF; ++ if (pc * sizeof(int) < D->pcsize) { ++ int pos = D->pclabels[pc]; ++ if (pos < 0) return *DASM_POS2PTR(D, -pos); ++ if (pos > 0) return -1; /* Undefined. */ ++ } ++ return -2; /* Unused or out of range. */ ++} ++ ++#ifdef DASM_CHECKS ++/* Optional sanity checker to call between isolated encoding steps. */ ++int dasm_checkstep(Dst_DECL, int secmatch) ++{ ++ dasm_State *D = Dst_REF; ++ if (D->status == DASM_S_OK) { ++ int i; ++ for (i = 1; i <= 9; i++) { ++ if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_LG|i; break; } ++ D->lglabels[i] = 0; ++ } ++ } ++ if (D->status == DASM_S_OK && secmatch >= 0 && ++ D->section != &D->sections[secmatch]) ++ D->status = DASM_S_MATCH_SEC | (D->section - D->sections); ++ return D->status; ++} ++#endif ++ +diff --git a/dynasm/dasm_sw64.lua b/dynasm/dasm_sw64.lua +new file mode 100644 +index 0000000..3787d6c +--- /dev/null ++++ b/dynasm/dasm_sw64.lua +@@ -0,0 +1,767 @@ ++------------------------------------------------------------------------------ ++-- DynASM SW64 module. ++-- ++-- Copyright (C) 2023 Mike Pall. All rights reserved. ++-- See dynasm.lua for full copyright notice. ++------------------------------------------------------------------------------ ++ ++-- Module information: ++local _info = { ++ arch = "sw64", ++ description = "DynASM SW64 module", ++ version = "1.4.0", ++ vernum = 10400, ++ release = "2023-02-03", ++ author = "Mike Pall", ++ license = "MIT", ++} ++ ++-- Exported glue functions for the arch-specific module. ++local _M = { _info = _info } ++ ++-- Cache library functions. 
++local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs ++local assert, setmetatable = assert, setmetatable ++local _s = string ++local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char ++local match, gmatch = _s.match, _s.gmatch ++local concat, sort = table.concat, table.sort ++local bit = bit or require("bit") ++local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift ++local tohex = bit.tohex ++ ++-- Inherited tables and callbacks. ++local g_opt, g_arch ++local wline, werror, wfatal, wwarn ++ ++-- Action name list. ++-- CHECK: Keep this in sync with the C code! ++local action_names = { ++ "STOP", "SECTION", "ESC", "REL_EXT", ++ "ALIGN", "REL_LG", "LABEL_LG", ++ "REL_PC", "LABEL_PC", "IMM", "IMMS", ++} ++ ++-- Maximum number of section buffer positions for dasm_put(). ++-- CHECK: Keep this in sync with the C code! ++local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines. ++ ++-- Action name -> action number. ++local map_action = {} ++for n,name in ipairs(action_names) do ++ map_action[name] = n-1 ++end ++ ++-- Action list buffer. ++local actlist = {} ++ ++-- Argument list for next dasm_put(). Start with offset 0 into action list. ++local actargs = { 0 } ++ ++-- Current number of section buffer positions for dasm_put(). ++local secpos = 1 ++ ++------------------------------------------------------------------------------ ++ ++-- Dump action names and numbers. ++local function dumpactions(out) ++ out:write("DynASM encoding engine action codes:\n") ++ for n,name in ipairs(action_names) do ++ local num = map_action[name] ++ out:write(format(" %-10s %02X %d\n", name, num, num)) ++ end ++ out:write("\n") ++end ++ ++-- Write action list buffer as a huge static C array. 
++local function writeactions(out, name) ++ local nn = #actlist ++ if nn == 0 then nn = 1; actlist[0] = map_action.STOP end ++ out:write("static const unsigned int ", name, "[", nn, "] = {\n") ++ for i = 1,nn-1 do ++ assert(out:write("0x", tohex(actlist[i]), ",\n")) ++ end ++ assert(out:write("0x", tohex(actlist[nn]), "\n};\n\n")) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Add word to action list. ++local function wputxw(n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[#actlist+1] = n ++end ++ ++-- Add action to list with optional arg. Advance buffer pos, too. ++local function waction(action, val, a, num) ++ local w = assert(map_action[action], "bad action name `"..action.."'") ++ wputxw(0xff000000 + w * 0x10000 + (val or 0)) ++ if a then actargs[#actargs+1] = a end ++ if a or num then secpos = secpos + (num or 1) end ++end ++ ++-- Flush action list (intervening C code or buffer pos overflow). ++local function wflush(term) ++ if #actlist == actargs[1] then return end -- Nothing to flush. ++ if not term then waction("STOP") end -- Terminate action list. ++ wline(format("dasm_put(Dst, %s);", concat(actargs, ", ")), true) ++ actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put(). ++ secpos = 1 -- The actionlist offset occupies a buffer position, too. ++end ++ ++-- Put escaped word. ++local function wputw(n) ++ if n >= 0xff000000 then waction("ESC") end ++ wputxw(n) ++end ++ ++-- Reserve position for word. ++local function wpos() ++ local pos = #actlist+1 ++ actlist[pos] = "" ++ return pos ++end ++ ++-- Store word to reserved position. ++local function wputpos(pos, n) ++ assert(n >= 0 and n <= 0xffffffff and n % 1 == 0, "word out of range") ++ actlist[pos] = n ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Global label name -> global label number. With auto assignment on 1st use. 
++local next_global = 20 ++local map_global = setmetatable({}, { __index = function(t, name) ++ if not match(name, "^[%a_][%w_]*$") then werror("bad global label") end ++ local n = next_global ++ if n > 2047 then werror("too many global labels") end ++ next_global = n + 1 ++ t[name] = n ++ return n ++end}) ++ ++-- Dump global labels. ++local function dumpglobals(out, lvl) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("Global labels:\n") ++ for i=20,next_global-1 do ++ out:write(format(" %s\n", t[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write global label enum. ++local function writeglobals(out, prefix) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("enum {\n") ++ for i=20,next_global-1 do ++ out:write(" ", prefix, t[i], ",\n") ++ end ++ out:write(" ", prefix, "_MAX\n};\n") ++end ++ ++-- Write global label names. ++local function writeglobalnames(out, name) ++ local t = {} ++ for name, n in pairs(map_global) do t[n] = name end ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=20,next_global-1 do ++ out:write(" \"", t[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Extern label name -> extern label number. With auto assignment on 1st use. ++local next_extern = 0 ++local map_extern_ = {} ++local map_extern = setmetatable({}, { __index = function(t, name) ++ -- No restrictions on the name for now. ++ local n = next_extern ++ if n > 2047 then werror("too many extern labels") end ++ next_extern = n + 1 ++ t[name] = n ++ map_extern_[n] = name ++ return n ++end}) ++ ++-- Dump extern labels. ++local function dumpexterns(out, lvl) ++ out:write("Extern labels:\n") ++ for i=0,next_extern-1 do ++ out:write(format(" %s\n", map_extern_[i])) ++ end ++ out:write("\n") ++end ++ ++-- Write extern label names. 
++local function writeexternnames(out, name) ++ out:write("static const char *const ", name, "[] = {\n") ++ for i=0,next_extern-1 do ++ out:write(" \"", map_extern_[i], "\",\n") ++ end ++ out:write(" (const char *)0\n};\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Arch-specific maps. ++local map_archdef = { zero="r31", sp="r30", ra="r26", pv="r27", fzero="f31" } -- Ext. register name -> int. name. ++ ++local map_type = {} -- Type name -> { ctype, reg } ++local ctypenum = 0 -- Type number (for Dt... macros). ++ ++-- Reverse defines for registers. ++function _M.revdef(s) ++ if s == "r30" then return "sp" ++ elseif s == "r26" then return "ra" ++ elseif s == "r31" then return "zero" ++ elseif s == "f31" then return "fzero" ++ elseif s == "r27" then return "pv" end ++ return s ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Template strings for SW64 instructions. ++local map_op = { ++ ldi_2 = "f8000000Ao", --0x3e ++ ldih_2 = "fc000000Ao", --0x3f ++ ++ ldl_2 = "8c000000Ao", --0x23 ++ ldw_2 = "88000000Ao", --0x22 ++ ldhu_2 = "84000000Ao", --0x21 ++ ldbu_2 = "80000000Ao", --0x20 ++ ++ fstd_2 = "bc000000Fo", --0x2f ++ fldd_2 = "9c000000Fo", --0x27 ++ flds_2 = "98000000Fo", --0x26 ++ fsts_2 = "b8000000Fo", --0x2e ++ ifmovd_2= "601f0820AI", ++ ifmovs_2= "601f0800AI", ++ fimovd_2= "401f0f00FD", ++ fcvtds_2= "63e00420GI", ++ fcvtsd_2= "63e00400GI", ++ fcvtld_2= "63e005e0GI", ++ fcvtls_2= "63e005a0GI", ++ fcvtdl_2= "63e004e0GI", ++ fcvtdln_2 = "63e004a0GI", ++ fcvtdlp_2 = "63e00460GI", ++ fcvtdlz_2 = "63e00480GI", ++ fcvtwl_2 = "63e00500GI", ++ fcvtlw_2 = "63e00520GI", ++ fcpys_3 = "60000600FGI", ++ fcpysn_3 = "60000640FGI", ++ ++ faddd_3 = "60000020FGI", ++ fsubd_3 = "60000060FGI", ++ fmuld_3 = "600000a0FGI", ++ fdivd_3 = "600000e0FGI", ++ ++ fcmpeq_3 = "60000200FGI", ++ fcmple_3 = "60000220FGI", ++ fcmplt_3 = "60000240FGI", ++ fcmpun_3 = "60000260FGI", ++ ++ stl_2 = 
"ac000000Ao", --0x2b ++ stw_2 = "a8000000Ao", --0x2a ++ sth_2 = "a4000000Ao", --0x29 ++ stb_2 = "a0000000Ao", --0x28 ++ ++ addli_3 = "48000100AjD", --0x12.08 ++ subli_3 = "48000120AjD", --0x12.09 ++ mulli_3 = "48000300AjD", --0x12.18 ++ mull_3 = "40000300ABD", --0x10.18 ++ mulw_3 = "40000200ABD", --0x10.10 ++ addl_3 = "40000100ABD", --0x10.08 ++ subl_3 = "40000120ABD", --0x10.09 ++ subw_3 = "40000020ABD", --0x10.01 ++ subwi_3 = "48000020AjD", --0x12.01 ++ s8addl_3 = "40000180ABD", --0x10.0c ++ s8addli_3 = "48000180AjD", --0x12.0c ++ s8addw_3 = "40000080ABD", --0x10.04 ++ s8addwi_3 = "48000080AjD", --0x12.04 ++ s4addl_3 = "40000140ABD", --0x10.0a ++ s4addli_3 = "48000140AjD", --0x12.0a ++ s4addw_3 = "40000040ABD", --0x10.02 ++ s4addwi_3 = "48000040AjD", --0x12.02 ++ addw_3 = "40000000ABD", --0x10.00 ++ addwi_3 = "48000000AjD", --0x12.00 ++ divw_3 = "40000220ABD", --0x10.11 ++ udivw_3 = "40000240ABD", --0x10.12 ++ remw_3 = "40000260ABD", --0x10.13 ++ uremw_3 = "40000280ABD", --0x10.14 ++ divl_3 = "40000340ABD", --0x10.1a ++ udivl_3 = "40000360ABD", --0x10.1b ++ reml_3 = "40000380ABD", --0x10.1c ++ ureml_3 = "400003a0ABD", --0x10.1d ++ ++ andi_3 = "48000700AjD", ++ and_3 = "40000700ABD", ++ ornoti_3 ="48000760AjD", ++ ornot_3 = "40000760ABD", ++ bis_3 = "40000740ABD", ++ bisi_3 = "48000740AjD", ++ bic_3 = "40000720ABD", ++ bici_3 = "48000720AjD", ++ xori_3 = "48000780AjD", ++ xor_3 = "40000780ABD", ++ slli_3 = "48000900AjD", ++ sll_3 = "40000900ABD", ++ srli_3 = "48000920AjD", ++ srl_3 = "40000920ABD", ++ srai_3 = "48000940AjD", ++ sra_3 = "40000940ABD", ++ roll_3 = "40000960ABD", ++ rolli_3 = "48000960AjD", ++ sllw_3 = "40000980ABD", ++ sllwi_3 = "48000980AjD", ++ srlw_3 = "400009a0ABD", ++ srlwi_3 = "480009a0AjD", ++ sraw_3 = "400009c0ABD", ++ srawi_3 = "480009c0AjD", ++ rolw_3 = "400009e0ABD", ++ rolwi_3 = "480009e0AjD", ++ ++ beq_2 = "c0000000Ab", --0x30 ++ bne_2 = "c4000000Ab", --0x31 ++ blt_2 = "c8000000Ab", --0x32 ++ ble_2 = "cc000000Ab", --0x33 ++ bgt_2 = 
"d0000000Ab", --0x34 ++ bge_2 = "d4000000Ab", --0x35 ++ ++ fbeq_2 = "e0000000Fb", ++ fbge_2 = "f4000000Fb", ++ fbgt_2 = "f0000000Fb", ++ fble_2 = "ec000000Fb", ++ fblt_2 = "e8000000Fb", ++ fbne_2 = "e4000000Fb", ++ ++ call_2 = "04000000Ao", --0x1 ++ ret_2 = "08000000Ao", --0x2 ++ jmp_2 = "0C000000Ao", --0x3 ++ br_2 = "10000000Ab", --0x4 ++ getpc_1 = "10000000A", --br Rn, 0 ++ ++ ++ cmpeq_3 = "40000500ABD", ++ cmplt_3 = "40000520ABD", ++ cmplti_3 = "48000520AjD", ++ cmple_3 = "40000540ABD", ++ cmpult_3 = "40000560ABD", ++ cmpulti_3 = "48000560AjD", ++ cmpule_3 = "40000580ABD", ++ sbt_3 = "400005a0ABD", ++ sbti_3 = "480005a0AjD", ++ cbt_3 = "400005c0ABD", ++ cbti_3 = "480005c0AjD", ++ ++ ++ maskhw_3 = "40000cc0ABD", ++ maskhwi_3 = "48000cc0AjD", ++ maskhl_3 = "40000ce0ABD", ++ maskhli_3 = "48000ce0AjD", ++ maskll_3 = "40000c60ABD", ++ masklli_3 = "48000c60AjD", ++ ++ zap_3 = "40000d00ABD", ++ zapi_3 = "48000d00AjD", ++ ++ extlb_3 = "48000a00AjD", ++ extlh_3 = "48000a20AjD", ++ extlw_3 = "48000a40AjD", ++ extll_3 = "48000a60AjD", ++ exthb_3 = "48000a80AjD", ++ exthh_3 = "48000aa0AjD", ++ exthw_3 = "48000ac0AjD", ++ exthl_3 = "48000ae0AjD", ++ ++ inslb_3 = "48000800AjD", ++ ++ maskhw_3 = "48000cc0AjD", ++ ++ sexth_2 = "43e00d60BD", ++ sexthi_2 = "4be00d60iD", ++ sextb_2 = "43e00d40BD", ++ sextbi_2 = "4be00d40iD", ++ ++ selle_4 = "44000c00ABCD", ++ sellei_4 = "4c000c00AiCD", ++ sellt_4 = "44001000ABCD", ++ sellti_4 = "4c001000AiCD", ++ selgt_4 = "44000800ABCD", ++ selgti_4 = "4c000800AiCD", ++ selge_4 = "44000400ABCD", ++ selgei_4 = "4c000400AiCD", ++ selne_4 = "44001400ABCD", ++ selnei_4 = "4c001400AiCD", ++ seleq_4 = "44000000ABCD", ++ seleqi_4 = "4c000000AiCD", ++ ++ fselne_4 = "64004400FGHI", ++ fseleq_4 = "64004000FGHI", ++ ++ fsqrtd_2 = "63e00120GI", ++ ++ setfpec1_0 = "60000aa0", ++ setfpec3_0 = "60000ae0", ++ ++ syscall_0 = "00000083", --0x0.83 ++ bpt_0 = "00000080", --0x0.80 ++ ++ ldw_dec_2 = "20004000Ap", ++ ldl_dec_2 = "20005000Ap", ++} ++ 
++------------------------------------------------------------------------------ ++ ++local function parse_gpr(expr) ++ local tname, ovreg = match(expr, "^([%w_]+):(r[1-3]?[0-9])$") ++ local tp = map_type[tname or expr] ++ if tp then ++ local reg = ovreg or tp.reg ++ if not reg then ++ werror("type `"..(tname or expr).."' needs a register override") ++ end ++ expr = reg ++ end ++ local r = match(expr, "^r([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r, tp end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++local function parse_fpr(expr) ++ local r = match(expr, "^f([1-3]?[0-9])$") ++ if r then ++ r = tonumber(r) ++ if r <= 31 then return r end ++ end ++ werror("bad register name `"..expr.."'") ++end ++ ++ ++local function parse_imm(imm, bits, shift, scale, signed) ++ local n = tonumber(imm) ++ if n then ++ local m = sar(n, scale) ++ if shl(m, scale) == n then ++ if signed then ++ local s = sar(m, bits-1) ++ if s == 0 then return shl(m, shift) ++ elseif s == -1 then return shl(m + shl(1, bits), shift) end ++ else ++ if sar(m, bits) == 0 then return shl(m, shift) end ++ end ++ end ++ werror("out of range immediate `"..imm.."'") ++ elseif match(imm, "^[rf]([1-3]?[0-9])$") or ++ match(imm, "^([%w_]+):([rf][1-3]?[0-9])$") then ++ werror("expected immediate operand, got register") ++ else ++ waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, imm) ++ return 0 ++ end ++end ++ ++local function parse_disp(disp, width) ++ local imm, reg = match(disp, "^(.*)%(([%w_:]+)%)$") ++ if imm then ++ local r = shl(parse_gpr(reg), 16) ++ local extname = match(imm, "^extern%s+(%S+)$") ++ if extname then ++ waction("REL_EXT", map_extern[extname], nil, 1) ++ return r ++ else ++ return r + parse_imm(imm, width, 0, 0, true) ++ end ++ end ++ local reg, tailr = match(disp, "^([%w_:]+)%s*(.*)$") ++ if reg and tailr ~= "" then ++ local r, tp = parse_gpr(reg) ++ if tp then ++ waction("IMM", 32768+16*32, format(tp.ctypefmt, tailr)) ++ return 
shl(r, 16) ++ end ++ end ++ werror("bad displacement `"..disp.."'") ++end ++ ++local function parse_label(label, def) ++ local prefix = sub(label, 1, 2) ++ -- =>label (pc label reference) ++ if prefix == "=>" then ++ return "PC", 0, sub(label, 3) ++ end ++ -- ->name (global label reference) ++ if prefix == "->" then ++ return "LG", map_global[sub(label, 3)] ++ end ++ if def then ++ -- [1-9] (local label definition) ++ if match(label, "^[1-9]$") then ++ return "LG", 10+tonumber(label) ++ end ++ else ++ -- [<>][1-9] (local label reference) ++ local dir, lnum = match(label, "^([<>])([1-9])$") ++ if dir then -- Fwd: 1-9, Bkwd: 11-19. ++ return "LG", lnum + (dir == ">" and 0 or 10) ++ end ++ -- extern label (extern label reference) ++ local extname = match(label, "^extern%s+(%S+)$") ++ if extname then ++ return "EXT", map_extern[extname] ++ end ++ end ++ werror("bad label `"..label.."'") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Handle opcodes defined with template strings. ++map_op[".template__"] = function(params, template, nparams) ++ if not params then return sub(template, 9) end ++ local op = tonumber(sub(template, 1, 8), 16) ++ local n = 1 ++ ++ -- Limit number of section buffer positions used by a single dasm_put(). ++ -- A single opcode needs a maximum of 2 positions (ins/ext). ++ if secpos+2 > maxsecpos then wflush() end ++ local pos = wpos() ++ ++ -- Process each character. 
++ for p in gmatch(sub(template, 9), ".") do ++ if p == "A" then ++ op = op + shl(parse_gpr(params[n]), 21); n = n + 1 ++ elseif p == "B" then ++ op = op + shl(parse_gpr(params[n]), 16); n = n + 1 ++ elseif p == "C" then ++ op = op + shl(parse_gpr(params[n]), 5); n = n + 1 ++ elseif p == "D" then ++ op = op + shl(parse_gpr(params[n]), 0); n = n + 1 ++ elseif p == "F" then -- float version A ++ op = op + shl(parse_fpr(params[n]), 21); n = n + 1 ++ elseif p == "G" then -- float version B ++ op = op + shl(parse_fpr(params[n]), 16); n = n + 1 ++ elseif p == "H" then -- float version C ++ op = op + shl(parse_fpr(params[n]), 5); n = n + 1 ++ elseif p == "I" then -- float version D ++ op = op + shl(parse_fpr(params[n]), 0); n = n + 1 ++ ++ elseif p == "i" then ++ op = op + parse_imm(params[n], 8, 13, 0, true); n = n + 1 ++ elseif p == "j" then ++ op = op + parse_imm(params[n], 8, 13, 0, false); n = n + 1 ++ elseif p == "o" then ++ op = op + parse_disp(params[n], 16); n = n + 1 ++ elseif p == "p" then ++ op = op + parse_disp(params[n], 12); n = n + 1 ++ elseif p == "b" then ++ local mode, m, s = parse_label(params[n], false) ++ if p == "b" then m = m + 2048 end ++ waction("REL_"..mode, m, s, 1) ++ n = n + 1 ++ else ++ assert(false) ++ end ++ end ++ wputpos(pos, op) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode to mark the position where the action list is to be emitted. ++map_op[".actionlist_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeactions(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global enum is to be emitted. ++map_op[".globals_1"] = function(params) ++ if not params then return "prefix" end ++ local prefix = params[1] -- No syntax check. You get to keep the pieces. 
++ wline(function(out) writeglobals(out, prefix) end) ++end ++ ++-- Pseudo-opcode to mark the position where the global names are to be emitted. ++map_op[".globalnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeglobalnames(out, name) end) ++end ++ ++-- Pseudo-opcode to mark the position where the extern names are to be emitted. ++map_op[".externnames_1"] = function(params) ++ if not params then return "cvar" end ++ local name = params[1] -- No syntax check. You get to keep the pieces. ++ wline(function(out) writeexternnames(out, name) end) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Label pseudo-opcode (converted from trailing colon form). ++map_op[".label_1"] = function(params) ++ if not params then return "[1-9] | ->global | =>pcexpr" end ++ if secpos+1 > maxsecpos then wflush() end ++ local mode, n, s = parse_label(params[1], true) ++ if mode == "EXT" then werror("bad label definition") end ++ waction("LABEL_"..mode, n, s, 1) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcodes for data storage. ++map_op[".long_*"] = function(params) ++ if not params then return "imm..." end ++ for _,p in ipairs(params) do ++ local n = tonumber(p) ++ if not n then werror("bad immediate `"..p.."'") end ++ if n < 0 then n = n + 2^32 end ++ wputw(n) ++ if secpos+2 > maxsecpos then wflush() end ++ end ++end ++ ++map_op[".str100_1"] = function(params) ++ function empty(s) ++ str = "" ++ i=0 ++ repeat ++ str = str .. 
"\0" ++ i = i+1 ++ until i >= s ++ return str ++ end ++ str = string.format("%s\n", params[1]) ++ if #str > 100 then ++ wfatal(".str100 only support string size below 100") ++ end ++ str = str..empty(100-#str) ++ i=0 ++ while i ~= #str do ++ wputxw(shl(string.byte(str, i+4), 24) + ++ shl(string.byte(str, i+3), 16) + ++ shl(string.byte(str, i+2), 8) + ++ shl(string.byte(str, i+1), 0)) ++ i = i + 4 ++ end ++end ++ ++ ++-- Alignment pseudo-opcode. ++map_op[".align_1"] = function(params) ++ if not params then return "numpow2" end ++ if secpos+1 > maxsecpos then wflush() end ++ local align = tonumber(params[1]) ++ if align then ++ local x = align ++ -- Must be a power of 2 in the range (2 ... 256). ++ for i=1,8 do ++ x = x / 2 ++ if x == 1 then ++ waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1. ++ return ++ end ++ end ++ end ++ werror("bad alignment") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pseudo-opcode for (primitive) type definitions (map to C types). ++map_op[".type_3"] = function(params, nparams) ++ if not params then ++ return nparams == 2 and "name, ctype" or "name, ctype, reg" ++ end ++ local name, ctype, reg = params[1], params[2], params[3] ++ if not match(name, "^[%a_][%w_]*$") then ++ werror("bad type name `"..name.."'") ++ end ++ local tp = map_type[name] ++ if tp then ++ werror("duplicate type `"..name.."'") ++ end ++ -- Add #type to defines. A bit unclean to put it in map_archdef. ++ map_archdef["#"..name] = "sizeof("..ctype..")" ++ -- Add new type and emit shortcut define. ++ local num = ctypenum + 1 ++ map_type[name] = { ++ ctype = ctype, ++ ctypefmt = format("Dt%X(%%s)", num), ++ reg = reg, ++ } ++ wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype)) ++ ctypenum = num ++end ++map_op[".type_2"] = map_op[".type_3"] ++ ++-- Dump type definitions. 
++local function dumptypes(out, lvl) ++ local t = {} ++ for name in pairs(map_type) do t[#t+1] = name end ++ sort(t) ++ out:write("Type definitions:\n") ++ for _,name in ipairs(t) do ++ local tp = map_type[name] ++ local reg = tp.reg or "" ++ out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg)) ++ end ++ out:write("\n") ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Set the current section. ++function _M.section(num) ++ waction("SECTION", num) ++ wflush(true) -- SECTION is a terminal action. ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Dump architecture description. ++function _M.dumparch(out) ++ out:write(format("DynASM %s version %s, released %s\n\n", ++ _info.arch, _info.version, _info.release)) ++ dumpactions(out) ++end ++ ++-- Dump all user defined elements. ++function _M.dumpdef(out, lvl) ++ dumptypes(out, lvl) ++ dumpglobals(out, lvl) ++ dumpexterns(out, lvl) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Pass callbacks from/to the DynASM core. ++function _M.passcb(wl, we, wf, ww) ++ wline, werror, wfatal, wwarn = wl, we, wf, ww ++ return wflush ++end ++ ++-- Setup the arch-specific module. ++function _M.setup(arch, opt) ++ g_arch, g_opt = arch, opt ++end ++ ++-- Merge the core maps and the arch-specific maps. 
++function _M.mergemaps(map_coreop, map_def) ++ setmetatable(map_op, { __index = map_coreop }) ++ setmetatable(map_def, { __index = map_archdef }) ++ return map_op, map_def ++end ++ ++return _M ++ ++------------------------------------------------------------------------------ +diff --git a/src/Makefile b/src/Makefile +index f56465d..0226e27 100644 +--- a/src/Makefile ++++ b/src/Makefile +@@ -53,6 +53,7 @@ CCOPT_arm= + CCOPT_arm64= + CCOPT_ppc= + CCOPT_mips= ++CCOPT_sw64= -mieee + # + CCDEBUG= + # Uncomment the next line to generate debug information: +@@ -232,6 +233,9 @@ TARGET_ASHLDFLAGS= $(LDOPTIONS) $(TARGET_XSHLDFLAGS) $(TARGET_FLAGS) $(TARGET_SH + TARGET_ALIBS= $(TARGET_XLIBS) $(LIBS) $(TARGET_LIBS) + + TARGET_TESTARCH=$(shell $(TARGET_CC) $(TARGET_TCFLAGS) -E lj_arch.h -dM) ++ifneq (,$(findstring LJ_TARGET_SW64 ,$(TARGET_TESTARCH))) ++ TARGET_LJARCH= sw64 ++else + ifneq (,$(findstring LJ_TARGET_X64 ,$(TARGET_TESTARCH))) + TARGET_LJARCH= x64 + else +@@ -272,6 +276,7 @@ endif + endif + endif + endif ++endif + + ifneq (,$(findstring LJ_TARGET_PS3 1,$(TARGET_TESTARCH))) + TARGET_SYS= PS3 +@@ -425,6 +430,9 @@ ifneq (,$(findstring LJ_NO_UNWIND 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D NO_UNWIND + TARGET_ARCH+= -DLUAJIT_NO_UNWIND + endif ++ifneq (,$(findstring SW64_DEBUG_WI 1,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D SW64_DEBUG_WI ++endif + DASM_AFLAGS+= -D VER=$(subst LJ_ARCH_VERSION_,,$(filter LJ_ARCH_VERSION_%,$(subst LJ_ARCH_VERSION ,LJ_ARCH_VERSION_,$(TARGET_TESTARCH)))) + ifeq (Windows,$(TARGET_SYS)) + DASM_AFLAGS+= -D WIN +@@ -439,6 +447,9 @@ ifeq (arm,$(TARGET_LJARCH)) + DASM_AFLAGS+= -D IOS + endif + else ++ifeq (,$(findstring LJ_SW64_CORE4 ,$(TARGET_TESTARCH))) ++ DASM_AFLAGS+= -D SW64_CORE4 ++endif + ifeq (ppc,$(TARGET_LJARCH)) + ifneq (,$(findstring LJ_ARCH_SQRT 1,$(TARGET_TESTARCH))) + DASM_AFLAGS+= -D SQRT +diff --git a/src/host/buildvm.c b/src/host/buildvm.c +index de23fab..90a6556 100644 +--- a/src/host/buildvm.c ++++ b/src/host/buildvm.c +@@ -65,6 
+65,8 @@ static int collect_reloc(BuildCtx *ctx, uint8_t *addr, int idx, int type); + #include "../dynasm/dasm_ppc.h" + #elif LJ_TARGET_MIPS + #include "../dynasm/dasm_mips.h" ++#elif LJ_TARGET_SW64 ++#include "../dynasm/dasm_sw64.h" + #else + #error "No support for this architecture (yet)" + #endif +diff --git a/src/host/buildvm_asm.c b/src/host/buildvm_asm.c +index ffd1490..2c01c02 100644 +--- a/src/host/buildvm_asm.c ++++ b/src/host/buildvm_asm.c +@@ -159,7 +159,7 @@ static void emit_asm_wordreloc(BuildCtx *ctx, uint8_t *p, int n, + ins, sym); + exit(1); + } +-#elif LJ_TARGET_MIPS ++#elif LJ_TARGET_MIPS || LJ_TARGET_SW64 + fprintf(stderr, + "Error: unsupported opcode %08x for %s symbol relocation.\n", + ins, sym); +diff --git a/src/jit/bcsave.lua b/src/jit/bcsave.lua +index c17c88e..3c47e25 100644 +--- a/src/jit/bcsave.lua ++++ b/src/jit/bcsave.lua +@@ -64,7 +64,7 @@ local map_type = { + + local map_arch = { + x86 = true, x64 = true, arm = true, arm64 = true, arm64be = true, +- ppc = true, mips = true, mipsel = true, ++ ppc = true, mips = true, mipsel = true, sw64 = true + } + + local map_os = { +@@ -200,7 +200,7 @@ typedef struct { + ]] + local symname = LJBC_PREFIX..ctx.modname + local is64, isbe = false, false +- if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" then ++ if ctx.arch == "x64" or ctx.arch == "arm64" or ctx.arch == "arm64be" or ctx.arch == "sw64" then + is64 = true + elseif ctx.arch == "ppc" or ctx.arch == "mips" then + isbe = true +@@ -237,7 +237,7 @@ typedef struct { + hdr.eendian = isbe and 2 or 1 + hdr.eversion = 1 + hdr.type = f16(1) +- hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8 })[ctx.arch]) ++ hdr.machine = f16(({ x86=3, x64=62, arm=40, arm64=183, arm64be=183, ppc=20, mips=8, mipsel=8,sw64=39190 })[ctx.arch]) + if ctx.arch == "mips" or ctx.arch == "mipsel" then + hdr.flags = f32(0x50001006) + end +@@ -355,7 +355,7 @@ typedef struct { + -- Create PE object and fill in 
header. + local o = ffi.new("PEobj") + local hdr = o.hdr +- hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366 })[ctx.arch]) ++ hdr.arch = f16(({ x86=0x14c, x64=0x8664, arm=0x1c0, ppc=0x1f2, mips=0x366, mipsel=0x366,sw64=0x9916 })[ctx.arch]) + hdr.nsects = f16(2) + hdr.symtabofs = f32(ffi.offsetof(o, "sym0")) + hdr.nsyms = f32(6) +diff --git a/src/jit/dis_sw64.lua b/src/jit/dis_sw64.lua +new file mode 100644 +index 0000000..16b60bf +--- /dev/null ++++ b/src/jit/dis_sw64.lua +@@ -0,0 +1,649 @@ ++---------------------------------------------------------------------------- ++-- LuaJIT SW64 disassembler module. ++-- ++-- Copyright (C) 2019 deepin inc. All rights reserved. ++-- Released under the MIT/X license. See Copyright Notice in luajit.h ++---------------------------------------------------------------------------- ++-- This is a helper module used by the LuaJIT machine code dumper module. ++-- ++-- It disassembles all standard SW64 instructions. ++------------------------------------------------------------------------------ ++local type = type ++local byte, format = string.byte, string.format ++local match, gmatch = string.match, string.gmatch ++local concat = table.concat ++local bit = require("bit") ++local band, bor, tohex = bit.band, bit.bor, bit.tohex ++local lshift, rshift, arshift = bit.lshift, bit.rshift, bit.arshift ++ ++------------------------------------------------------------------------------ ++-- Primary and extended opcode maps ++------------------------------------------------------------------------------ ++ ++local OPC_SYSCALL = "" ++local OPC_MISI_MEMORY, OPC_FUNC_MEMORY = "", "" ++local OPC_MEMORY_F = "FBo" ++local OPC_MEMORY = "ABo" ++local OPC_CONTROL = "Ab" ++local OPC_CONTROL_F = "Fb" ++local OPC_ARITHMETIC = { ++ shift = 5, ++ mask = 0xff, ++ pat = "ABD", ++} ++local OPC_ARITHMETIC_F = { ++ shift = 5, ++ mask = 0xff, ++ pat = "FGI", ++} ++local OPC_ARITHMETIC_I = { ++ shift = 5, ++ mask = 0xff, ++ 
pat = "AjD", ++} ++local OPC_COMPLEX_ARITHMETIC = { ++ shift = 10, ++ mask = 0x7, ++ pat = "ABCD", ++} ++local OPC_COMPLEX_ARITHMETIC_F = { ++ shift = 10, ++ mask = 0x1f, ++ pat = "FGHI", ++} ++ ++local ignores_tabs = { ++ F = { "FCVT", "IFMOV", "FCPY" }, ++} ++ ++function should_ignore(name, field) ++ pat = ignores_tabs[field] or {} ++ for _, p in ipairs(pat) do ++ if match(name, p) then ++ return true ++ end ++ end ++ return false ++end ++ ++local class_tabs = { ++ [0x00] = OPC_SYSCALL, ++ [0x01] = OPC_MEMORY, ++ [0x02] = OPC_MEMORY, ++ [0x03] = OPC_MEMORY, ++ [0x04] = OPC_CONTROL, ++ [0x05] = OPC_CONTROL, ++ [0x06] = OPC_MISI_MEMORY, ++ [0x08] = OPC_FUNC_MEMORY, ++ [0x10] = OPC_ARITHMETIC, ++ [0x11] = OPC_COMPLEX_ARITHMETIC, ++ [0x12] = OPC_ARITHMETIC_I, ++ [0x13] = OPC_ARITHMETIC_I, ++ [0x18] = OPC_ARITHMETIC_F, ++ [0x19] = OPC_COMPLEX_ARITHMETIC_F, ++ [0x20] = OPC_MEMORY, ++ [0x21] = OPC_MEMORY, ++ [0x22] = OPC_MEMORY, ++ [0x23] = OPC_MEMORY, ++ [0x24] = OPC_MEMORY, ++ [0x25] = OPC_MEMORY, ++ [0x26] = OPC_MEMORY_F, ++ [0x27] = OPC_MEMORY_F, ++ [0x28] = OPC_MEMORY, ++ [0x29] = OPC_MEMORY, ++ [0x2A] = OPC_MEMORY, ++ [0x2B] = OPC_MEMORY, ++ [0x2C] = OPC_MEMORY, ++ [0x2D] = OPC_MEMORY, ++ [0x2E] = OPC_MEMORY_F, ++ [0x2F] = OPC_MEMORY_F, ++ [0x30] = OPC_CONTROL, ++ [0x31] = OPC_CONTROL, ++ [0x32] = OPC_CONTROL, ++ [0x33] = OPC_CONTROL, ++ [0x34] = OPC_CONTROL, ++ [0x35] = OPC_CONTROL, ++ [0x36] = OPC_CONTROL, ++ [0x37] = OPC_CONTROL, ++ [0x38] = OPC_CONTROL_F, ++ [0x39] = OPC_CONTROL_F, ++ [0x3A] = OPC_CONTROL_F, ++ [0x3B] = OPC_CONTROL_F, ++ [0x3C] = OPC_CONTROL_F, ++ [0x3D] = OPC_CONTROL_F, ++ [0x3e] = OPC_MEMORY, ++ [0x3f] = OPC_MEMORY, ++} ++ ++local map_pri = { ++ [0x00] = { ++ [0x0] = "SYSCALL/B", ++ [0x1] = "SYSCALL" ++ }, ++ [0x01]= {[0x0]= "CALL"}, ++ [0x02]= {[0x0]= "RET"}, ++ [0x03]= {[0x0]= "JMP"}, ++ [0x04]= {[0x0]= "BR"}, ++ [0x05]= {[0x0]= "BSR"}, ++ [0x06]= { ++ [0x0000] = "MEMB", ++ [0x0001] = "IMEMB", ++ [0x1000] = "RD_F", ++ [0x1020] = "WR_F", ++ 
}, ++ [0x08]= { ++ [0x0]= "LLDW", ++ [0x1]= "LLDL", ++ [0x8]= "LSTW", ++ [0x9]= "LSTL", ++ }, ++ [0x10]= { ++ [0x00]= "ADDW", ++ [0x01]= "SUBW", ++ [0x02]= "S4ADDW", ++ [0x03]= "S4SUBW", ++ [0x04]= "S8ADDW", ++ [0x05]= "S8SUBW", ++ [0x08]= "ADDL", ++ [0x09]= "SUBL", ++ [0x0a]= "S4ADDL", ++ [0x0b]= "S4SUBL", ++ [0x0c]= "S8ADDL", ++ [0x0d]= "S8SUBL", ++ [0x10]= "MULW", ++ [0x11]= "DIVW", ++ [0x12]= "UDIVW", ++ [0x13]= "REMW", ++ [0x14]= "UREMW", ++ [0x18]= "MULL", ++ [0x19]= "UMULH", ++ [0x1a]= "DIVL", ++ [0x1b]= "UDIVL", ++ [0x1c]= "REML", ++ [0x1d]= "UREML", ++ [0x1e]= "ADDPI", ++ [0x1f]= "ADDPIS", ++ [0x28]= "CMPEQ", ++ [0x29]= "CMPLT", ++ [0x2a]= "CMPLE", ++ [0x2b]= "CMPULT", ++ [0x2c]= "CMPULE", ++ [0x2d]= "SBT", ++ [0x2e]= "CBT", ++ [0x38]= "AND", ++ [0x39]= "BIC", ++ [0x3a]= "BIS", ++ [0x3b]= "ORNOT", ++ [0x3c]= "XOR", ++ [0x3d]= "EQV", ++ [0x40]= "INSLB", ++ [0x41]= "INSLH", ++ [0x42]= "INSLW", ++ [0x43]= "INSLL", ++ [0x44]= "INSHB", ++ [0x45]= "INSHH", ++ [0x46]= "INSHW", ++ [0x47]= "INSHL", ++ [0x48]= "SLL", ++ [0x49]= "SRL", ++ [0x4a]= "SRA", ++ [0x4b]= "ROLL", ++ [0x4c]= "SLLW", ++ [0x4d]= "SRLW", ++ [0x4e]= "SRAW", ++ [0x4f]= "ROLW", ++ [0x50]= "EXTLB", ++ [0x51]= "EXTLH", ++ [0x52]= "EXTLW", ++ [0x53]= "EXTLL", ++ [0x54]= "EXTHB", ++ [0x55]= "EXTHH", ++ [0x56]= "EXTHW", ++ [0x57]= "EXTHL", ++ [0x58]= "CTPOP", ++ [0x59]= "CTLZ", ++ [0x5a]= "CTTZ", ++ [0x60]= "MASKLB", ++ [0x61]= "MASKLH", ++ [0x62]= "MASKLW", ++ [0x63]= "MASKLL", ++ [0x64]= "MASKHB", ++ [0x65]= "MASKHH", ++ [0x66]= "MASKHW", ++ [0x67]= "MASKHL", ++ [0x68]= "ZAP", ++ [0x69]= "ZAPNOT", ++ [0x6a]= "SEXTB", ++ [0x6b]= "SEXTH", ++ [0x6c]= "CMPGEB", ++ [0x70]= "FIMOVS", ++ [0x78]= "FIMOVD", ++ }, ++ [0x11]= { ++ [0x0]= "SELEQ", ++ [0x1]= "SELGE", ++ [0x2]= "SELGT", ++ [0x3]= "SELLE", ++ [0x4]= "SELLT", ++ [0x5]= "SELNE", ++ [0x6]= "SELLBC", ++ [0x7]= "SELLBS", ++ }, ++ [0x12]= { ++ [0x00]= "ADDW", ++ [0x01]= "SUBW", ++ [0x02]= "S4ADDW", ++ [0x03]= "S4SUBW", ++ [0x04]= "S8ADDW", ++ [0x05]= 
"S8SUBW", ++ [0x08]= "ADDL", ++ [0x09]= "SUBL", ++ [0x0a]= "S4ADDL", ++ [0x0b]= "S4SUBL", ++ [0x0c]= "S8ADDL", ++ [0x0d]= "S8SUBL", ++ [0x10]= "MULW", ++ [0x18]= "MULL", ++ [0x19]= "UMULH", ++ [0x28]= "CMPEQ", ++ [0x29]= "CMPLT", ++ [0x2a]= "CMPLE", ++ [0x2b]= "CMPULT", ++ [0x2c]= "CMPULE", ++ [0x2d]= "SBT", ++ [0x2e]= "CBT", ++ [0x38]= "AND", ++ [0x39]= "BIC", ++ [0x3a]= "BIS", ++ [0x3b]= "ORNOT", ++ [0x3c]= "XOR", ++ [0x3d]= "EQV", ++ [0x40]= "INSLB", ++ [0x41]= "INSLH", ++ [0x42]= "INSLW", ++ [0x43]= "INSLL", ++ [0x44]= "INSHB", ++ [0x45]= "INSHH", ++ [0x46]= "INSHW", ++ [0x47]= "INSHL", ++ [0x48]= "SLL", ++ [0x49]= "SRL", ++ [0x4a]= "SRA", ++ [0x4b]= "ROLL", ++ [0x4c]= "SLLW", ++ [0x4d]= "SRLW", ++ [0x4e]= "SRAW", ++ [0x4f]= "ROLW", ++ [0x50]= "EXTLB", ++ [0x51]= "EXTLH", ++ [0x52]= "EXTLW", ++ [0x53]= "EXTLL", ++ [0x54]= "EXTHB", ++ [0x55]= "EXTHH", ++ [0x56]= "EXTHW", ++ [0x57]= "EXTHL", ++ [0x60]= "MASKLB", ++ [0x61]= "MASKLH", ++ [0x62]= "MASKLW", ++ [0x63]= "MASKLL", ++ [0x64]= "MASKHB", ++ [0x65]= "MASKHH", ++ [0x66]= "MASKHW", ++ [0x67]= "MASKHL", ++ [0x68]= "ZAP", ++ [0x69]= "ZAPNOT", ++ [0x6a]= "SEXTB", ++ [0x6b]= "SEXTH", ++ [0x6c]= "CMPGEB", ++ }, ++ [0x13]= { ++ [0x0]= "SELEQ", ++ [0x1]= "SELGE", ++ [0x2]= "SELGT", ++ [0x3]= "SELLE", ++ [0x4]= "SELLT", ++ [0x5]= "SELNE", ++ [0x6]= "SELLBC", ++ [0x7]= "SELLBS", ++ }, ++ [0x18]= { ++ [0x00]= "FADDS", ++ [0x01]= "FADDD", ++ [0x02]= "FSUBS", ++ [0x03]= "FSUBD", ++ [0x04]= "FMULS", ++ [0x05]= "FMULD", ++ [0x06]= "FDIVS", ++ [0x07]= "FDIVD", ++ [0x08]= "FSQRTS", ++ [0x09]= "FSQRTD", ++ [0x10]= "FCMPEQ", ++ [0x11]= "FCMPLE", ++ [0x12]= "FCMPLT", ++ [0x13]= "FCMPUN", ++ [0x20]= "FCVTSD", ++ [0x21]= "FCVTDS", ++ [0x22]= "FCVTDL_G", ++ [0x23]= "FCVTDL_P", ++ [0x24]= "FCVTDL_Z", ++ [0x25]= "FCVTDL_N", ++ [0x27]= "FCVTDL", ++ [0x28]= "FCVTWL", ++ [0x29]= "FCVTLW", ++ [0x2D]= "FCVTLS", ++ [0x2F]= "FCVTLD", ++ [0x30]= "FCPYS", ++ [0x31]= "FCPYSE", ++ [0x32]= "FCPYSN", ++ [0x40]= "IFMOVS", ++ [0x41]= "IFMOVD", ++ 
[0x50]= "RFPCR", ++ [0x51]= "WFPCR", ++ [0x54]= "SETFPEC0", ++ [0x55]= "SETFPEC1", ++ [0x56]= "SETFPEC2", ++ [0x57]= "SETFPEC3", ++ }, ++ [0x19]= { ++ [0x00]= "FMAS", ++ [0x01]= "FMAD", ++ [0x02]= "FMSS", ++ [0x03]= "FMSD", ++ [0x04]= "FNMAS", ++ [0x05]= "FNMAD", ++ [0x06]= "FNMSS", ++ [0x07]= "FNMSD", ++ ++ [0x10]= "FSELEQ", ++ [0x11]= "FSELNE", ++ [0x12]= "FSELLT", ++ [0x13]= "FSELLE", ++ [0x14]= "FSELGT", ++ [0x15]= "FSELGE", ++ }, ++ [0x1D]= {[0]= "LBR"}, ++ [0x20]= {[0]= "LDBU"}, ++ [0x21]= {[0]= "LDHU"}, ++ [0x22]= {[0]= "LDW"}, ++ [0x23]= {[0]= "LDL"}, ++ [0x24]= {[0]= "LDL_U"}, ++ [0x25]= {[0]= "PRI_LD"}, ++ [0x26]= {[0]= "FLDS"}, ++ [0x27]= {[0]= "FLDD"}, ++ [0x28]= {[0]= "STB"}, ++ [0x29]= {[0]= "STH"}, ++ [0x2A]= {[0]= "STW"}, ++ [0x2B]= {[0]= "STL"}, ++ [0x2C]= {[0]= "STL_U"}, ++ [0x2D]= {[0]= "PRI_ST"}, ++ [0x2E]= {[0]= "FSTS"}, ++ [0x2F]= {[0]= "FSTD"}, ++ [0x30]= {[0]= "BEQ"}, ++ [0x31]= {[0]= "BNE"}, ++ [0x32]= {[0]= "BLT"}, ++ [0x33]= {[0]= "BLE"}, ++ [0x34]= {[0]= "BGT"}, ++ [0x35]= {[0]= "BGE"}, ++ [0x36]= {[0]= "BLBC"}, ++ [0x37]= {[0]= "BLBS"}, ++ [0x38]= {[0]= "FBEQ"}, ++ [0x39]= {[0]= "FBNE"}, ++ [0x3A]= {[0]= "FBLT"}, ++ [0x3B]= {[0]= "FBLE"}, ++ [0x3C]= {[0]= "FBGT"}, ++ [0x3D]= {[0]= "FBGE"}, ++ [0x3e]= {[0]= "LDI"}, ++ [0x3f]= {[0]= "LDIH"}, ++} ++ ++ ++------------------------------------------------------------------------------ ++ ++local map_gpr = { ++ [0] = "r0", "r1", "r2", "r3", "r4", "r5", "r6", "r7", ++ "r8", "BASE", "r10", "r11", "r12", "r13", "r14", "JGL", ++ "r16", "r17", "r18", "r19", "r20", "r21", "r22", "r23", ++ "r24", "r25", "JTMP", "r27", "at", "r29", "sp", "zero", ++} ++ ++------------------------------------------------------------------------------ ++ ++-- Output a nicely formatted line with an opcode and operands. 
++local function putop(ctx, text, operands) ++ local pos = ctx.pos ++ local extra = "" ++ if ctx.rel then ++ local sym = ctx.symtab[ctx.rel] ++ if sym then extra = "\t->"..sym end ++ end ++ if ctx.hexdump > 0 then ++ ctx.out(format("%08x %s %-7s %s%s\n", ++ ctx.addr+pos, tohex(ctx.op), text, concat(operands, ", "), extra)) ++ else ++ ctx.out(format("%08x %-7s %s%s\n", ++ ctx.addr+pos, text, concat(operands, ", "), extra)) ++ end ++ ctx.pos = pos + 4 ++end ++ ++-- Fallback for unknown opcodes. ++local function unknown(ctx) ++ return putop(ctx, ".long", { "0x"..tohex(ctx.op) }) ++end ++ ++local function get_le(ctx) ++ local pos = ctx.pos ++ local b0, b1, b2, b3 = byte(ctx.code, pos+1, pos+4) ++ return bor(lshift(b3, 24), lshift(b2, 16), lshift(b1, 8), b0) ++end ++ ++-- Disassemble a single instruction. ++local function disass_ins(ctx) ++ local op = ctx:get() ++ local operands = {} ++ local last = nil ++ ctx.op = op ++ ctx.rel = nil ++ ++ local opcode = band(rshift(op, 26), 0x3f) ++ local opat = map_pri[opcode] ++ ++ local fn = 0 ++ local pat = class_tabs[opcode] ++ local name = opat[0] ++ if pat.shift then ++ name = opat[band(rshift(op, pat.shift), pat.mask)] ++ pat = pat.pat ++ end ++ local isf = false ++ ++ if name == "FIMOVD" then ++ pat = "FD" ++ elseif name == "IFMOVD" then ++ pat = "AI" ++ end ++ ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if should_ignore(name, p) then ++ -- do nothing ++ elseif p == "A" then ++ x = map_gpr[band(rshift(op, 21), 31)] ++ elseif p == "B" then ++ x = map_gpr[band(rshift(op, 16), 31)] ++ elseif p == "C" then ++ x = map_gpr[band(rshift(op, 5), 31)] ++ elseif p == "D" then ++ x = map_gpr[band(rshift(op, 0), 31)] ++ elseif p == "F" then ++ isf = true ++ x = "f"..band(rshift(op, 21), 31) ++ elseif p == "G" then ++ isf = true ++ x = "f"..band(rshift(op, 16), 31) ++ elseif p == "H" then ++ isf = true ++ x = "f"..band(rshift(op, 5), 31) ++ elseif p == "I" then ++ isf = true ++ x = "f"..band(rshift(op, 0), 31) ++ elseif p == "o" 
then ++ local disp = arshift(lshift(op, 16), 16) ++ if name == "LDI" and disp == 0 then ++ name = "MOVE" ++ operands[#operands] = last ++ else ++ operands[#operands] = format("%d(%s)", disp, last) ++ end ++ elseif p == "p" then ++ local index = map_gpr[band(rshift(op, 16), 31)] ++ operands[#operands] = format("%s(%s)", index, last) ++ elseif p == "b" then ++ x = ctx.addr + ctx.pos + arshift(lshift(op, 21), 21)*4 + 4 ++ ctx.rel = x ++ x = format("0x%08x", x) ++ elseif p == "i" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "j" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "j" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "1" then ++ if last == "ra" then ++ operands[#operands] = nil ++ end ++ else ++ assert(false) ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ return putop(ctx, name, operands) ++end ++ ++------------------------------------------------------------------------------ ++ ++-- Disassemble a block of code. ++local function disass_block(ctx, ofs, len) ++ if not ofs then ofs = 0 end ++ local stop = len and ofs+len or #ctx.code ++ stop = stop - stop % 4 ++ ctx.pos = ofs - ofs % 4 ++ ctx.rel = nil ++ while ctx.pos < stop do disass_ins(ctx) end ++end ++ ++-- Extended API: create a disassembler context. Then call ctx:disass(ofs, len). ++local function create(code, addr, out) ++ local ctx = {} ++ ctx.code = code ++ ctx.addr = addr or 0 ++ ctx.out = out or io.write ++ ctx.symtab = {} ++ ctx.disass = disass_block ++ ctx.hexdump = 8 ++ ctx.get = get_le ++ return ctx ++end ++ ++-- Simple API: disassemble code (a string) at address and output via out. ++local function disass(code, addr, out) ++ create(code, addr, out):disass() ++end ++ ++-- Return register name for RID. 
++local function regname(r) ++ if r < 32 then return map_gpr[r] end ++ return "f"..(r-32) ++end ++ ++ ++ ++function wi_debug(__obj, op, addr) ++ if not addr then ++ addr = 0 ++ end ++ local operands = {} ++ local last = nil ++ ++ local opcode = band(rshift(op, 26), 0x3f) ++ local opat = map_pri[opcode] ++ ++ local fn = 0 ++ local pat = class_tabs[opcode] ++ local name = opat[0] ++ if pat.shift then ++ name = opat[band(rshift(op, pat.shift), pat.mask)] ++ pat = pat.pat ++ end ++ local isf = false ++ ++ if name == "FIMOVD" then ++ pat = "FD" ++ elseif name == "IFMOVD" then ++ pat = "AI" ++ end ++ ++ for p in gmatch(pat, ".") do ++ local x = nil ++ if should_ignore(name, p) then ++ -- do nothing ++ elseif p == "A" then ++ x = map_gpr[band(rshift(op, 21), 31)] ++ elseif p == "B" then ++ x = map_gpr[band(rshift(op, 16), 31)] ++ elseif p == "C" then ++ x = map_gpr[band(rshift(op, 5), 31)] ++ elseif p == "D" then ++ x = map_gpr[band(rshift(op, 0), 31)] ++ elseif p == "F" then ++ isf = true ++ x = "f"..band(rshift(op, 21), 31) ++ elseif p == "G" then ++ isf = true ++ x = "f"..band(rshift(op, 16), 31) ++ elseif p == "H" then ++ isf = true ++ x = "f"..band(rshift(op, 5), 31) ++ elseif p == "I" then ++ isf = true ++ x = "f"..band(rshift(op, 0), 31) ++ elseif p == "o" then ++ local disp = arshift(lshift(op, 16), 16) ++ if name == "LDI" and disp == 0 then ++ name = "MOVE" ++ operands[#operands] = last ++ else ++ operands[#operands] = format("%d(%s)", disp, last) ++ end ++ elseif p == "p" then ++ local index = map_gpr[band(rshift(op, 16), 31)] ++ operands[#operands] = format("%s(%s)", index, last) ++ elseif p == "b" then ++ x = addr + arshift(lshift(op, 21), 21)*4 + 4 ++ x = format("0x%08x", x) ++ elseif p == "i" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "j" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "j" then ++ x = band(rshift(op, 13), 0x000ff) ++ elseif p == "1" then ++ if last == "ra" then ++ operands[#operands] = nil ++ end ++ else ++ 
assert(false) ++ end ++ if x then operands[#operands+1] = x; last = x end ++ end ++ print(name, concat(operands, ", ")) ++end ++ ++-- Public module functions. ++return { ++ create = create, ++ disass = disass, ++ regname = regname, ++ wi_debug = wi_debug, ++} +diff --git a/src/jit/dump.lua b/src/jit/dump.lua +index 2bea652..3f90fe9 100644 +--- a/src/jit/dump.lua ++++ b/src/jit/dump.lua +@@ -623,7 +623,7 @@ local function dump_texit(tr, ex, ngpr, nfpr, ...) + if i % 8 == 0 then out:write("\n") end + end + end +- if jit.arch == "mips" or jit.arch == "mipsel" then ++ if jit.arch == "mips" or jit.arch == "mipsel" or jit.arch == "sw64" then + for i=1,nfpr,2 do + out:write(format(" %+17.14g", regs[ngpr+i])) + if i % 8 == 7 then out:write("\n") end +diff --git a/src/lib_jit.c b/src/lib_jit.c +index 22ca0a1..4be1761 100644 +--- a/src/lib_jit.c ++++ b/src/lib_jit.c +@@ -732,6 +732,8 @@ static uint32_t jit_cpudetect(lua_State *L) + } + #endif + #endif ++#elif LJ_TARGET_SW64 ++ /* Nothing to do. */ + #else + #error "Missing CPU detection for this architecture" + #endif +diff --git a/src/lj_arch.h b/src/lj_arch.h +index c8d7138..070bd89 100644 +--- a/src/lj_arch.h ++++ b/src/lj_arch.h +@@ -29,6 +29,9 @@ + #define LUAJIT_ARCH_mips32 6 + #define LUAJIT_ARCH_MIPS64 7 + #define LUAJIT_ARCH_mips64 7 ++#define LUAJIT_ARCH_SW64 77 ++#define LUAJIT_ARCH_sw64 77 ++ + + /* Target OS. 
*/ + #define LUAJIT_OS_OTHER 0 +@@ -55,6 +58,8 @@ + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS64 + #elif defined(__mips__) || defined(__mips) || defined(__MIPS__) || defined(__MIPS) + #define LUAJIT_TARGET LUAJIT_ARCH_MIPS32 ++#elif defined(__sw_64__) ++#define LUAJIT_TARGET LUAJIT_ARCH_SW64 + #else + #error "No support for this architecture (yet)" + #endif +@@ -358,6 +363,24 @@ + #define LJ_ARCH_VERSION 10 + #endif + ++#elif LUAJIT_TARGET == LUAJIT_ARCH_SW64 ++ ++#define LJ_ARCH_NAME "sw64" ++#define LJ_ARCH_ENDIAN LUAJIT_LE ++#define LJ_ARCH_BITS 64 ++#define LJ_TARGET_SW64 1 ++#define LJ_TARGET_EHRETREG 4 //TODO ++#define LJ_TARGET_EHRAREG 8 //??TODO ++#define LJ_TARGET_GC64 1 ++#define LJ_TARGET_JUMPRANGE 21 /* 2*2^21 = 4MB-aligned region */ ++#define LJ_TARGET_MASKSHIFT 1 ++#define LJ_TARGET_MASKROT 1 ++#define LJ_ARCH_NUMMODE LJ_NUMMODE_DUAL ++#define LJ_ARCH_VERSION 10 // ?? ++#define LJ_PAGESIZE 8192 ++#define SW64_DEBUG_WI 0 ++#define LJ_SW64_CORE4 0 ++ + #else + #error "No target architecture defined" + #endif +@@ -553,7 +576,7 @@ + #define LUAJIT_NO_UNWIND 1 + #endif + +-#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 ++#if defined(LUAJIT_NO_UNWIND) || defined(__symbian__) || LJ_TARGET_IOS || LJ_TARGET_PS3 || LJ_TARGET_PS4 || LJ_TARGET_SW64 + #define LJ_NO_UNWIND 1 + #endif + +diff --git a/src/lj_asm.c b/src/lj_asm.c +index c2cf5a9..febea55 100644 +--- a/src/lj_asm.c ++++ b/src/lj_asm.c +@@ -177,6 +177,8 @@ IRFLDEF(FLOFS) + #include "lj_emit_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_emit_mips.h" ++#elif LJ_TARGET_SW64 ++#include "lj_emit_sw64.h" + #else + #error "Missing instruction emitter for target CPU" + #endif +@@ -1597,6 +1599,8 @@ static void asm_loop(ASMState *as) + #include "lj_asm_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_asm_mips.h" ++#elif LJ_TARGET_SW64 ++#include "lj_asm_sw64.h" + #else + #error "Missing assembler for target CPU" + #endif +@@ -2374,7 +2378,9 @@ void 
lj_asm_trace(jit_State *J, GCtrace *T) + T->nins = J->curfinal->nins; + break; /* Done. */ + } +- ++#if SW64_DEBUG_WI ++ memset(as->mcbot, 0, sizeof(MCode)*(as->mctop - as->mcbot)); ++#endif + /* Otherwise try again with a bigger IR. */ + lj_trace_free(J2G(J), J->curfinal); + J->curfinal = NULL; /* In case lj_trace_alloc() OOMs. */ +diff --git a/src/lj_asm_sw64.h b/src/lj_asm_sw64.h +new file mode 100644 +index 0000000..6564c77 +--- /dev/null ++++ b/src/lj_asm_sw64.h +@@ -0,0 +1,2072 @@ ++/* ++** SW64 IR assembler (SSA IR -> machine code). ++** Copyright (C) 2019 deepin inc. See Copyright Notice in luajit.h ++*/ ++ ++#include ++#define TODO do {printf("\e[1;34mTODO IMPLEMENT %s\e[m\n", __FUNCTION__); asm("bpt;bpt");} while(0); ++ ++#define EXIT_ROOM 6 ++ ++/* -- Register allocator extensions --------------------------------------- */ ++ ++/* Allocate a register with a hint. */ ++static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!ra_hashint(r) && !iscrossref(as, ref)) ++ ra_sethint(IR(ref)->r, hint); /* Propagate register hint. */ ++ r = ra_allocref(as, ref, allow); ++ } ++ ra_noweak(as, r); ++ return r; ++} ++ ++/* Allocate a register or RID_ZERO. */ ++static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow) ++{ ++ Reg r = IR(ref)->r; ++ if (ra_noreg(r)) { ++ if (!(allow & RSET_FPR) && irref_isk(ref) && get_kval(IR(ref)) == 0) ++ return RID_ZERO; ++ r = ra_allocref(as, ref, allow); ++ } else { ++ ra_noweak(as, r); ++ } ++ return r; ++} ++ ++/* Allocate two source registers for three-operand instructions. 
*/ ++static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow) ++{ ++ IRIns *irl = IR(ir->op1), *irr = IR(ir->op2); ++ Reg left = irl->r, right = irr->r; ++ if (ra_hasreg(left)) { ++ ra_noweak(as, left); ++ if (ra_noreg(right)) ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ else ++ ra_noweak(as, right); ++ } else if (ra_hasreg(right)) { ++ ra_noweak(as, right); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else if (ra_hashint(right)) { ++ right = ra_alloc1z(as, ir->op2, allow); ++ left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right)); ++ } else { ++ left = ra_alloc1z(as, ir->op1, allow); ++ right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left)); ++ } ++ return left | (right << 8); ++} ++ ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Check if there's no conflicting instruction between curins and ref. */ ++static int noconflict(ASMState *as, IRRef ref, IROp conflict) ++{ ++ IRIns *ir = as->ir; ++ IRRef i = as->curins; ++ if (i > ref + CONFLICT_SEARCH_LIM) ++ return 0; /* Give up, ref is too far away. */ ++ while (--i > ref) ++ if (ir[i].o == conflict) ++ return 0; /* Conflict found. */ ++ return 1; /* Ok, no conflict. */ ++} ++ ++/* Fuse the array base of colocated arrays. */ ++static int32_t asm_fuseabase(ASMState *as, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE && ++ !neverfuse(as) && noconflict(as, ref, IR_NEWREF)) ++ return (int32_t)sizeof(GCtab); ++ return 0; ++} ++ ++/* Fuse array/hash/upvalue reference into register+offset operand. 
*/ ++static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow) ++{ ++ IRIns *ir = IR(ref); ++ if (ra_noreg(ir->r)) { ++ if (ir->o == IR_AREF) { ++ if (mayfuse(as, ref)) { ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, refa, allow); ++ } ++ } ++ } ++ } else if (ir->o == IR_HREFK) { ++ if (mayfuse(as, ref)) { ++ int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node)); ++ if (checki16(ofs)) { ++ *ofsp = ofs; ++ return ra_alloc1(as, ir->op1, allow); ++ } ++ } ++ } else if (ir->o == IR_UREFC) { ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ intptr_t ofs = (intptr_t)&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv; ++ intptr_t jgl = (intptr_t)J2G(as->J); ++ if ((uintptr_t)(ofs-jgl) < 65536) { ++ *ofsp = ofs-jgl-32768; ++ return RID_JGL; ++ } else { ++ *ofsp = (int16_t)ofs; ++ return ra_allock(as, ofs-(int16_t)ofs, allow); ++ } ++ } ++ } ++ } ++ *ofsp = 0; ++ return ra_alloc1(as, ref, allow); ++} ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. */ ++ ++/* --- LOAD ADDRESS MACRO ------------------------------------------------ */ ++ ++static int asm_lda(ASMState* as, MCode* mcp, Reg dest, uintptr_t addr) ++{ ++ int count = 0; ++ int16_t hi, lo; ++ MCode mtmp[5] = {0}; ++ split64AddrHI32(addr, &hi, &lo); ++ if (hi != 0) { ++ // ldih dest, hi(zero) ++ mtmp[count++] = SW64I_LDIH | SW64F_A(dest) | SW64F_DISP(hi, RID_ZERO); ++ } ++ if (lo != 0) { ++ // ldi dest, lo(dest or zero) ++ mtmp[count++] = SW64I_LDI | SW64F_A(dest) | SW64F_DISP(lo, hi ? dest : RID_ZERO); ++ } ++ if (hi || lo) { ++ // slli dest, 32, dest ++ mtmp[count++] = SW64I_SLLI | SW64F_A(dest) | SW64F_j(32) | SW64F_D(dest); ++ } ++ ++ split64AddrLO32(addr, &hi, &lo); ++ mtmp[count] = SW64I_LDIH | SW64F_A(dest) | SW64F_DISP(hi, count > 1 ? 
dest : RID_ZERO); ++ count++; ++ mtmp[count++] = SW64I_LDI | SW64F_A(dest) | SW64F_DISP(lo, dest); ++ ++ for (int i=count-1; i>=0; i--) { ++ __WI(&mcp[i-count], mtmp[i]); ++ } ++ return count; ++} ++ ++ ++/* -- Guard handling ------------------------------------------------------ */ ++ ++/* Need some spare long-range jump slots, for out-of-range branches. */ ++#define SW64_SPAREJUMP 4 ++ ++/* Setup spare long-range jump slots per mcarea. */ ++static void asm_sparejump_setup(ASMState *as) ++{ ++ MCode *mxp = as->mcbot; ++ if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == 8) { ++ lua_assert(SW64I_NOP == 0x43ff075f); ++ memset(mxp, SW64I_NOP, SW64_SPAREJUMP*2*sizeof(MCode)); ++ mxp += SW64_SPAREJUMP*2; ++ lua_assert(mxp < as->mctop); ++ lj_mcode_sync(as->mcbot, mxp); ++ lj_mcode_commitbot(as->J, mxp); ++ as->mcbot = mxp; ++ as->mclim = as->mcbot + MCLIM_REDZONE; ++ } ++} ++ ++/* Setup exit stub after the end of each trace. */ ++static void asm_exitstub_setup(ASMState *as) ++{ ++ MCode *mxp = as->mctop; ++ /* ++ stw TMP, 0(sp); //store exit number ++ ++ ldi TMP, traceno(zero); ++ lda at, lj_vm_exit_handler ++ call zero, (at); ++ */ ++ ++ __WI(--mxp, SW64I_CALL | SW64F_A(RID_ZERO) | SW64F_DISP(0, RID_R28)); ++ ++ mxp -= asm_lda(as, mxp, RID_R28, (uintptr_t)(void*)lj_vm_exit_handler); ++ ++ __WI(--mxp, SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISPI(as->T->traceno)); ++ ++#if SW64_DEBUG_WI ++ __WI(--mxp, SW64I_STL | SW64F_A(RID_TMP) | SW64F_DISP(0, RID_SP)); ++#else ++ __WI(--mxp, SW64I_STW | SW64F_A(RID_TMP) | SW64F_DISP(0, RID_SP)); ++#endif ++ ++ as->mctop = mxp; ++} ++ ++/* Keep this in-sync with exitstub_trace_addr(). */ ++#define asm_exitstub_addr(as) ((as)->mctop) ++ ++/* Emit conditional branch to exit for guard. 
*/ ++static void asm_guard(ASMState *as, SW64Ins mi, Reg a) ++{ ++ lua_assert(a != RID_TMP); ++ MCode *target = asm_exitstub_addr(as); ++ MCode *p = as->mcp; ++ if (LJ_UNLIKELY(p == as->invmcp)) { ++ as->invmcp = NULL; ++ as->loopinv = 1; ++ as->mcp = p+1; ++ mi = invert_cond(mi); ++ target = p; /* Patch target later in asm_loop_fixup. */ ++ } ++ lua_assert(as->snapno >= 0); ++ ++ // bxx a, target ++ emit_branch(as, mi, a, target); ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); ++} ++static void asm_compare_guard(ASMState* as, SW64Ins cmp, ++ Reg a, Reg b, MCode *target) ++{ ++ switch(SW64_OP(cmp)) { ++ case SW64_OP(0x60000000): ++ emit_branch(as, SW64I_FBNE, RID_F28, target); ++#if SW64_DEBUG_WI ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_TMP, as->snapno); ++ emit_loadu64(as, RID_TMP, (((unsigned long)(void*)as->mcp) << 32)); ++#else ++ // ldi RID_TMP, as->snapno(zero) ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); ++#endif ++ emit_FGI(as, cmp, a, b, RID_F28); ++ break; ++ case SW64_OP(0x40000000): ++ emit_branch(as, SW64I_BNE, RID_R28, target); ++#if SW64_DEBUG_WI ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_TMP, as->snapno); ++ emit_loadu64(as, RID_TMP, (((unsigned long)(void*)as->mcp) << 32)); ++#else ++ // ldi RID_TMP, as->snapno(zero) ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); ++#endif ++ emit_ABD(as, cmp, a, b, RID_R28); ++ break; ++ default: ++ lua_assert(!"NOT HRERE"); ++ } ++} ++ ++/* -- Operand fusion ------------------------------------------------------ */ ++ ++/* Limit linear search to this distance. Avoids O(n^2) behavior. */ ++#define CONFLICT_SEARCH_LIM 31 ++ ++/* Fuse XLOAD/XSTORE reference into load/store operand. 
*/ ++static void asm_fusexref(ASMState *as, SW64Ins mi, Reg rt, IRRef ref, ++ RegSet allow, int32_t ofs) ++{ ++ IRIns *ir = IR(ref); ++ Reg base; ++ if (ra_noreg(ir->r) && canfuse(as, ir)) { ++ if (ir->o == IR_ADD) { ++ intptr_t ofs2; ++ if (irref_isk(ir->op2) && (ofs2 = ofs + get_kval(IR(ir->op2)), ++ checki16(ofs2))) { ++ ref = ir->op1; ++ ofs = (int32_t)ofs2; ++ } ++ } else if (ir->o == IR_STRREF) { ++ intptr_t ofs2 = 65536; ++ lua_assert(ofs == 0); ++ ofs = (int32_t)sizeof(GCstr); ++ if (irref_isk(ir->op2)) { ++ ofs2 = ofs + get_kval(IR(ir->op2)); ++ ref = ir->op1; ++ } else if (irref_isk(ir->op1)) { ++ ofs2 = ofs + get_kval(IR(ir->op1)); ++ ref = ir->op2; ++ } ++ if (!checki16(ofs2)) { ++ /* NYI: Fuse ADD with constant. */ ++ Reg right, left = ra_alloc2(as, ir, allow); ++ right = (left >> 8); left &= 255; ++ emit_Ao(as, mi, rt, RID_TMP, ofs); ++ emit_ABD(as, SW64I_ADDL, left, right, RID_TMP); ++ return; ++ } ++ ofs = ofs2; ++ } ++ } ++ base = ra_alloc1(as, ref, allow); ++ emit_Ao(as, mi, rt, base, ofs); ++} ++ ++/* -- Calls --------------------------------------------------------------- */ ++ ++/* Generate a call to a C function. */ ++static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args) ++{ ++ uint32_t n, nargs = CCI_XNARGS(ci); ++ int32_t ofs = 0; ++ Reg gpr, fpr = REGARG_FIRSTFPR; ++ if ((void *)ci->func) ++ emit_call(as, (void *)ci->func, 1); ++ for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++) ++ as->cost[gpr] = REGCOST(~0u, ASMREF_L); ++ gpr = REGARG_FIRSTGPR; ++ for (n = 0; n < nargs; n++) { /* Setup args. */ ++ IRRef ref = args[n]; ++ if (ref) { ++ IRIns *ir = IR(ref); ++ if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR && ++ !(ci->flags & CCI_VARARG)) { ++ lua_assert(rset_test(as->freeset, fpr)); /* Already evicted. */ ++ ra_leftov(as, fpr, ref); ++ fpr += 1; ++ gpr += 1; ++ } else { ++ if (gpr <= REGARG_LASTGPR) { ++ lua_assert(rset_test(as->freeset, gpr)); /* Already evicted. 
*/ ++ if (irt_isfp(ir->t)) { ++ RegSet of = as->freeset; ++ Reg r; ++ /* Workaround to protect argument GPRs from being used for remat. */ ++ as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1); ++ r = ra_alloc1(as, ref, RSET_FPR); ++ as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1)); ++ if (irt_isnum(ir->t)) { ++ emit_GI(as, SW64I_FCVTLD, r, r); ++ emit_AI(as, SW64I_IFMOVD, gpr, r); ++ gpr++; fpr++; ++ } else if (irt_isfloat(ir->t)) { ++ emit_GI(as, SW64I_FCVTLS, r, r); ++ emit_AI(as, SW64I_IFMOVS, gpr, r); ++ gpr++; fpr++; ++ } ++ } else { ++ ra_leftov(as, gpr, ref); ++ gpr++; fpr++; ++ } ++ } else { ++ Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ if (irt_isnum(ir->t)) { ++ emit_Ao(as, SW64I_FSTD, r, RID_SP, ofs); ++ } else if(irt_isfloat(ir->t)) { ++ emit_Ao(as, SW64I_FSTS, r, RID_SP, ofs); ++ } else { ++ emit_Ao(as, SW64I_STL, r, RID_SP, ofs); ++ } ++ ofs += 8; ++ } ++ } ++ } else { ++ fpr = REGARG_LASTFPR+1; ++ if (gpr <= REGARG_LASTGPR) { ++ gpr++; fpr++; ++ } else { ++ ofs += 8; ++ } ++ } ++ checkmclim(as); ++ } ++} ++ ++/* Setup result reg/sp for call. Evict scratch regs. */ ++static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ WI_DEBUG_BEFORE(); ++ RegSet drop = RSET_SCRATCH; ++ if ((ci->flags & CCI_NOFPRCLOBBER)) ++ drop &= ~RSET_FPR; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); /* Evictions must be performed first. 
*/ ++ if (ra_used(ir)) { ++ lua_assert(!irt_ispri(ir->t)); ++ if (irt_isfp(ir->t)) { ++ if ((ci->flags & CCI_CASTU64)) { ++ int32_t ofs = sps_scale(ir->s); ++ Reg dest = ir->r; ++ if (ra_hasreg(dest)) { ++ ra_free(as, dest); ++ ra_modified(as, dest); ++ // This doesn't require FCVTLD, refer to the `lj_math_random_step` ++ emit_AI(as, SW64I_IFMOVD, RID_RET, dest); ++ } ++ if (ofs) { ++ emit_Ao(as, SW64I_STL, RID_RET, RID_SP, ofs); ++ } ++ } else { ++ ra_destreg(as, ir, RID_FPRET); ++ } ++ } else { ++ ra_destreg(as, ir, RID_RET); ++ } ++ } ++ WI_DEBUG_END(); ++} ++ ++static void asm_callx(ASMState *as, IRIns *ir) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ CCallInfo ci; ++ IRRef func; ++ IRIns *irf; ++ ci.flags = asm_callx_flags(as, ir); ++ asm_collectargs(as, ir, &ci, args); ++ asm_setupresult(as, ir, &ci); ++ func = ir->op2; irf = IR(func); ++ if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); } ++ if (irref_isk(func)) { /* Call to constant address. */ ++ ci.func = (ASMFunction)(void *)get_kval(irf); ++ } else { /* Need specific register for indirect calls. */ ++ Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR)); ++ MCode *p = as->mcp; ++ ++ __WI(--p, SW64I_CALL | SW64F_A(RID_RA) | SW64F_B(r)); ++ if (r != RID_CFUNCADDR) ++ __WI(--p, SW64I_LDI | SW64F_A(RID_CFUNCADDR) | SW64F_DISP(0, r)); ++ ++ as->mcp = p; ++ ci.func = (ASMFunction)(void *)0; ++ } ++ asm_gencall(as, &ci, args); ++} ++ ++/* -- Returns ------------------------------------------------------------- */ ++ ++/* Return to lower frame. Guard that it goes to the right spot. */ ++static void asm_retf(ASMState *as, IRIns *ir) ++{ ++ Reg base = ra_alloc1(as, REF_BASE, RSET_GPR); ++ void *pc = ir_kptr(IR(ir->op2)); ++ int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1)); ++ as->topslot -= (BCReg)delta; ++ if ((int32_t)as->topslot < 0) as->topslot = 0; ++ irt_setmark(IR(REF_BASE)->t); /* Children must not coalesce with BASE reg. 
*/ ++ emit_setgl(as, base, jit_base); ++ emit_addptr(as, base, -8*delta); ++ ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, RID_TMP, ra_allock(as, igcptr(pc), rset_exclude(RSET_GPR, base)), RID_R28); ++ ++ emit_Ao(as, SW64I_AL, RID_TMP, base, -8); ++} ++ ++/* -- Type conversions ---------------------------------------------------- */ ++ ++static void asm_tointg(ASMState *as, IRIns *ir, Reg left) ++{ ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ asm_guard(as, SW64I_FBEQ, tmp); ++ emit_FGI(as, SW64I_FCMPEQ, tmp, left, tmp); ++ emit_GI(as, SW64I_FCVTLD, tmp, tmp); ++ emit_FD(as, SW64I_FIMOVD, tmp, dest); ++ emit_GI(as, SW64I_FCVTLW, tmp, tmp); ++ lua_assert(irt_isint(ir->t)); ++ emit_GI(as, SW64I_FCVTDL, left, tmp); ++} ++ ++static void asm_tobit(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_FPR; ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, allow); ++ Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left)); ++ Reg tmp = ra_scratch(as, rset_clear(allow, right)); ++ ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); ++ emit_FD(as, SW64I_FIMOVD, tmp, dest); ++ emit_FGI(as, SW64I_FADDD, left, right, tmp); ++} ++ ++static void asm_conv(ASMState *as, IRIns *ir) ++{ ++ IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK); ++ int st64 = (st == IRT_I64 || st == IRT_U64 || (LJ_64 && st == IRT_P64)); ++ int sti8 = st == IRT_I8; ++ int stu8 = st == IRT_U8; ++ int sti16 = st == IRT_I16; ++ int stu16 = st == IRT_U16; ++ int stu32 = st == IRT_U32; ++ int stu64 = st == IRT_U64; ++ int stfp = (st == IRT_NUM || st == IRT_FLOAT); ++ ++ IRRef lref = ir->op1; ++ ++ lua_assert(irt_type(ir->t) != st); ++ ++ if (irt_isfp(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ if (stfp) { /* FP to FP conversion. */ ++ emit_GI(as, st == IRT_NUM ? 
SW64I_FCVTDS : SW64I_FCVTSD, ++ ra_alloc1(as, lref, RSET_FPR), dest); ++ } else if (stu64) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ MCLabel l_end = emit_label(as); ++ if (irt_isfloat(ir->t)) { ++ TODO; ++ } else { ++ emit_FGI(as, SW64I_FADDD, dest, dest, dest); ++ emit_GI(as, SW64I_FCVTLD, dest, dest); ++ emit_AI(as, SW64I_IFMOVD, RID_R28, dest); ++ emit_ABD(as,SW64I_BIS, RID_R28, left, RID_R28); ++ emit_AjD(as,SW64I_ANDI, left, 1, left); ++ emit_AjD(as,SW64I_SRLI, left, 1, RID_R28); ++ } ++ emit_branch(as, SW64I_BGE, left, l_end); ++ emit_GI(as, SW64I_FCVTLD, dest, dest); ++ emit_AI(as, SW64I_IFMOVD, left, dest); ++ } else { /* Integer to FP conversion. */ ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_GI(as, irt_isfloat(ir->t) ? SW64I_FCVTLS : SW64I_FCVTLD, dest, dest); ++ if (stu32) { ++ emit_AI(as, SW64I_IFMOVD, RID_R28, dest); ++ emit_AjD(as, SW64I_EXTLWI, left, 0, RID_R28); ++ } else { ++ emit_AI(as, SW64I_IFMOVD, left, dest); ++ } ++ } ++ } else if (stfp) { /* FP to integer conversion. */ ++ if (irt_isguard(ir->t)) { ++ /* Checked conversions are only supported from number to int. */ ++ lua_assert(irt_isint(ir->t) && st == IRT_NUM); ++ asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR)); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, lref, RSET_FPR); ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left)); ++ if (irt_isu32(ir->t)) { /* FP to U32 conversion. 
*/ ++ emit_AjD(as, SW64I_EXTLWI, dest, 0, dest); ++ } ++ emit_FD(as, SW64I_FIMOVD, tmp, dest); ++ emit_GI(as, SW64I_FCVTDL, left, tmp); ++ } ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irt_isu32(ir->t)) { ++ emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); ++ } ++ ++ if (st64 && irt_isint(ir->t)) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_AjD(as, SW64I_EXTLWI, left, 0, dest); ++ } else if (irt_isu64(ir->t) && st == IRT_INT) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_AjD(as, SW64I_EXTLWI, left, 0, dest); ++ } else if (sti8) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if (!irt_is64(ir->t)) { ++ emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); ++ } ++ emit_ABD(as, SW64I_SEXTB, 0, left, dest); ++ } else if (stu8) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_ABD(as, SW64I_EXTLBI, left, 0, dest); ++ } else if (sti16) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if (!irt_is64(ir->t)) { ++ emit_ABD(as, SW64I_EXTLWI, dest, 0, dest); ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); ++ } ++ emit_ABD(as, SW64I_SEXTH, 0, left, dest); ++ } else if (stu16) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ emit_ABD(as, SW64I_EXTLHI, left, 0, dest); ++ } else if (stu32) { ++ Reg left = ra_alloc1(as, lref, RSET_GPR); ++ if (irt_isint(ir->t)) ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); ++ emit_AjD(as, SW64I_EXTLWI, left, 0, dest); ++ } else { ++ ra_leftov(as, dest, lref); /* Do nothing, but may need to move regs. */ ++ } ++ } ++} ++ ++static void asm_strto(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num]; ++ IRRef args[2]; ++ int32_t ofs = 0; ++ RegSet drop = RSET_SCRATCH; ++ if (ra_hasreg(ir->r)) rset_set(drop, ir->r); /* Spill dest reg (if any). */ ++ ra_evictset(as, drop); ++ ofs = sps_scale(ir->s); ++ asm_guard(as, SW64I_BEQ, RID_RET); /* Test return status. 
*/ ++ args[0] = ir->op1; /* GCstr *str */ ++ args[1] = ASMREF_TMP1; /* TValue *n */ ++ asm_gencall(as, ci, args); ++ /* Store the result to the spill slot or temp slots. */ ++ emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), ++ RID_SP, ofs); ++} ++ ++/* -- Memory references --------------------------------------------------- */ ++ ++/* Store tagged value for ref at base+ofs. */ ++static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref) ++{ ++ RegSet allow = rset_exclude(RSET_GPR, base); ++ IRIns *ir = IR(ref); ++ lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t)); ++ if (irref_isk(ref)) { ++ TValue k; ++ lj_ir_kvalue(as->J->L, &k, ir); ++ emit_Ao(as, SW64I_STL, ra_allock(as, (int64_t)k.u64, allow), base, ofs); ++ } else { ++ Reg src = ra_alloc1(as, ref, allow); ++ Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, ++ rset_exclude(allow, src)); ++ emit_Ao(as, SW64I_STL, RID_TMP, base, ofs); ++ if (irt_isinteger(ir->t)) { ++ emit_ABD(as, SW64I_ADDL, RID_TMP, type, RID_TMP); ++ emit_AjD(as, SW64I_EXTLWI, src, 0, RID_TMP); ++ } else { ++ emit_ABD(as, SW64I_ADDL, src, type, RID_TMP); ++ } ++ } ++} ++/* Get pointer to TValue. */ ++static void asm_tvptr(ASMState *as, Reg dest, IRRef ref) ++{ ++ IRIns *ir = IR(ref); ++ if (irt_isnum(ir->t)) { ++ if (irref_isk(ref)) /* Use the number constant itself as a TValue. */ ++ ra_allockreg(as, igcptr(ir_knum(ir)), dest); ++ else /* Otherwise force a spill and use the spill slot. */ ++ emit_Ao(as, SW64I_LDI, dest, RID_SP, ra_spill(as, ir)); ++ } else { ++ /* Otherwise use g->tmptv to hold the TValue. */ ++ asm_tvstore64(as, dest, 0, ref); ++ emit_Ao(as, SW64I_LDI, dest, RID_JGL, ++ (int32_t)(offsetof(global_State, tmptv)-32768)); ++ } ++} ++ ++static void asm_aref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg idx, base; ++ if (irref_isk(ir->op2)) { ++ IRRef tab = IR(ir->op1)->op1; ++ int32_t ofs = asm_fuseabase(as, tab); ++ IRRef refa = ofs ? 
tab : ir->op1; ++ ofs += 8*IR(ir->op2)->i; ++ if (checki16(ofs)) { ++ base = ra_alloc1(as, refa, RSET_GPR); ++ emit_Ao(as, SW64I_LDI, dest, base, ofs); ++ return; ++ } ++ } ++ base = ra_alloc1(as, ir->op1, RSET_GPR); ++ idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base)); ++ emit_ABD(as, SW64I_S8ADDL, idx, base, dest); ++} ++ ++/* Inlined hash lookup. Specialized for key type and for const keys. ++** The equivalent C code is: ++** Node *n = hashkey(t, key); ++** do { ++** if (lj_obj_equal(&n->key, key)) return &n->val; ++** } while ((n = nextnode(n))); ++** return niltv(L); ++*/ ++static void asm_href(ASMState *as, IRIns *ir, IROp merge) ++{ ++ WI_DEBUG_BEFORE(); ++ RegSet allow = RSET_GPR; ++ int destused = ra_used(ir); ++ Reg dest = ra_dest(as, ir, allow); ++ Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest)); ++ Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1, tmp2; ++ ++ Reg cmp64 = RID_NONE; ++ ++ IRRef refkey = ir->op2; ++ IRIns *irkey = IR(refkey); ++ int isk = irref_isk(refkey); ++ IRType1 kt = irkey->t; ++ uint32_t khash; ++ MCLabel l_end, l_loop, l_next; ++ ++ rset_clear(allow, tab); ++ tmp1 = ra_scratch(as, allow); ++ rset_clear(allow, tmp1); ++ tmp2 = ra_scratch(as, allow); ++ rset_clear(allow, tmp2); ++ ++ if ( irt_isnum(kt)) { ++ key = ra_alloc1(as, refkey, RSET_FPR); ++ tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key)); ++ } else if (!irt_ispri(kt)) { ++ key = ra_alloc1(as, refkey, allow); ++ rset_clear(allow, key); ++ } ++ ++ if (!irt_isnum(kt)) { ++ /* Allocate cmp64 register used for 64-bit comparisons */ ++ if ( irt_isnum(kt)) { ++ cmp64 = key; ++ } else if (!isk && irt_isaddr(kt)) { ++ cmp64 = tmp2; ++ } else { ++ int64_t k; ++ if (isk && irt_isaddr(kt)) { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64; ++ } else { ++ lua_assert(irt_ispri(kt) && !irt_isnil(kt)); ++ k = ~((int64_t)~irt_toitype(ir->t) << 47); ++ } ++ cmp64 = ra_allock(as, k, allow); ++ rset_clear(allow, cmp64); ++ } ++ } ++ ++ /* Key 
not found in chain: jump to exit (if merged) or load niltv. */ ++ l_end = emit_label(as); ++ as->invmcp = NULL; ++ if (merge == IR_NE) { ++ asm_guard(as, SW64I_BEQ, RID_ZERO); ++ } else if (destused) { ++ emit_loada(as, dest, niltvg(J2G(as->J))); ++ } ++ /* Follow hash chain until the end. */ ++ l_loop = --as->mcp; ++ emit_move(as, dest, tmp1); ++ emit_Ao(as, SW64I_AL, tmp1, dest, (int32_t)offsetof(Node, next)); ++ l_next = emit_label(as); ++ ++ /* Type and value comparison. */ ++ if (merge == IR_EQ) { /* Must match asm_guard(). */ ++ l_end = asm_exitstub_addr(as); ++ } ++ if ( irt_isnum(kt)) { ++ emit_branch(as, SW64I_BEQ, RID_R28, l_end); ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); ++ emit_FGI(as, SW64I_FCMPEQ, tmpnum, key, RID_R28); ++ Reg isnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); ++ emit_branch(as, SW64I_BEQ, tmp1, l_next); ++ emit_ABD(as, SW64I_CMPULT, tmp1, isnum, tmp1); ++ emit_AjD(as, SW64I_SRAI, tmp1, 47, tmp1); ++ emit_AI(as, SW64I_IFMOVD, tmp1, tmpnum); ++ } else { ++ emit_branch(as, SW64I_BNE, RID_R28, l_end); ++ emit_ABD(as, SW64I_CMPEQ, tmp1, cmp64, RID_R28); ++ emit_Ao(as, SW64I_LDI, RID_TMP, RID_ZERO, as->snapno); ++ } ++ emit_Ao(as, SW64I_LDL, tmp1, dest, (int32_t)offsetof(Node, key.u64)); ++ *l_loop = SW64I_BNE | SW64F_A(tmp1) | ((as->mcp-l_loop-1) & 0x1fffff); ++ if (!isk && irt_isaddr(kt)) { ++ type = ra_allock(as, (int64_t)irt_toitype(kt) << 47, allow); ++ emit_ABD(as, SW64I_ADDL, key, type, tmp2); ++ rset_clear(allow, type); ++ } ++ ++ /* Load main position relative to tab->node into dest. */ ++ khash = isk ? 
ir_khash(irkey) : 1; ++ if (khash == 0) { ++ emit_Ao(as, SW64I_AL, dest, tab, (int32_t)offsetof(GCtab, node)); ++ } else { ++ Reg tmphash = tmp1; ++ if (isk) ++ tmphash = ra_allock(as, khash, allow); ++ ++ emit_ABD(as, SW64I_ADDL, dest, tmp1, dest); ++ lua_assert(sizeof(Node) == 24); ++ emit_ABD(as, SW64I_SUBW, tmp2, tmp1, tmp1); ++ emit_AjD(as, SW64I_SLLI, tmp1, 3, tmp1); ++ emit_AjD(as, SW64I_SLLI, tmp1, 5, tmp2); ++ ++ emit_ABD(as, SW64I_AND, tmp2, tmphash, tmp1); //tmp1 <- hmask & tmphash ++ emit_Ao(as, SW64I_AL, dest, tab, (int32_t)offsetof(GCtab, node)); ++ emit_Ao(as, SW64I_LDW, tmp2, tab, (int32_t)offsetof(GCtab, hmask)); ++ ++ if (isk) {//TODO ++ /* Nothing to do. */ ++ } else if (irt_isstr(kt)) { ++ emit_Ao(as, SW64I_LDW, tmp1, key, (int32_t)offsetof(GCstr, hash)); ++ } else { /* Must match with hash*() in lj_tab.c. */ ++ //hi = tmp1, lo = tmp2 ++ Reg hi = tmp1; ++ Reg lo = tmp2; ++ ++ /* hi = hi - lj_rol(lo, HASH_ROT3); */ ++ emit_ABD(as, SW64I_SUBL, hi, dest, hi); ++ emit_rotl32(as, lo, (HASH_ROT3)&31, dest, RID_R28); ++ ++ /* hi = lo ^ lj_rol(hi, HASH_ROT1 + HASH_ROT2); */ ++ emit_ABD(as, SW64I_XOR, lo, dest, hi); ++ emit_rotl32(as, hi, (HASH_ROT2+HASH_ROT1)&31, dest, RID_R28); ++ ++ /* lo = lo - lj_rol(hi, HASH_ROT1); */ ++ emit_ABD(as, SW64I_SUBL, lo, dest, lo); ++ emit_rotl32(as, hi, HASH_ROT1&31, dest, RID_R28); ++ ++ /* lo = lo ^ hi; */ ++ emit_ABD(as, SW64I_XOR, lo, hi, lo); ++ ++ ++ if (irt_isnum(kt)) { ++ emit_ABD(as, SW64I_ADDL, hi, hi, hi); // hi << 1 ++ ++ emit_AjD(as, SW64I_MASKLLI, tmp2, 4, lo); //lo ++ emit_AjD(as, SW64I_SRAI, tmp2, 32, hi); //hi ++ ++ emit_FD(as, SW64I_FIMOVD, key, tmp2); ++ } else { ++ emit_ABD(as, SW64I_XOR, key, tmp1, tmp2); ++ emit_rotl32(as, tmp1, HASH_ROT1&31, dest, tmp2); ++ emit_ABD(as, SW64I_ADDL, key, ra_allock(as, HASH_BIAS, allow), tmp1); ++ } ++ } ++ } ++ WI_DEBUG_END(); ++} ++ ++static void asm_hrefk(ASMState *as, IRIns *ir) ++{ ++ IRIns *kslot = IR(ir->op2); ++ IRIns *irkey = IR(kslot->op1); ++ int32_t ofs 
= (int32_t)(kslot->op2 * sizeof(Node)); ++ int32_t kofs = ofs + (int32_t)offsetof(Node, key); ++ Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE; ++ Reg node = ra_alloc1(as, ir->op1, RSET_GPR); ++ RegSet allow = rset_exclude(RSET_GPR, node); ++ Reg idx = node; ++ Reg key = ra_scratch(as, allow); ++ int64_t k; ++ lua_assert(ofs % sizeof(Node) == 0); ++ if (ofs > 32736) { ++ idx = dest; ++ rset_clear(allow, dest); ++ kofs = (int32_t)offsetof(Node, key); ++ } else if (ra_hasreg(dest)) { ++ emit_Ao(as, SW64I_LDI, dest, node, ofs); ++ } ++ if (irt_ispri(irkey->t)) { ++ lua_assert(!irt_isnil(irkey->t)); ++ k = ~((int64_t)~irt_toitype(irkey->t) << 47); ++ } else if (irt_isnum(irkey->t)) { ++ k = (int64_t)ir_knum(irkey)->u64; ++ } else { ++ k = ((int64_t)irt_toitype(irkey->t) << 47) | (int64_t)ir_kgc(irkey); ++ } ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, key, ra_allock(as, k, allow), RID_R28); ++ emit_Ao(as, SW64I_LDL, key, idx, kofs); ++ if (ofs > 32736) ++ emit_ABD(as, SW64I_ADDL, node, ra_allock(as, ofs, allow), dest); ++} ++ ++static void asm_uref(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op1)) { ++ GCfunc *fn = ir_kfunc(IR(ir->op1)); ++ MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v; ++ emit_lsptr(as, SW64I_AL, dest, v, RSET_GPR); ++ } else { ++ Reg uv = ra_scratch(as, RSET_GPR); ++ Reg func = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->o == IR_UREFC) { ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_Ao(as, SW64I_LDI, dest, uv, (int32_t)offsetof(GCupval, tv)); ++ emit_Ao(as, SW64I_LDBU, RID_R28, uv, (int32_t)offsetof(GCupval, closed)); ++ } else { ++ emit_Ao(as, SW64I_AL, dest, uv, (int32_t)offsetof(GCupval, v)); ++ } ++ emit_Ao(as, SW64I_AL, uv, func, (int32_t)offsetof(GCfuncL, uvptr) + ++ (int32_t)sizeof(MRef) * (int32_t)(ir->op2 >> 8)); ++ } ++} ++ ++static void asm_fref(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); ++ lua_assert(!ra_used(ir)); ++} 
++ ++static void asm_strref(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg dest = ra_dest(as, ir, allow); ++ Reg base = ra_alloc1(as, ir->op1, allow); ++ IRIns *irr = IR(ir->op2); ++ int32_t ofs = sizeof(GCstr); ++ rset_clear(allow, base); ++ if (irref_isk(ir->op2) && checki16(ofs + irr->i)) { ++ emit_Ao(as, SW64I_LDI, dest, base, ofs+irr->i); ++ } else { ++ emit_Ao(as, SW64I_LDI, dest, dest, ofs); ++ emit_ABD(as, SW64I_ADDL, base, ra_alloc1(as, ir->op2, allow), dest); ++ } ++} ++ ++/* -- Loads and stores ---------------------------------------------------- */ ++static void fxloadins_end(ASMState*as, SW64Ins mi, Reg r) ++{ ++ if (mi == SW64I_EXTLWI) { ++ emit_AjD(as, SW64I_EXTLWI, r, 0, r); ++ } else if (mi) ++ emit_ABD(as, mi, RID_ZERO, r, r); ++} ++static SW64Ins asm_fxloadins(IRIns *ir, SW64Ins* mi2) ++{ ++ *mi2 = 0; ++ switch (irt_type(ir->t)) { ++ case IRT_I8: ++ *mi2 = SW64I_SEXTB; //fallthrough ++ case IRT_U8: ++ return SW64I_LDBU; ++ ++ case IRT_I16: ++ *mi2 = SW64I_SEXTH; //fallthrough ++ case IRT_U16: ++ return SW64I_LDHU; ++ ++ case IRT_U32: ++ *mi2 = SW64I_EXTLWI; //fallthrough ++ case IRT_INT: ++ return SW64I_LDW; ++ ++ case IRT_NUM: return SW64I_FLDD; ++ case IRT_FLOAT: return SW64I_FLDS; ++ default: return irt_is64(ir->t) ? SW64I_LDL : SW64I_LDW; ++ } ++} ++ ++static SW64Ins asm_fxstoreins(IRIns *ir) ++{ ++ switch (irt_type(ir->t)) { ++ case IRT_I8: case IRT_U8: return SW64I_STB; ++ case IRT_I16: case IRT_U16: return SW64I_STH; ++ case IRT_NUM: return SW64I_FSTD; ++ case IRT_FLOAT: return SW64I_FSTS; ++#if LJ_64 && !LJ_GC64 ++ case IRT_LIGHTUD: lua_assert(0); /* NYI: mask 64 bit lightuserdata. */ ++#endif ++ default: return (irt_is64(ir->t)) ? 
SW64I_STL : SW64I_STW; ++ } ++} ++ ++static void asm_fload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ SW64Ins mi2 = 0; ++ SW64Ins mi = asm_fxloadins(ir, &mi2); ++ Reg idx; ++ int32_t ofs; ++ if (ir->op1 == REF_NIL) { ++ idx = RID_JGL; ++ ofs = (ir->op2 << 2) - 32768 - GG_OFS(g); ++ } else { ++ idx = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (ir->op2 == IRFL_TAB_ARRAY) { ++ ofs = asm_fuseabase(as, ir->op1); ++ if (ofs) { /* Turn the t->array load into an add for colocated arrays. */ ++ emit_Ao(as, SW64I_LDI, dest, idx, ofs); ++ return; ++ } ++ } ++ ofs = field_ofs[ir->op2]; ++ } ++ fxloadins_end(as, mi2, dest); ++ emit_Ao(as, mi, dest, idx, ofs); ++} ++ ++static void asm_fstore(ASMState *as, IRIns *ir) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, RSET_GPR); ++ IRIns *irf = IR(ir->op1); ++ Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src)); ++ int32_t ofs = field_ofs[irf->op2]; ++ SW64Ins mi = asm_fxstoreins(ir); ++ lua_assert(!irt_isfp(ir->t)); ++ emit_Ao(as, mi, src, idx, ofs); ++ } ++} ++ ++static void asm_xload(ASMState *as, IRIns *ir) ++{ ++ SW64Ins mi2 = 0; ++ SW64Ins mi = asm_fxloadins(ir, &mi2); ++ Reg dest = ra_dest(as, ir, ++ irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); ++ lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED)); ++ fxloadins_end(as, mi2, dest); ++ asm_fusexref(as, mi, dest, ir->op1, RSET_GPR, 0); ++} ++ ++static void asm_xstore_(ASMState *as, IRIns *ir, int32_t ofs) ++{ ++ if (ir->r != RID_SINK) { ++ Reg src = ra_alloc1z(as, ir->op2, ++ irt_isfp(ir->t) ? 
RSET_FPR : RSET_GPR); ++ asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1, ++ rset_exclude(RSET_GPR, src), ofs); ++ } ++} ++ ++#define asm_xstore(as, ir) asm_xstore_(as, ir, 0) ++ ++#if LJ_64 && !LJ_GC64 ++static Reg asm_load_lightud64(ASMState *as, IRIns *ir, int typecheck) ++{ ++ ++ if (ra_used(ir) || typecheck) { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (typecheck) { ++ Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, dest)); ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, ++ tmp, ra_allock(as, (int32_t)0x1fffe, rset_exclude(RSET_GPR, dest)), ++ RID_R28); ++ emit_AjD(as, SW64I_SRLI, dest, 47, tmp); ++ } ++ return dest; ++ } else { ++ return RID_NONE; ++ } ++} ++#endif ++ ++static void asm_ahuvload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type, idx; ++ RegSet allow = RSET_GPR; ++ int32_t ofs = 0; ++ IRType1 t = ir->t; ++ ++ type = ra_scratch(as, allow); ++ rset_clear(allow, type); ++ ++ if (ra_used(ir)) { ++ lua_assert(irt_isnum(ir->t) || irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? 
RSET_FPR : allow); ++ rset_clear(allow, dest); ++ if (irt_isaddr(t)) ++ emit_DEXTM(as, dest, dest, 0, 47); ++ else if (irt_isint(t)) ++ emit_AjD(as, SW64I_ADDWI, dest, 0, dest); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ rset_clear(allow, idx); ++ if (irt_isnum(t)) { ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPULT, type, ra_allock(as, (int32_t)LJ_TISNUM, allow), RID_R28); ++ } else { ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, type, ra_allock(as, (int32_t)irt_toitype(t), allow), RID_R28); ++ } ++ if (ra_hasreg(dest)) { ++ if (irt_isnum(t)){ ++ emit_Fo(as, SW64I_FLDD, dest, idx, ofs); ++ dest = type; ++ } ++ } else { ++ dest = type; ++ } ++ emit_AjD(as, SW64I_SRAI, dest, 47, type); ++ emit_Ao(as, SW64I_LDL, dest, idx, ofs); ++} ++ ++static void asm_ahustore(ASMState *as, IRIns *ir) ++{ ++ RegSet allow = RSET_GPR; ++ Reg idx, src = RID_NONE, type = RID_NONE; ++ int32_t ofs = 0; ++ if (ir->r == RID_SINK) ++ return; ++ if (irt_isnum(ir->t)) { ++ src = ra_alloc1(as, ir->op2, RSET_FPR); ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_Fo(as, SW64I_FSTD, src, idx, ofs); ++ } else { ++ Reg tmp = RID_TMP; ++ if (irt_ispri(ir->t)) { ++ tmp = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow); ++ rset_clear(allow, tmp); ++ } else { ++ src = ra_alloc1(as, ir->op2, allow); ++ rset_clear(allow, src); ++ type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow); ++ rset_clear(allow, type); ++ } ++ idx = asm_fuseahuref(as, ir->op1, &ofs, allow); ++ emit_Ao(as, SW64I_STL, tmp, idx, ofs); ++ if (ra_hasreg(src)) { ++ if (irt_isinteger(ir->t)) { ++ emit_ABD(as, SW64I_ADDL, tmp, type, tmp); ++ emit_AjD(as, SW64I_EXTLWI, src, 0, RID_TMP); ++ } else { ++ emit_ABD(as, SW64I_ADDL, src, type, tmp); ++ } ++ } ++ } ++} ++ ++static void asm_sload(ASMState *as, IRIns *ir) ++{ ++ Reg dest = RID_NONE, type = RID_NONE, base; ++ RegSet allow = RSET_GPR; ++ IRType1 t = ir->t; ++ int32_t ofs = 8*((int32_t)ir->op1-2); ++ 
++ lua_assert(!(ir->op2 & IRSLOAD_PARENT)); /* Handled by asm_head_side(). */ ++ lua_assert(irt_isguard(ir->t) || !(ir->op2 & IRSLOAD_TYPECHECK)); ++ ++ if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) { ++ dest = ra_scratch(as, RSET_FPR); ++ asm_tointg(as, ir, dest); ++ t.irt = IRT_NUM; /* Continue with a regular number type check. */ ++ } else if (ra_used(ir)) { ++ lua_assert(irt_isnum(ir->t) || ++ irt_isint(ir->t) || irt_isaddr(ir->t)); ++ dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow); ++ rset_clear(allow, dest); ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++ if (ir->op2 & IRSLOAD_CONVERT) { ++ if (irt_isint(t)) { ++ Reg tmp = ra_scratch(as, RSET_FPR); ++ emit_FD(as, SW64I_FIMOVD, dest, tmp); ++ emit_GI(as, SW64I_FCVTDL_Z, tmp, tmp); ++ emit_GI(as, SW64I_FCVTLW, tmp, tmp); ++ dest = tmp; ++ t.irt = IRT_NUM; /* Check for original type. */ ++ } else { ++ Reg tmp = ra_scratch(as, RSET_GPR); ++ emit_GI(as, SW64I_FCVTLD, dest, dest); ++ emit_AI(as, SW64I_IFMOVD, tmp, dest); ++ dest = tmp; ++ t.irt = IRT_INT; /* Check for original type. */ ++ } ++ } ++ else if (irt_isaddr(t)) { ++ /* Clear type from pointers. */ ++ emit_DEXTM(as, dest, dest, 0, 47); ++ } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) { ++ /* Sign-extend integers. */ ++ emit_AjD(as, SW64I_ADDWI, dest, 0, dest); ++ } ++ goto dotypecheck; ++ } ++ base = ra_alloc1(as, REF_BASE, allow); ++ rset_clear(allow, base); ++dotypecheck: ++ if ((ir->op2 & IRSLOAD_TYPECHECK)) { ++ type = dest < RID_MAX_GPR ? 
dest : RID_TMP; ++ if (irt_ispri(t)) { ++ Reg ktype = ra_allock(as, ~((int64_t)~irt_toitype(t) << 47), allow); ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, type, ktype, RID_R28); ++ } else { ++ if (irt_isnum(t)) { ++ Reg isnum = ra_allock(as, (int32_t)LJ_TISNUM, allow); ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPULT, RID_TMP, isnum, RID_R28); ++ if (ra_hasreg(dest)) ++ emit_Fo(as, SW64I_FLDD, dest, base, ofs); ++ } else { ++ Reg ktype2 = ra_allock(as, (int32_t)irt_toitype(t), allow); ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, RID_TMP, ktype2, RID_R28); ++ } ++ emit_AjD(as, SW64I_SRAI, type, 47, RID_TMP); ++ } ++ emit_Ao(as, SW64I_LDL, type, base, ofs); ++ } else if (ra_hasreg(dest)) { ++ if (irt_isnum(t)) ++ emit_Fo(as, SW64I_FLDD, dest, base, ofs); ++ else ++ emit_Ao(as, irt_isint(t) ? SW64I_LDW : SW64I_LDL, dest, base, ++ ofs ); ++ } ++} ++ ++/* -- Allocations --------------------------------------------------------- */ ++ ++#if LJ_HASFFI ++static void asm_cnew(ASMState *as, IRIns *ir) ++{ ++ CTState *cts = ctype_ctsG(J2G(as->J)); ++ CTypeID id = (CTypeID)IR(ir->op1)->i; ++ CTSize sz; ++ CTInfo info = lj_ctype_info(cts, id, &sz); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco]; ++ IRRef args[4]; ++ RegSet drop = RSET_SCRATCH; ++ lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL)); ++ ++ as->gcsteps++; ++ if (ra_hasreg(ir->r)) ++ rset_clear(drop, ir->r); /* Dest reg handled below. */ ++ ra_evictset(as, drop); ++ if (ra_used(ir)) ++ ra_destreg(as, ir, RID_RET); /* GCcdata * */ ++ ++ /* Initialize immutable cdata object. */ ++ if (ir->o == IR_CNEWI) { ++ RegSet allow = (RSET_GPR & ~RSET_SCRATCH); ++ emit_Ao(as, sz == 8 ? SW64I_STL : SW64I_STW, ra_alloc1(as, ir->op2, allow), ++ RID_RET, sizeof(GCcdata)); ++ lua_assert(sz == 4 || sz == 8); ++ } else if (ir->op2 != REF_NIL) { /* Create VLA/VLS/aligned cdata. 
*/ ++ ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv]; ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ir->op1; /* CTypeID id */ ++ args[2] = ir->op2; /* CTSize sz */ ++ args[3] = ASMREF_TMP1; /* CTSize align */ ++ asm_gencall(as, ci, args); ++ emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info)); ++ return; ++ } ++ ++ /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */ ++ emit_Ao(as, SW64I_STB, RID_R28, RID_RET, offsetof(GCcdata, gct)); ++ emit_Ao(as, SW64I_LDI, RID_R28, RID_ZERO, ~LJ_TCDATA); ++ ++ emit_Ao(as, SW64I_STH, RID_R28, RID_RET, offsetof(GCcdata, ctypeid)); ++ emit_Ao(as, SW64I_LDI, RID_R28, RID_ZERO, id); /* Lower 16 bit used. Sign-ext ok. */ ++ args[0] = ASMREF_L; /* lua_State *L */ ++ args[1] = ASMREF_TMP1; /* MSize size */ ++ asm_gencall(as, ci, args); ++ ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)), ++ ra_releasetmp(as, ASMREF_TMP1)); ++} ++#else ++#define asm_cnew(as, ir) ((void)0) ++#endif ++ ++/* -- Write barriers ------------------------------------------------------ */ ++ ++static void asm_tbar(ASMState *as, IRIns *ir) ++{ ++ Reg tab = ra_alloc1(as, ir->op1, RSET_GPR); ++ Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab)); ++ Reg link = RID_TMP; ++ MCLabel l_end = emit_label(as); ++ emit_Ao(as, SW64I_AS, link, tab, (int32_t)offsetof(GCtab, gclist)); ++ emit_Ao(as, SW64I_STB, mark, tab, (int32_t)offsetof(GCtab, marked)); ++ emit_setgl(as, tab, gc.grayagain); ++ emit_getgl(as, link, gc.grayagain); ++ emit_branch(as, SW64I_BEQ, RID_TMP, l_end); ++ emit_ABD(as, SW64I_XOR, mark, RID_TMP, mark); /* Clear black bit. */ ++ emit_AjD(as, SW64I_ANDI, mark, LJ_GC_BLACK, RID_TMP); ++ emit_Ao(as, SW64I_LDBU, mark, tab, (int32_t)offsetof(GCtab, marked)); ++} ++ ++static void asm_obar(ASMState *as, IRIns *ir) ++{ ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg obj, val, tmp; ++ /* No need for other object barriers (yet). 
*/ ++ lua_assert(IR(ir->op1)->o == IR_UREFC); ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ir->op1; /* TValue *tv */ ++ asm_gencall(as, ci, args); ++ emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ obj = IR(ir->op1)->r; ++ tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj)); ++ ++ emit_AjD(as, SW64I_ANDI, tmp, LJ_GC_BLACK, tmp); ++ ++ emit_branch(as, SW64I_BEQ, RID_TMP, l_end); ++ emit_AjD(as, SW64I_ANDI, RID_TMP, LJ_GC_WHITES, RID_TMP); ++ ++ emit_branch(as, SW64I_BEQ, RID_TMP, l_end); ++ val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj)); ++ emit_Ao(as, SW64I_LDBU, tmp, obj, ++ (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv)); ++ emit_Ao(as, SW64I_LDBU, RID_TMP, val, (int32_t)offsetof(GChead, marked)); ++} ++ ++/* -- Arithmetic and logic operations ------------------------------------- */ ++ ++static void asm_fparith(ASMState *as, IRIns *ir, SW64Ins mi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_FGI(as, mi, left, right, dest); ++} ++ ++static void asm_fpunary(ASMState *as, IRIns *ir, SW64Ins mi) ++{ ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_FGI(as, mi, RID_FZERO, left, dest); ++} ++ ++static void asm_fpmath(ASMState *as, IRIns *ir) ++{ ++ if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) ++ return; ++ asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2); ++} ++ ++#define asm_fpadd(as, ir) asm_fparith(as, ir, SW64I_FADDD) ++#define asm_fpsub(as, ir) asm_fparith(as, ir, SW64I_FSUBD) ++#define asm_fpmul(as, ir) asm_fparith(as, ir, SW64I_FMULD) ++ ++//TODO ++ ++static void asm_add(ASMState *as, IRIns *ir) ++{ ++ IRType1 t = ir->t; ++ if (irt_isnum(t)) { ++ asm_fpadd(as, ir); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, 
RSET_GPR); ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(IR(ir->op2)); ++ if (checku8(k)) { ++ emit_AjD(as, (LJ_64 && irt_is64(t)) ? SW64I_ADDLI : SW64I_ADDWI, ++ left, k, dest); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ABD(as, (LJ_64 && irt_is64(t)) ? SW64I_ADDL : SW64I_ADDW, ++ left, right, dest); ++ } ++} ++ ++static void asm_sub(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpsub(as, ir); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ABD(as, irt_is64(ir->t) ? SW64I_SUBL : SW64I_SUBW, ++ left, right, dest); ++ } ++} ++ ++static void asm_mul(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ asm_fpmul(as, ir); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ABD(as, irt_is64(ir->t) ? SW64I_MULL : SW64I_MULW, ++ left, right, dest); ++ } ++} ++ ++static void asm_mod(ASMState *as, IRIns *ir) ++{ ++ if (!irt_isint(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 : ++ IRCALL_lj_carith_modu64); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_modi); ++} ++ ++static void asm_pow(ASMState *as, IRIns *ir) ++{ ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 : ++ IRCALL_lj_carith_powu64); ++ else ++ asm_callid(as, ir, IRCALL_lj_vm_powi); ++} ++ ++static void asm_div(ASMState *as, IRIns *ir) ++{ ++ if (!irt_isnum(ir->t)) ++ asm_callid(as, ir, irt_isi64(ir->t) ? 
IRCALL_lj_carith_divi64 : ++ IRCALL_lj_carith_divu64); ++ else ++ asm_fparith(as, ir, SW64I_FDIVD); ++} ++ ++static void asm_neg(ASMState *as, IRIns *ir) ++{ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR); ++ emit_FGI(as, SW64I_FCPYSN, left, left, dest); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_ABD(as, (LJ_64 && irt_is64(ir->t)) ? SW64I_SUBL : SW64I_SUBW, ++ RID_ZERO, left, dest); ++ } ++} ++ ++#define asm_abs(as, ir) asm_fpunary(as, ir, SW64I_FABS) ++ ++#define asm_atan2(as, ir) asm_callid(as, ir, IRCALL_atan2) ++#define asm_ldexp(as, ir) asm_callid(as, ir, IRCALL_ldexp) ++ ++static void asm_arithov(ASMState *as, IRIns *ir) ++{ ++ Reg right, left, dest = ra_dest(as, ir, RSET_GPR); ++ lua_assert(!irt_is64(ir->t)); ++ if (irref_isk(ir->op2)) { ++ int k = IR(ir->op2)->i; ++ if (ir->o == IR_SUBOV) k = -k; ++ if (checki16(k)) { /* (dest < left) == (k >= 0 ? 1 : 0) */ ++ left = ra_alloc1(as, ir->op1, RSET_GPR); ++ asm_guard(as, k >= 0 ? SW64I_BNE : SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPLT, dest, dest == left ? RID_TMP : left, RID_R28); ++ emit_Ao(as, SW64I_LDI, dest, left, k); ++ if (dest == left) emit_move(as, RID_TMP, left); ++ return; ++ } ++ } ++ left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ ++ asm_guard(as, SW64I_BLT, RID_R28); ++ ++ emit_ABD(as, SW64I_AND, RID_TMP, RID_R28, RID_R28); ++ if (ir->o == IR_ADDOV) { /* ((dest^left) & (dest^right)) < 0 */ ++ emit_ABD(as, SW64I_XOR, dest, dest == right ? RID_TMP : right, RID_TMP); ++ } else { /* ((dest^left) & (dest^~right)) < 0 */ ++ emit_ABD(as, SW64I_XOR, RID_TMP, dest, RID_TMP); ++ emit_ABD(as, SW64I_EQV, dest == right ? RID_TMP : right, RID_ZERO, RID_TMP); ++ } ++ ++ emit_ABD(as, SW64I_XOR, dest, dest == left ? RID_TMP : left, RID_R28); ++ emit_ABD(as, ir->o == IR_ADDOV ? 
SW64I_ADDW : SW64I_SUBW, left, right, dest); ++ ++ if (dest == left || dest == right) ++ emit_move(as, RID_TMP, dest == left ? left : right); ++} ++ ++#define asm_addov(as, ir) asm_arithov(as, ir) ++#define asm_subov(as, ir) asm_arithov(as, ir) ++ ++static void asm_mulov(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ ++ asm_guard(as, SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, dest, RID_R28, RID_R28); ++ ++ emit_ABD(as, SW64I_MULW, left, right, dest); ++ emit_ABD(as, SW64I_MULL, left, right, RID_R28); ++} ++ ++static void asm_bnot(ASMState *as, IRIns *ir) ++{ ++ Reg left, right, dest = ra_dest(as, ir, RSET_GPR); ++ IRIns *irl = IR(ir->op1); ++ if (mayfuse(as, ir->op1) && irl->o == IR_BOR) { ++ left = ra_alloc2(as, irl, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ } else { ++ left = RID_ZERO; ++ right = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ } ++ emit_ABD(as, SW64I_ORNOT, left, right, dest); ++} ++ ++static void asm_bswap(ASMState *as, IRIns *ir) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest)); ++ int is64 = irt_is64(ir->t); ++ int bit = is64 ? 64 : 32; ++ ++ if (is64) { ++ /* 8. extlb left, 7 */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_EXTLBI, left, 7, RID_R28); ++ ++ /* 7. extlb left, 6 */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*7, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 6, RID_R28); ++ ++ /* 6. extlb left, 5 */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*6, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 5, RID_R28); ++ ++ /* 5. extlb left, 4 */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*5, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 4, RID_R28); ++ } ++ ++ /* 4. 
extlb left, 3, AT; addl AT, dest, dest */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ if (is64) emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*4, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 3, RID_R28); ++ ++ /* 3. extlb left, 2, AT; slli AT, 8, AT; addl AT, dest, dest */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*3, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 2, RID_R28); ++ ++ /* 2. extlb left, 1, AT; slli AT, 16, AT; addl AT, dest, dest */ ++ emit_ABD(as, SW64I_ADDL, RID_R28, dest, dest); ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*2, RID_R28); ++ emit_AjD(as, SW64I_EXTLBI, left, 1, RID_R28); ++ ++ /* 1. extlb left, 0, AT; slli AT, 24, dest */ ++ emit_AjD(as, SW64I_SLLI, RID_R28, bit-8*1, dest); ++ emit_AjD(as, SW64I_EXTLBI, left, 0, RID_R28); ++} ++ ++static void asm_bitop(ASMState *as, IRIns *ir, SW64Ins mi, SW64Ins mik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (!irt_is64(ir->t)) { ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); ++ } ++ ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(IR(ir->op2)); ++ if (checki8(k)) { ++ emit_AjD(as, mik, left, k, dest); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ emit_ABD(as, mi, left, right, dest); ++} ++ ++#define asm_band(as, ir) asm_bitop(as, ir, SW64I_AND, SW64I_ANDI) ++#define asm_bor(as, ir) asm_bitop(as, ir, SW64I_BIS, SW64I_BISI) ++#define asm_bxor(as, ir) asm_bitop(as, ir, SW64I_XOR, SW64I_XORI) ++ ++static void asm_bitshift(ASMState *as, IRIns *ir, SW64Ins mi, SW64Ins mik) ++{ ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ int is64 = irt_is64(ir->t); ++ if (!is64) ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, dest, dest); // truncated it to 32 bit ++ ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)IR(ir->op2)->i; ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ emit_AjD(as, mik, is64 ? 
left : RID_R28, (shift & 63), dest); ++ if (!is64) { ++ if (mi != SW64I_SRAI && mi != SW64I_SRA) ++ emit_AjD(as, SW64I_EXTLWI, RID_R28, 0, RID_R28); ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, left, RID_R28); // truncated it to 32 bit ++ } ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ABD(as, mi, is64 ? left : RID_R28, right, dest); ++ if (!is64) { ++ if (mi != SW64I_SRAI && mi != SW64I_SRA) ++ emit_AjD(as, SW64I_EXTLWI, RID_R28, 0, RID_R28); ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, left, RID_R28); // truncated it to 32 bit ++ } ++ } ++} ++ ++#define asm_bshl(as, ir) asm_bitshift(as, ir, SW64I_SLL, SW64I_SLLI) ++#define asm_bshr(as, ir) asm_bitshift(as, ir, SW64I_SRL, SW64I_SRLI) ++#define asm_bsar(as, ir) asm_bitshift(as, ir, SW64I_SRA, SW64I_SRAI) ++ ++static void asm_brotx(ASMState *as, IRIns *ir, int mode) ++{ ++ int is64 = irt_is64(ir->t); ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ if (irref_isk(ir->op2)) { /* Constant shifts. */ ++ uint32_t shift = (uint32_t)(IR(ir->op2)->i & 63); ++ Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR); ++ if (is64) { ++ emit_rotx(as, left, shift, dest, RID_R28, mode); ++ } else { ++ emit_rotx32(as, left, shift, dest, RID_R28, mode); ++ } ++ } else { ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ if (!is64) { ++ emit_ABD(as, SW64I_ADDW, dest, RID_ZERO, dest); ++ } ++ emit_ABD(as, SW64I_BIS, dest, RID_TMP, dest); ++ ++ if (mode == 1) { ++ emit_ABD(as, SW64I_SLL, is64 ? left: RID_R28, right, dest); ++ emit_ABD(as, SW64I_SRL, is64 ? left: RID_R28, RID_TMP, RID_TMP); ++ } else if (mode == 2){ ++ emit_ABD(as, SW64I_SRL, is64 ? left: RID_R28, right, dest); ++ emit_ABD(as, SW64I_SLL, is64 ? left: RID_R28, RID_TMP, RID_TMP); ++ } else { ++ lua_assert(0); ++ } ++ if (!is64) { ++ emit_AjD(as, SW64I_EXTLWI, left, 0, RID_R28); ++ } ++ emit_ABD(as, SW64I_SUBL, ra_allock(as, is64 ? 
64 : 32, RSET_GPR), right, RID_TMP); ++ } ++} ++#define asm_brol(as, ir) asm_brotx(as, ir, 1) ++#define asm_bror(as, ir) asm_brotx(as, ir, 2) ++ ++static void asm_min_max(ASMState *as, IRIns *ir, int ismax) ++{ ++ if (irt_isnum(ir->t)) { ++ Reg dest = ra_dest(as, ir, RSET_FPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ emit_FGHI(as, SW64I_FSELEQ, RID_F28, left, right, dest); ++ emit_FGI(as, SW64I_FCMPLT, ismax ? left: right, ismax ? right : left, ++ RID_F28); ++ } else { ++ Reg dest = ra_dest(as, ir, RSET_GPR); ++ Reg right, left = ra_alloc2(as, ir, RSET_GPR); ++ right = (left >> 8); left &= 255; ++ emit_ABCD(as, SW64I_SELEQ, RID_R28, left, right, dest); ++ emit_ABD(as, SW64I_CMPLT, ismax ? left : right, ismax ? right : left, ++ RID_R28); ++ } ++} ++ ++#define asm_min(as, ir) asm_min_max(as, ir, 0) ++#define asm_max(as, ir) asm_min_max(as, ir, 1) ++ ++/* -- Comparisons --------------------------------------------------------- */ ++ ++static void asm_comp(ASMState *as, IRIns *ir) ++{ ++ /* ORDER IR: LT GE LE GT ULT UGE ULE UGT. 
*/ ++ IROp op = ir->o; ++ if (irt_isnum(ir->t)) { ++ MCLabel l_true; ++ Reg right, left = ra_alloc2(as, ir, RSET_FPR); ++ right = (left >> 8); left &= 255; ++ l_true = emit_label(as); ++ switch (op) { ++ case IR_LT: ++ case IR_ULT: ++ asm_guard(as, SW64I_FBEQ, RID_F28); ++ emit_FGI(as, SW64I_FCMPLT, left, right, RID_F28); ++ break; ++ case IR_GE: ++ case IR_UGE: ++ asm_guard(as, SW64I_FBEQ, RID_F28); ++ emit_FGI(as, SW64I_FCMPLE, right, left, RID_F28); ++ break; ++ case IR_LE: ++ case IR_ULE: ++ asm_guard(as, SW64I_FBEQ, RID_F28); ++ emit_FGI(as, SW64I_FCMPLE, left, right, RID_F28); ++ break; ++ case IR_GT: ++ case IR_UGT: ++ asm_guard(as, SW64I_FBEQ, RID_F28); ++ emit_FGI(as, SW64I_FCMPLT, right, left, RID_F28); ++ break; ++ default: ++ lua_assert(!"not here"); ++ } ++ ++ if (op & 4) { ++ emit_branch(as, SW64I_FBNE, RID_F28, l_true); ++ } else { ++ asm_guard(as, SW64I_FBNE, RID_F28); ++ } ++ emit_FGI(as, SW64I_FCMPUN, left, right, RID_F28); ++ ++ } else { ++ Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR); ++ if (op == IR_ABC) op = IR_UGT; ++ ++ if ((op&4) == 0 && irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { ++ SW64Ins mi = (op&2) ? ((op&1) ? SW64I_BLE : SW64I_BGT) : ++ ((op&1) ? SW64I_BLT : SW64I_BGE); ++ asm_guard(as, mi, left); ++ } else { ++ if (irref_isk(ir->op2)) { ++ intptr_t k = get_kval(IR(ir->op2)); ++ if ((op&2)) k++; ++ if (checki8(k)) { ++ asm_guard(as, (op&1) ? SW64I_BNE : SW64I_BEQ, RID_R28); ++ emit_AjD(as, (op&4) ? SW64I_CMPULTI : SW64I_CMPLTI, ++ left, k, RID_R28); ++ return; ++ } ++ } ++ right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left)); ++ asm_guard(as, ((op^(op>>1))&1) ? SW64I_BNE : SW64I_BEQ, RID_R28); ++ emit_ABD(as, (op&4) ? SW64I_CMPULT : SW64I_CMPLT, ++ (op&2) ? right : left, (op&2) ? left : right, RID_R28); ++ } ++ } ++} ++ ++ ++static void asm_equal(ASMState *as, IRIns *ir) ++{ ++ Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? 
RSET_FPR : RSET_GPR); ++ int is_ne = ir->o & 1; ++ right = (left >> 8); left &= 255; ++ if (irt_isnum(ir->t)) { ++ MCLabel l_true = emit_label(as); ++ ++ if (irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { ++ asm_guard(as, is_ne ? SW64I_FBEQ : SW64I_FBNE, left); ++ return; ++ } ++ asm_guard(as, is_ne ? SW64I_FBNE : SW64I_FBEQ, RID_F28); ++ emit_FGI(as, SW64I_FCMPEQ, left, right, RID_F28); ++ ++ if (is_ne) { ++ emit_branch(as, SW64I_FBNE, RID_F28, l_true); ++ } else { ++ asm_guard(as, SW64I_FBNE, RID_F28); ++ } ++ emit_FGI(as, SW64I_FCMPUN, left, right, RID_F28); ++ ++ } else { ++ if (irref_isk(ir->op2) && get_kval(IR(ir->op2)) == 0) { ++ asm_guard(as, is_ne ? SW64I_BEQ: SW64I_BNE, left); ++ return; ++ } ++ asm_guard(as, is_ne ? SW64I_BNE : SW64I_BEQ, RID_R28); ++ emit_ABD(as, SW64I_CMPEQ, left, right, RID_R28); ++ } ++} ++ ++/* -- Support for 64 bit ops in 32 bit mode ------------------------------- */ ++ ++/* Hiword op of a split 64 bit op. Previous op must be the loword op. */ ++static void asm_hiop(ASMState *as, IRIns *ir) ++{ ++ UNUSED(as); UNUSED(ir); lua_assert(0); ++} ++ ++/* -- Profiling ----------------------------------------------------------- */ ++ ++static void asm_prof(ASMState *as, IRIns *ir) ++{ ++ UNUSED(ir); ++ asm_guard(as, SW64I_BNE, RID_R28); ++ emit_AjD(as, SW64I_ANDI, RID_R28, HOOK_PROFILE, RID_R28); ++ emit_lsglptr(as, SW64I_LDBU, RID_R28, ++ (int32_t)offsetof(global_State, hookmask)); ++} ++ ++/* -- Stack handling ------------------------------------------------------ */ ++ ++/* Check Lua stack size for overflow. Use exit handler as fallback. */ ++static void asm_stack_check(ASMState *as, BCReg topslot, ++ IRIns *irp, RegSet allow, ExitNo exitno) ++{ ++ /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */ ++ Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE; ++ ExitNo oldsnap = as->snapno; ++ rset_clear(allow, pbase); ++ tmp = allow ? 
rset_pickbot(allow) : RID_RET; ++ as->snapno = exitno; ++ asm_guard(as, SW64I_BLE, RID_R28); ++ as->snapno = oldsnap; ++ if (allow == RSET_EMPTY) /* Restore temp. register. */ ++ emit_Ao(as, SW64I_AL, tmp, RID_SP, 0); ++ else ++ ra_modified(as, tmp); ++ emit_Ao(as, SW64I_LDI, RID_R28, RID_TMP, -(8*topslot)); ++ emit_ABD(as, SW64I_SUBL, tmp, pbase, RID_TMP); ++ emit_Ao(as, SW64I_AL, tmp, tmp, offsetof(lua_State, maxstack)); ++ if (pbase == RID_TMP) ++ emit_getgl(as, RID_TMP, jit_base); ++ emit_getgl(as, tmp, cur_L); ++ if (allow == RSET_EMPTY) /* Spill temp. register. */ ++ emit_Ao(as, SW64I_AS, tmp, RID_SP, 0); ++} ++ ++/* Restore Lua stack from on-trace state. */ ++static void asm_stack_restore(ASMState *as, SnapShot *snap) ++{ ++ WI_DEBUG_BEFORE(); ++ SnapEntry *map = &as->T->snapmap[snap->mapofs]; ++ SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2]; ++ MSize n, nent = snap->nent; ++ /* Store the value of all modified slots to the Lua stack. */ ++ for (n = 0; n < nent; n++) { ++ SnapEntry sn = map[n]; ++ BCReg s = snap_slot(sn); ++ int32_t ofs = 8*((int32_t)s-1-LJ_FR2); ++ IRRef ref = snap_ref(sn); ++ IRIns *ir = IR(ref); ++ if ((sn & SNAP_NORESTORE)) ++ continue; ++ if (irt_isnum(ir->t)) { ++ Reg src = ra_alloc1(as, ref, RSET_FPR); ++ emit_Fo(as, SW64I_FSTD, src, RID_BASE, ofs); ++ } else { ++ asm_tvstore64(as, RID_BASE, ofs, ref); ++ } ++ checkmclim(as); ++ } ++ lua_assert(map + nent == flinks); ++ WI_DEBUG_END(); ++} ++ ++/* -- GC handling --------------------------------------------------------- */ ++ ++/* Check GC threshold and do one or more GC steps. */ ++static void asm_gc_check(ASMState *as) ++{ ++ WI_DEBUG_BEFORE(); ++ const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit]; ++ IRRef args[2]; ++ MCLabel l_end; ++ Reg tmp; ++ ra_evictset(as, RSET_SCRATCH); ++ l_end = emit_label(as); ++ /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */ ++ /* Assumes asm_snap_prep() already done. 
*/ ++ asm_guard(as, SW64I_BNE, RID_RET); ++ ++ args[0] = ASMREF_TMP1; /* global_State *g */ ++ args[1] = ASMREF_TMP2; /* MSize steps */ ++ asm_gencall(as, ci, args); ++ emit_Ao(as, SW64I_LDI, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768); ++ tmp = ra_releasetmp(as, ASMREF_TMP2); ++ emit_loadi(as, tmp, as->gcsteps); ++ /* Jump around GC step if GC total < GC threshold. */ ++ emit_branch(as, SW64I_BNE, RID_R28, l_end); ++ emit_ABD(as, SW64I_CMPULT, RID_TMP, tmp, RID_R28); ++ ++ emit_getgl(as, tmp, gc.threshold); ++ emit_getgl(as, RID_TMP, gc.total); ++ as->gcsteps = 0; ++ checkmclim(as); ++ WI_DEBUG_END(); ++} ++ ++/* -- Loop handling ------------------------------------------------------- */ ++ ++/* Fixup the loop branch. */ ++static void asm_loop_fixup(ASMState *as) ++{ ++ WI_DEBUG_BEFORE(); ++ MCode *p = as->mctop; ++ MCode *target = as->mcp; ++ for (int i=1; iloopinv) { /* Inverted loop branch? */ ++ /* asm_guard already inverted the cond branch. Only patch the target. */ ++ p[-EXIT_ROOM] &= 0xffe00000u; ++ p[-EXIT_ROOM] |= ((uint32_t)(target-(p-EXIT_ROOM)-1) & 0x001fffffu); ++ } else { ++ __WI(p-EXIT_ROOM, SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH(target - (p-EXIT_ROOM) - 1)); ++ } ++ WI_DEBUG_END(); ++} ++ ++/* -- Head of trace ------------------------------------------------------- */ ++ ++/* Coalesce BASE register for a root trace. */ ++static void asm_head_root_base(ASMState *as) ++{ ++ WI_DEBUG_BEFORE(); ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (r != RID_BASE) ++ emit_move(as, r, RID_BASE); ++ } ++ WI_DEBUG_END(); ++} ++ ++/* Coalesce BASE register for a side trace. 
*/ ++static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow) ++{ ++ WI_DEBUG_BEFORE(); ++ IRIns *ir = IR(REF_BASE); ++ Reg r = ir->r; ++ if (as->loopinv) as->mctop--; ++ if (ra_hasreg(r)) { ++ ra_free(as, r); ++ if (rset_test(as->modset, r) || irt_ismarked(ir->t)) ++ ir->r = RID_INIT; /* No inheritance for modified BASE register. */ ++ if (irp->r == r) { ++ rset_clear(allow, r); /* Mark same BASE register as coalesced. */ ++ } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) { ++ rset_clear(allow, irp->r); ++ emit_move(as, r, irp->r); /* Move from coalesced parent reg. */ ++ } else { ++ emit_getgl(as, r, jit_base); /* Otherwise reload BASE. */ ++ } ++ } ++ WI_DEBUG_END(); ++ return allow; ++} ++ ++/* -- Tail of trace ------------------------------------------------------- */ ++ ++/* Fixup the tail code. */ ++static void asm_tail_fixup(ASMState *as, TraceNo lnk) ++{ ++ WI_DEBUG_BEFORE(); ++ MCode *p = as->mctop-1; ++ MCode *target; ++ int32_t spadj = as->T->spadjust; ++ ++ /* Patch exit branch. */ ++ target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp; ++ ++ for (int i=0; imctop - EXIT_ROOM; /* Leave room for exit branch. */ ++ if (as->loopref) { ++ as->invmcp = as->mcp = p; ++ } else { ++ as->mcp = p-1; /* Leave room for stack pointer adjustment. */ ++ as->invmcp = NULL; ++ } ++} ++ ++/* -- Trace setup --------------------------------------------------------- */ ++ ++/* Ensure there are enough stack slots for call arguments. */ ++static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci) ++{ ++ IRRef args[CCI_NARGS_MAX*2]; ++ uint32_t i, nargs = CCI_XNARGS(ci); ++ int nslots = 0, ngpr = REGARG_NUMGPR; ++ asm_collectargs(as, ir, ci, args); ++ for (i = 0; i < nargs; i++) { ++ if (ngpr > 0) ngpr--; else nslots += 2; ++ } ++ if (nslots > as->evenspill) /* Leave room for args in stack slots. */ ++ as->evenspill = nslots; ++ return irt_isfp(ir->t) ? 
REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET); ++} ++ ++static void asm_setup_target(ASMState *as) ++{ ++ asm_sparejump_setup(as); ++ asm_exitstub_setup(as); ++} ++ ++/* -- Trace patching ------------------------------------------------------ */ ++ ++int is_branch_op(MCode ins) ++{ ++ switch(ins & 0xfc000000) { ++ case SW64I_BEQ: case SW64I_BNE: case SW64I_BLT: ++ case SW64I_BLE: case SW64I_BGT: case SW64I_BGE: ++ return 1; ++ case SW64I_FBEQ: ++ case SW64I_FBGE: ++ case SW64I_FBGT: ++ case SW64I_FBLE: ++ case SW64I_FBLT: ++ case SW64I_FBNE: ++ return 1; ++ } ++ return 0; ++} ++ ++/* Patch exit jumps of existing machine code to a new target. */ ++void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target) ++{ ++ WI_DEBUG_BEFORE(); ++ MCode *p = T->mcode; ++ MCode *pe = (MCode *)((char *)p + T->szmcode); ++ MCode *px = exitstub_trace_addr(T, exitno); ++ MCode *cstart = NULL, *cstop = NULL; ++ MCode *mcarea = lj_mcode_patch(J, p, 0); ++#if SW64_DEBUG_WI ++ MCode exitload = SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISP(exitno, RID_TMP); ++ printf("try patching traceno:%d exitno:%d target:%p px:%p\n", ++ T->traceno, exitno, target, px); ++#else ++ MCode exitload = SW64I_LDI | SW64F_A(RID_TMP) | SW64F_DISP(exitno, RID_TMP); ++#endif ++ ++ for (p++; p < pe; p++) { ++ /* Look for load of exit number. */ ++ if (*p != exitload) { ++ continue; ++ } ++ ++ /* Look for exitstub branch. Yes, this covers all used branch variants. */ ++ if (is_branch_op(p[1]) ++ && (SW64F_BRANCH(p[1]) == SW64F_BRANCH((px - (p+1) - 1)))) { ++ ptrdiff_t delta = target - (p+1) - 1; ++ if (IS_SW64F_BRANCH_VALID(delta)) { /* Patch in-range branch. */ ++#if SW64_DEBUG_WI ++ printf("p1atch at %p\n", p+2); ++#endif ++ __WI_REPLACE(p+1, ++ (p[1] & (~0x1fffff)) | SW64F_BRANCH(delta), ++ p[1]); ++ patchbranch: ++ cstop = p+1; ++ if (!cstart) cstart = p; ++ } else { /* Branch out of range. Use spare jump slot in mcarea. 
*/ ++ TODO; ++ } ++ } else if (p[2] == SW64I_NOP) { ++#if SW64_DEBUG_WI ++ printf("p2atch at %p\n", p+2); ++#endif ++ ptrdiff_t delta = target - (p+2) - 1; ++ __WI_REPLACE(p+2, SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH(delta), SW64I_NOP); ++ goto patchbranch; ++ } ++ } ++ if (cstart) lj_mcode_sync(cstart, cstop); ++ lj_mcode_patch(J, mcarea, 1); ++ WI_DEBUG_END(); ++} ++ ++#undef TODO +diff --git a/src/lj_ccall.c b/src/lj_ccall.c +index 5c252e5..19e5a06 100644 +--- a/src/lj_ccall.c ++++ b/src/lj_ccall.c +@@ -562,6 +562,66 @@ + goto done; \ + } + ++#elif LJ_TARGET_SW64 ++/* -- SW64 calling conventions -------------------------------------------- */ ++ ++#define CCALL_HANDLE_STRUCTRET \ ++ cc->retref = 1; /* Return all structs by reference. */ \ ++ cc->gpr[ngpr++] = (GPRArg)dp; ++ ++#define CCALL_HANDLE_COMPLEXRET \ ++ /* Complex values are returned in 2 FPRs. */ \ ++ cc->retref = 0; ++ ++#define CCALL_HANDLE_COMPLEXRET2 \ ++ if (ctr->size == 2*sizeof(float)) { /* Copy complex float from FPRs. */ \ ++ ((float *)dp)[0] = (float)cc->fpr[0].d; \ ++ ((float *)dp)[1] = (float)cc->fpr[1].d; \ ++ } else { /* Copy complex double from FPRs. */ \ ++ ((double *)dp)[0] = cc->fpr[0].d; \ ++ ((double *)dp)[1] = cc->fpr[1].d; \ ++ } ++ ++#define CCALL_HANDLE_STRUCTARG \ ++ if (!(sz <= 8*6)) { \ ++ rp = cdataptr(lj_cdata_new(cts, did, sz)); \ ++ sz = CTSIZE_PTR; /* Pass all other structs by reference. */ \ ++ } ++ ++#define CCALL_HANDLE_COMPLEXARG \ ++ if (sz == 2*sizeof(float)) { \ ++ isfp = 2; \ ++ if (ngpr < maxgpr) \ ++ sz *= 2; \ ++ } ++ ++#define CCALL_HANDLE_REGARG \ ++ { /* Try to pass argument in GPRs. */ \ ++ cc->reg_is_word[ngpr] = (d->size == 4); \ ++ if (ctype_iscomplex(d->info)) { \ ++ cc->reg_is_word[ngpr] = d->size == 8; \ ++ cc->reg_is_word[ngpr+1] = d->size==8; \ ++ } \ ++ if (n > 1) { \ ++ lua_assert(n == 2 || n == 4); /* int64_t or complex (float). 
*/ \ ++ } \ ++ if (ngpr < maxgpr) { \ ++ dp = &cc->gpr[ngpr]; \ ++ if (ngpr + n > maxgpr) { \ ++ nsp += ngpr + n - maxgpr; /* Assumes contiguous gpr/stack fields. */ \ ++ if (nsp > CCALL_MAXSTACK) goto err_nyi; /* Too many arguments. */ \ ++ ngpr = maxgpr; \ ++ } else { \ ++ ngpr += n; \ ++ } \ ++ goto done; \ ++ } \ ++ } ++ ++#define CCALL_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ ctr = ctype_get(cts, CTID_DOUBLE); /* FPRs always hold doubles. */ ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -921,6 +981,9 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + + /* Clear unused regs to get some determinism in case of misdeclaration. */ + memset(cc->gpr, 0, sizeof(cc->gpr)); ++#if LJ_TARGET_SW64 ++ memset(cc->reg_is_word, 0, sizeof(cc->reg_is_word)); ++#endif + #if CCALL_NUM_FPR + memset(cc->fpr, 0, sizeof(cc->fpr)); + #endif +@@ -1044,7 +1107,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + if (isfp && d->size == sizeof(float)) + ((float *)dp)[1] = ((float *)dp)[0]; /* Floats occupy high slot. */ + #endif +-#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) ++#if LJ_TARGET_MIPS64 || (LJ_TARGET_ARM64 && LJ_BE) || LJ_TARGET_SW64 + if ((ctype_isinteger_or_bool(d->info) || ctype_isenum(d->info) + #if LJ_TARGET_MIPS64 + || (isfp && nsp == 0) +@@ -1068,7 +1131,7 @@ static int ccall_set_args(lua_State *L, CTState *cts, CType *ct, + cc->fpr[nfpr-1].d[0] = cc->fpr[nfpr-2].d[1]; /* Split complex double. */ + cc->fpr[nfpr-2].d[1] = 0; + } +-#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) ++#elif LJ_TARGET_ARM64 || (LJ_TARGET_MIPS64 && !LJ_ABI_SOFTFP) || LJ_TARGET_SW64 + if (isfp == 2 && (uint8_t *)dp < (uint8_t *)cc->stack) { + /* Split float HFA or complex float into separate registers. 
*/ + CTSize i = (sz >> 2) - 1; +diff --git a/src/lj_ccall.h b/src/lj_ccall.h +index 59f6648..a3cb613 100644 +--- a/src/lj_ccall.h ++++ b/src/lj_ccall.h +@@ -126,6 +126,22 @@ typedef union FPRArg { + struct { LJ_ENDIAN_LOHI(float f; , float g;) }; + } FPRArg; + ++#elif LJ_TARGET_SW64 ++ ++#define CCALL_NARG_GPR 6 ++#define CCALL_NARG_FPR 0 /* FP args are positional and overlay the GPR array. */ ++#define CCALL_NRET_GPR 1 ++#define CCALL_NRET_FPR 2 ++ ++#define CCALL_SPS_EXTRA 3 ++#define CCALL_SPS_FREE 1 ++ ++typedef intptr_t GPRArg; ++typedef union FPRArg { ++ double d; ++ struct { float f; float g; }; ++} FPRArg; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +@@ -174,6 +190,9 @@ typedef LJ_ALIGN(CCALL_ALIGN_CALLSTATE) struct CCallState { + #if LJ_32 + int32_t align1; + #endif ++#if LJ_TARGET_SW64 ++ uint8_t reg_is_word[CCALL_NUM_GPR]; ++#endif + #if CCALL_NUM_FPR + FPRArg fpr[CCALL_NUM_FPR]; /* Arguments/results in FPRs. */ + #endif +diff --git a/src/lj_ccallback.c b/src/lj_ccallback.c +index 846827b..434c649 100644 +--- a/src/lj_ccallback.c ++++ b/src/lj_ccallback.c +@@ -71,6 +71,10 @@ static MSize CALLBACK_OFS2SLOT(MSize ofs) + + #define CALLBACK_MCODE_HEAD 52 + ++#elif LJ_TARGET_SW64 ++ ++#define CALLBACK_MCODE_HEAD 4*13 ++ + #else + + /* Missing support for this architecture. 
*/ +@@ -238,6 +242,87 @@ static void callback_mcode_init(global_State *g, uint32_t *page) + } + lua_assert(p - page <= CALLBACK_MCODE_SIZE); + } ++ ++#elif LJ_TARGET_SW64 ++ ++static void split32Addr(uint32_t addr, int16_t* hi, int16_t* lo) ++{ ++ *hi = (int16_t)(addr >> 16); ++ *lo = (int16_t)(addr & 0xffff); ++ if (*lo < 0) { ++ *hi = *hi + 1; ++ *lo = (int16_t)(addr - ((int32_t)(*hi) << 16)); ++ } ++} ++static void split64AddrHI32(uint64_t addr, int16_t*hi, int16_t*lo) ++{ ++ split32Addr((uint32_t)(addr >> 32), hi, lo); ++} ++static void split64AddrLO32(uint64_t addr, int16_t*hi, int16_t*lo) ++{ ++ split32Addr((uint32_t)(addr & 0xffffffff), hi, lo); ++} ++ ++static void callback_mcode_init(global_State *g, uint32_t *page) ++{ ++ uint32_t *p = page; ++ void *target = (void *)lj_vm_ffi_callback; ++ int16_t hi, lo; ++ ++ MSize slot; ++ { ++ split64AddrHI32((uint64_t)target, &hi, &lo); ++ // ldih PV, h32_hi(zero) ++ *p++ = SW64I_LDIH | SW64F_A(RID_CFUNCADDR)| SW64F_DISP(hi, RID_ZERO); ++ // ldi PV, h32_lo(PV) ++ *p++ = SW64I_LDI | SW64F_A(RID_CFUNCADDR)| SW64F_DISP(lo, RID_CFUNCADDR); ++ // slli PV, 32, PV ++ *p++ = SW64I_SLLI | SW64F_A(RID_CFUNCADDR)| SW64F_IMM(32) | SW64F_D(RID_CFUNCADDR); ++ ++ split64AddrLO32((uint64_t)target, &hi, &lo); ++ // ldih at, lo32_hi(zero) ++ *p++ = SW64I_LDIH | SW64F_A(RID_R28)| SW64F_DISP(hi, RID_ZERO); ++ // ldi at, lo32_lo(at) ++ *p++ = SW64I_LDI | SW64F_A(RID_R28)| SW64F_DISP(lo, RID_R28); ++ // addl PV, at, PV ++ *p++ = SW64I_ADDL | SW64F_A(RID_CFUNCADDR)| SW64F_B(RID_R28) | SW64F_D(RID_CFUNCADDR); ++ } ++ ++ { ++ split64AddrHI32((uint64_t)g, &hi, &lo); ++ // ldih r2, h32_hi(zero) ++ *p++ = SW64I_LDIH | SW64F_A(RID_R2) | SW64F_DISP(hi, RID_ZERO); ++ // ldi r2, h32_lo(r2) ++ *p++ = SW64I_LDI | SW64F_A(RID_R2) | SW64F_DISP(lo, RID_R2); ++ // slli r2, 32, r2 ++ *p++ = SW64I_SLLI | SW64F_A(RID_R2)| SW64F_IMM(32) | SW64F_D(RID_R2); ++ ++ split64AddrLO32((uint64_t)g, &hi, &lo); ++ // ldih at, lo32_hi(zero) ++ *p++ = SW64I_LDIH | 
SW64F_A(RID_R28)| SW64F_DISP(hi, RID_ZERO); ++ // ldi at, lo32_lo(at) ++ *p++ = SW64I_LDI | SW64F_A(RID_R28)| SW64F_DISP(lo, RID_R28); ++ // addl r2, at, r2 ++ *p++ = SW64I_ADDL | SW64F_A(RID_R2)| SW64F_B(RID_R28) | SW64F_D(RID_R2); ++ } ++ ++ // call zero, 0(PV) ++ *p++ = SW64I_CALL | SW64F_A(RID_ZERO) | SW64F_DISP(0, RID_CFUNCADDR); ++ ++ //lua_assert((p - page) * 4 <= CALLBACK_MCODE_HEAD); ++ ++ for (slot = 0; slot < CALLBACK_MAX_SLOT; slot++) { ++ // ldi r1, slot(zero) ++ *p++ = SW64I_LDI | SW64F_A(RID_R1) | SW64F_DISP(slot, RID_ZERO); ++ ++ // br zero, (page-p-2) ++ *p = SW64I_BR | SW64F_A(RID_ZERO) | SW64F_BRANCH((page-p-2)+1); ++ p++; ++ } ++ ++ lua_assert(p - page <= CALLBACK_MCODE_SIZE); ++} ++ + #else + /* Missing support for this architecture. */ + #define callback_mcode_init(g, p) UNUSED(p) +@@ -495,6 +580,33 @@ void lj_ccallback_mcode_free(CTState *cts) + if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ + ((float *)dp)[1] = *(float *)dp; + ++#elif LJ_TARGET_SW64 ++ ++#define CALLBACK_HANDLE_REGARG \ ++ if (ngpr + n <= maxgpr) { \ ++ if (isfp) { \ ++ FPRCBArg *reg = &(cts->cb.fpr[ngpr]); \ ++ if (cta->size == 4) { \ ++ reg->f[0] = (float)reg->d; \ ++ }\ ++ sp = reg; \ ++ } else {\ ++ intptr_t *reg = &(cts->cb.gpr[ngpr]); \ ++ if (cta->size == 4) { \ ++ *reg = *(int*)reg; \ ++ } \ ++ sp = reg; \ ++ } \ ++ ngpr += n; \ ++ goto done; \ ++ } ++ ++#define CALLBACK_HANDLE_RET \ ++ if (ctype_isfp(ctr->info) && ctr->size == sizeof(float)) \ ++ *(double *)dp = *(float *)dp; /* FPRs always hold doubles. */ \ ++ if (ctype_isinteger(ctr->info) && ctr->size == 4) \ ++ *((int64_t *)dp) = *(int *)dp; ++ + #else + #error "Missing calling convention definitions for this architecture" + #endif +diff --git a/src/lj_crecord.c b/src/lj_crecord.c +index e32ae23..00de245 100644 +--- a/src/lj_crecord.c ++++ b/src/lj_crecord.c +@@ -130,7 +130,7 @@ static IRType crec_ct2irt(CTState *cts, CType *ct) + /* Number of windowed registers used for optimized memory copy. 
*/ + #if LJ_TARGET_X86 + #define CREC_COPY_REGWIN 2 +-#elif LJ_TARGET_PPC || LJ_TARGET_MIPS ++#elif LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_SW64 + #define CREC_COPY_REGWIN 8 + #else + #define CREC_COPY_REGWIN 4 +diff --git a/src/lj_dispatch.c b/src/lj_dispatch.c +index 5d6795f..d38b860 100644 +--- a/src/lj_dispatch.c ++++ b/src/lj_dispatch.c +@@ -38,7 +38,7 @@ LJ_STATIC_ASSERT(GG_NUM_ASMFF == FF_NUM_ASMFUNC); + + /* -- Dispatch table management ------------------------------------------- */ + +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_SW64 + #include + LJ_FUNCA_NORET void LJ_FASTCALL lj_ffh_coroutine_wrap_err(lua_State *L, + lua_State *co); +@@ -74,7 +74,7 @@ void lj_dispatch_init(GG_State *GG) + GG->g.bc_cfunc_ext = GG->g.bc_cfunc_int = BCINS_AD(BC_FUNCC, LUA_MINSTACK, 0); + for (i = 0; i < GG_NUM_ASMFF; i++) + GG->bcff[i] = BCINS_AD(BC__MAX+i, 0, 0); +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_SW64 + memcpy(GG->got, dispatch_got, LJ_GOT__MAX*sizeof(ASMFunction *)); + #endif + } +diff --git a/src/lj_dispatch.h b/src/lj_dispatch.h +index 5bda51a..9b8559e 100644 +--- a/src/lj_dispatch.h ++++ b/src/lj_dispatch.h +@@ -12,7 +12,7 @@ + #include "lj_jit.h" + #endif + +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_SW64 + /* Need our own global offset table for the dreaded MIPS calling conventions. */ + + #ifndef _LJ_VM_H +@@ -89,7 +89,7 @@ typedef uint16_t HotCount; + typedef struct GG_State { + lua_State L; /* Main thread. */ + global_State g; /* Global state. */ +-#if LJ_TARGET_MIPS ++#if LJ_TARGET_MIPS || LJ_TARGET_SW64 + ASMFunction got[LJ_GOT__MAX]; /* Global offset table. */ + #endif + #if LJ_HASJIT +diff --git a/src/lj_emit_sw64.h b/src/lj_emit_sw64.h +new file mode 100644 +index 0000000..dfd485f +--- /dev/null ++++ b/src/lj_emit_sw64.h +@@ -0,0 +1,454 @@ ++/* ++** SW64 instruction emitter. ++** Copyright (C) 2019 deepin inc. 
See Copyright Notice in luajit.h ++*/ ++ ++#include ++#define TODO do {printf("\e[1;34mTODO IMPLEMENT %s\e[m\n", __FUNCTION__); asm("bpt;bpt");} while(0); ++ ++#if SW64_DEBUG_WI ++#include ++const char* disass_ins(int ins, void* addr) { ++ static char cmd[1024]; ++ sprintf(cmd, "./luajit -e 'require(\"jit.dis_sw64\"):wi_debug(%d, %p)'", ins, addr); ++ FILE* out = popen(cmd, "r"); ++ memset(cmd, 0, sizeof(cmd)); ++ fread(cmd, sizeof(cmd), 1, out); ++ pclose(out); ++ for (int i=0; icurins; \ ++ if (*x != origin) { \ ++ printf("BUG: 1 overwrite[%p](0x%x) to 0x%lx on %s !\n", \ ++ x, (int)origin, (int)ins, __FUNCTION__); \ ++ asm("bpt;bpt"); \ ++ } else { \ ++ printf("\tIR' %d --> %p #%s\n", ir-REF_BASE, x, disass_ins(ins, x)); \ ++ } \ ++ *x = ins; \ ++ } while(0) ++ ++#define __WI(addr, ins) \ ++ do { \ ++ MCode* x = addr; \ ++ IRRef ir = -1; \ ++ if (as!=0) ir = as->curins; \ ++ if (*x != 0 && *x != 0x43ff075f) { \ ++ printf("BUG: 2 overwrite[%p](0x%x)(%s) to 0x%lx(%s) on %s! IR:0x%x\n", \ ++ x, *x, disass_ins(*x, x), (unsigned long)ins, disass_ins(ins, x), __FUNCTION__, \ ++ ir); \ ++ asm("bpt;bpt"); \ ++ } else { \ ++ printf("\tIR %d --> %p #%s\n", ir-REF_BASE, x, disass_ins(ins, x)); \ ++ } \ ++ *x = ins; \ ++ } while(0) ++#define WI_DEBUG_BEFORE() printf("before %s\n", __FUNCTION__); ++#define WI_DEBUG_END() printf("end %s\n", __FUNCTION__); ++#else ++#define __WI(addr, ins) *(addr) = ins; ++#define __WI_REPLACE(addr, ins, origin) *(addr) = ins; ++#define WI_DEBUG_BEFORE() ++#define WI_DEBUG_END() ++#endif ++ ++#if LJ_64 ++static intptr_t get_k64val(IRIns *ir) ++{ ++ if (ir->o == IR_KINT64) { ++ return (intptr_t)ir_kint64(ir)->u64; ++ } else if (ir->o == IR_KGC) { ++ return (intptr_t)ir_kgc(ir); ++ } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) { ++ return (intptr_t)ir_kptr(ir); ++ } else if (ir->o == IR_KNUM) { ++ return (intptr_t)ir_knum(ir)->u64; ++ } else { ++ lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL); ++ return ir->i; /* Sign-extended. 
*/ ++ } ++} ++#endif ++ ++#if LJ_64 ++#define get_kval(ir) get_k64val(ir) ++#else ++#define get_kval(ir) ((ir)->i) ++#endif ++ ++ ++inline static void split32Addr(uint32_t addr, int16_t* hi, int16_t* lo) ++{ ++ *hi = (int16_t)(addr >> 16); ++ *lo = (int16_t)(addr & 0xffff); ++ if (*lo < 0) { ++ *hi = *hi + 1; ++ *lo = (int16_t)(addr - ((int32_t)(*hi) << 16)); ++ } ++} ++inline static void split64AddrHI32(uint64_t addr, int16_t*hi, int16_t*lo) ++{ ++ int32_t addr_hi = (int32_t)(addr >> 32); ++ int32_t addr_lo = (addr & 0xffffffff); ++ if (addr_lo < 0) { ++ addr_hi++; ++ } ++ split32Addr((uint32_t)addr_hi, hi, lo); ++} ++inline static void split64AddrLO32(uint64_t addr, int16_t*hi, int16_t*lo) ++{ ++ int32_t addr_hi = addr >> 32; ++ int32_t addr_lo = addr & 0xffffffff; ++ if (addr_lo < 0) { ++ addr_hi++; ++ addr_lo = addr - ((int64_t)addr_hi << 32); ++ } ++ split32Addr((uint32_t)(addr_lo), hi, lo); ++} ++ ++/* -- Emit basic instructions --------------------------------------------- */ ++ ++static void emit_AjD(ASMState *as, SW64Ins mi, Reg a, uint8_t i, Reg d) ++{ ++ lua_assert(a <=31 && a >= 0); ++ lua_assert(d <= 31 && d >= 0); ++ __WI(--as->mcp, mi | SW64F_A(a) | SW64F_j(i) | SW64F_D(d)); ++} ++ ++static void emit_Ao(ASMState *as, SW64Ins mi, Reg a, Reg b, int32_t disp) ++{ ++ lua_assert(b <= 31 && b >= 0); ++ lua_assert(((int16_t)disp) == disp); ++ __WI(--as->mcp, mi | SW64F_A(a) | SW64F_DISP(disp, b)); ++} ++ ++static void emit_ABD(ASMState *as, SW64Ins mi, Reg a, Reg b, Reg d) ++{ ++ lua_assert(a <=31 && a >= 0); ++ lua_assert(b <=31 && b >= 0); ++ lua_assert(d <=31 && d >= 0); ++ __WI(--as->mcp, mi | SW64F_A(a) | SW64F_B(b) | SW64F_D(d)); ++} ++ ++static void emit_ABCD(ASMState *as, SW64Ins mi, Reg a, Reg b, Reg c, Reg d) ++{ ++ lua_assert(a <=31 && a >= 0); ++ lua_assert(b <=31 && b >= 0); ++ lua_assert(c <=31 && c >= 0); ++ lua_assert(d <=31 && d >= 0); ++ __WI(--as->mcp, mi | SW64F_A(a) | SW64F_B(b) | SW64F_C(c) | SW64F_D(d)); ++} ++ ++static void 
emit_FGI(ASMState *as, SW64Ins mi, Reg f, Reg g, Reg i) ++{ ++ lua_assert(f >= 32 && f <= 63); ++ lua_assert(g >= 32 && g <= 63); ++ lua_assert(i >= 32 && i <= 63); ++ __WI(--as->mcp, mi | SW64F_F(f) | SW64F_G(g) | SW64F_I(i)); ++} ++static void emit_FGHI(ASMState *as, SW64Ins mi, Reg f, Reg g, Reg h, Reg i) ++{ ++ lua_assert(f >= 32 && f <= 63); ++ lua_assert(g >= 32 && g <= 63); ++ lua_assert(h >= 32 && h <= 63); ++ lua_assert(i >= 32 && i <= 63); ++ __WI(--as->mcp, mi | SW64F_F(f) | SW64F_G(g) | SW64F_H(h) | SW64F_I(i)); ++} ++static void emit_Fo(ASMState *as, SW64Ins mi, Reg f, Reg b, int16_t disp) ++{ ++ lua_assert(f >= 32 && f <= 63); ++ lua_assert(b <= 31 && b >= 0); ++ __WI(--as->mcp, mi | SW64F_F(f) | SW64F_DISP(disp, b)); ++} ++static void emit_FD(ASMState *as, SW64Ins mi, Reg f, Reg d) ++{ ++ lua_assert(f >= 32 && f <= 63); ++ lua_assert(d >= 0 && d <= 31); ++ __WI(--as->mcp, mi | SW64F_F(f) | SW64F_D(d)); ++} ++static void emit_AI(ASMState *as, SW64Ins mi, Reg a, Reg i) ++{ ++ lua_assert(a >= 0 && a <= 31); ++ lua_assert(i >= 32 && i <= 63); ++ __WI(--as->mcp, mi | SW64F_A(a) | SW64F_I(i)); ++} ++ ++static void emit_GI(ASMState *as, SW64Ins mi, Reg g, Reg i) ++{ ++ lua_assert(g >= 32 && g <= 63); ++ lua_assert(i >= 32 && i <= 63); ++ __WI(--as->mcp, mi | SW64F_F(RID_F31) | SW64F_G(g) | SW64F_I(i)); ++} ++ ++static void emit_void(ASMState *as, SW64Ins mi) ++{ ++ __WI(--as->mcp, mi); ++} ++ ++static void emit_DEXTM(ASMState *as, Reg rt, Reg rs, uint32_t pos, uint32_t size) ++{ ++ emit_AjD(as, SW64I_SRLI, rt, 64-size, rt); ++ emit_AjD(as, SW64I_SLLI, rs, 64-pos-size, rt); ++} ++ ++/* -- Emit loads/stores --------------------------------------------------- */ ++ ++/* Prefer rematerialization of BASE/L from global_State over spills. */ ++#define emit_canremat(ref) ((ref) <= REF_BASE) ++ ++/* Load a 32 bit constant into a GPR. 
*/ ++static void emit_loadi(ASMState *as, Reg r, int32_t i) ++{ ++ int16_t hi, lo; ++ split32Addr(i, &hi, &lo); ++ emit_ABD(as, SW64I_ADDW, RID_ZERO, r, r); ++ if (i == 0 && r != RID_ZERO) { ++ emit_Ao(as, SW64I_LDI, r, RID_ZERO, 0); ++ return; ++ } ++ lua_assert(r != RID_ZERO); ++ if (lo != 0) { ++ emit_Ao(as, SW64I_LDI, r, hi ? r : RID_ZERO, lo); ++ } ++ if (hi != 0) { ++ // ldih dest, hi(zero) ++ emit_Ao(as, SW64I_LDIH, r, RID_ZERO, hi); ++ } ++} ++ ++/* Load a 64 bit constant into a GPR. */ ++static void emit_loadu64(ASMState *as, Reg r, uint64_t u64) ++{ ++ int16_t h_hi, h_lo; ++ int16_t l_hi, l_lo; ++ if (u64 == 0) { ++ emit_Ao(as, SW64I_LDI, r, RID_ZERO, 0); ++ return; ++ } ++ lua_assert(r != RID_ZERO); ++ ++ split64AddrLO32(u64, &l_hi, &l_lo); ++ split64AddrHI32(u64, &h_hi, &h_lo); ++ int has_high = h_hi || h_lo; ++ ++ if (l_lo) { ++ emit_Ao(as, SW64I_LDI, r, l_hi || has_high ? r : RID_ZERO, l_lo); ++ } ++ if (l_hi) { ++ emit_Ao(as, SW64I_LDIH, r, has_high ? r : RID_ZERO, l_hi); ++ } ++ ++ if (has_high) { ++ emit_AjD(as, SW64I_SLLI, r, 32, r); ++ } ++ if (h_lo) { ++ emit_Ao(as, SW64I_LDI, r, h_hi ? r : RID_ZERO, h_lo); ++ } ++ if (h_hi) { ++ emit_Ao(as, SW64I_LDIH, r, RID_ZERO, h_hi); ++ } ++} ++ ++#define emit_loada(as, r, addr) emit_loadu64(as, (r), u64ptr((addr))) ++ ++static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow); ++static void ra_allockreg(ASMState *as, intptr_t k, Reg r); ++ ++/* Get/set from constant pointer. 
*/ ++static void emit_lsptr(ASMState *as, SW64Ins mi, Reg r, void *p, RegSet allow) ++{ ++ intptr_t jgl = (intptr_t)(J2G(as->J)); ++ intptr_t i = (intptr_t)(p); ++ Reg base; ++ if ((uint32_t)(i-jgl-32768) < 0x1000) { ++ i = i-jgl-32768; ++ base = RID_JGL; ++ } else { ++ base = ra_allock(as, i-(int16_t)i, allow); ++ i = (int16_t)i; ++ } ++ emit_Ao(as, mi, r, base, i); ++} ++ ++static void emit_loadk64(ASMState *as, Reg r, IRIns *ir) ++{ ++ const uint64_t *k = &ir_k64(ir)->u64; ++ Reg r64 = r; ++ if (rset_test(RSET_FPR, r)) { ++ emit_lsptr(as, SW64I_FLDD, ((r) & 31), (void *)&ir_knum((ir))->u64, RSET_GPR); ++ } else { ++ if ((uint32_t)((intptr_t)k-(intptr_t)J2G(as->J)) < 65536) ++ emit_lsptr(as, SW64I_LDL, r64, (void *)k, 0); ++ else ++ emit_loadu64(as, r64, *k); ++ } ++} ++ ++/* Get/set global_State fields. */ ++static void emit_lsglptr(ASMState *as, SW64Ins mi, Reg r, int32_t ofs) ++{ ++ emit_Ao(as, mi, r, RID_JGL, ofs-32768); ++} ++ ++#define emit_getgl(as, r, field) \ ++ emit_lsglptr(as, SW64I_AL, (r), (int32_t)offsetof(global_State, field)) ++#define emit_setgl(as, r, field) \ ++ emit_lsglptr(as, SW64I_AS, (r), (int32_t)offsetof(global_State, field)) ++ ++/* Trace number is determined from per-trace exit stubs. */ ++#define emit_setvmstate(as, i) UNUSED(i) ++ ++/* -- Emit control-flow instructions -------------------------------------- */ ++ ++/* Label for internal jumps. */ ++typedef MCode *MCLabel; ++ ++/* Return label pointing to current PC. 
*/ ++#define emit_label(as) ((as)->mcp) ++ ++static SW64Ins invert_cond(SW64Ins mi) ++{ ++ switch (mi) { ++ case SW64I_BEQ: return SW64I_BNE; ++ case SW64I_BNE: return SW64I_BEQ; ++ case SW64I_FBEQ: return SW64I_FBNE; ++ case SW64I_FBNE: return SW64I_FBEQ; ++ case SW64I_BLT: return SW64I_BGE; ++ case SW64I_BGE: return SW64I_BLT; ++ case SW64I_BLE: return SW64I_BGT; ++ case SW64I_BGT: return SW64I_BLE; ++ default: ++ printf("TODO HANDLING INVERT_COND:%x\n", mi); ++ asm("bpt;bpt"); ++ return SW64I_NOP; ++ } ++} ++ ++static void emit_branch(ASMState *as, SW64Ins mi, Reg a, MCode *target) ++{ ++ MCode *p = as->mcp; ++ ptrdiff_t delta = target - p; ++ lua_assert(IS_SW64F_BRANCH_VALID(delta)); ++ __WI(--p, mi | SW64F_A(a) | SW64F_BRANCH(delta)); ++ as->mcp = p; ++} ++ ++static void emit_jmp(ASMState *as, MCode *target) ++{ ++ TODO ++ /* *--as->mcp = SW64I_NOP; */ ++ /* emit_branch(as, SW64I_B, RID_ZERO, RID_ZERO, (target)); */ ++} ++ ++static void emit_call(ASMState *as, void *target, int needcfa) ++{ ++ MCode *p = as->mcp; ++ __WI(--p, SW64I_CALL | SW64F_A(RID_RA) | SW64F_B(RID_CFUNCADDR)); ++ needcfa = 1; ++ // TODO: use br if target in range ++ as->mcp = p; ++ if (needcfa) ra_allockreg(as, (intptr_t)target, RID_CFUNCADDR); ++} ++ ++/* -- Emit generic operations --------------------------------------------- */ ++ ++#define emit_move(as, dst, src) \ ++ emit_Ao(as, SW64I_LDI, (dst), (src), 0) ++ ++#define emit_fmove(as, dst, src) \ ++ emit_FGI(as, SW64I_FADDD, (src), (RID_FZERO), (dst)) ++ ++/* Generic move between two regs. */ ++static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src) ++{ ++ if (dst < RID_MAX_GPR) ++ emit_move(as, dst, src); ++ else ++ emit_fmove(as, dst, src); ++} ++ ++/* Generic load of register with base and (small) offset address. */ ++static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_Ao(as, irt_is64(ir->t) ? 
SW64I_LDL : SW64I_LDW, r, base, ofs); ++ else ++ emit_Fo(as, irt_isnum(ir->t) ? SW64I_FLDD : SW64I_FLDS, ++ r, base, ofs); ++} ++ ++/* Generic store of register with base and (small) offset address. */ ++static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs) ++{ ++ if (r < RID_MAX_GPR) ++ emit_Ao(as, irt_is64(ir->t) ? SW64I_STL : SW64I_STW, r, base, ofs); ++ else ++ emit_Fo(as, irt_isnum(ir->t) ? SW64I_FSTD : SW64I_FSTS, ++ r, base, ofs); ++} ++ ++/* Add offset to pointer. */ ++static void emit_addptr(ASMState *as, Reg r, int32_t ofs) ++{ ++ if (ofs) { ++ lua_assert(checki16(ofs)); ++ emit_Ao(as, SW64I_LDI, r, r, ofs); ++ } ++} ++ ++#define emit_spsub(as, ofs) emit_addptr(as, RID_SP, -(ofs)) ++ ++ ++ ++static void emit_rotx32(ASMState *as, Reg src, uint32_t shift, Reg dest, Reg tmp, int mode) ++{ ++ lua_assert(src != tmp); ++ lua_assert(dest != tmp); ++ ++ emit_ABD(as, SW64I_ADDW, dest, RID_ZERO, dest); ++ emit_ABD(as, SW64I_BIS, dest, tmp, dest); ++ if (mode == 1) { ++ emit_AjD(as, SW64I_SRLI, dest, (-shift) & 31, dest); ++ emit_AjD(as, SW64I_SLLI, dest, shift, tmp); ++ } else if (mode == 2) { ++ emit_AjD(as, SW64I_SLLI, dest, (-shift) & 31, dest); ++ emit_AjD(as, SW64I_SRLI, dest, shift, tmp); ++ } else { ++ lua_assert(0); ++ } ++ emit_AjD(as, SW64I_EXTLWI, src, 0, dest); ++} ++static void emit_rotx(ASMState *as, Reg src, uint32_t shift, Reg dest, Reg tmp, int mode) ++{ ++ lua_assert(src != tmp); ++ lua_assert(dest != tmp); ++ ++ emit_ABD(as, SW64I_BIS, dest, tmp, dest); ++ if (mode == 1) { ++ emit_AjD(as, SW64I_SRLI, src, (-shift) & 63, dest); ++ emit_AjD(as, SW64I_SLLI, src, shift, tmp); ++ } else if (mode == 2) { ++ emit_AjD(as, SW64I_SLLI, src, (-shift) & 63, dest); ++ emit_AjD(as, SW64I_SRLI, src, shift, tmp); ++ } else { ++ lua_assert(0); ++ } ++} ++#define emit_rotl32(as, src, shift, dest, tmp) emit_rotx32(as, src, shift, dest, tmp, 1) ++#define emit_rotr32(as, src, shift, dest, tmp) emit_rotx32(as, src, shift, dest, tmp, 2) 
++#define emit_rotl(as, src, shift, dest, tmp) emit_rotx(as, src, shift, dest, tmp, 1) ++#define emit_rotr(as, src, shift, dest, tmp) emit_rotx(as, src, shift, dest, tmp, 2) ++ ++#undef TODO +diff --git a/src/lj_frame.h b/src/lj_frame.h +index 19c49a4..a6e805e 100644 +--- a/src/lj_frame.h ++++ b/src/lj_frame.h +@@ -264,6 +264,18 @@ enum { LJ_CONT_TAILCALL, LJ_CONT_FFI_CALLBACK }; /* Special continuations. */ + #endif + #define CFRAME_OFS_MULTRES 0 + #define CFRAME_SHIFT_MULTRES 3 ++ ++#elif LJ_TARGET_SW64 ++#define CFRAME_OFS_ERRF 172 ++#define CFRAME_OFS_NRES 168 ++#define CFRAME_OFS_PREV 160 ++#define CFRAME_OFS_L 152 ++#define CFRAME_OFS_PC 144 ++#define CFRAME_SIZE 176 ++ ++#define CFRAME_OFS_MULTRES 0 ++#define CFRAME_SHIFT_MULTRES 3 ++ + #else + #error "Missing CFRAME_* definitions for this architecture" + #endif +diff --git a/src/lj_gdbjit.c b/src/lj_gdbjit.c +index c219ffa..c89b60b 100644 +--- a/src/lj_gdbjit.c ++++ b/src/lj_gdbjit.c +@@ -306,6 +306,9 @@ enum { + #elif LJ_TARGET_MIPS + DW_REG_SP = 29, + DW_REG_RA = 31, ++#elif LJ_TARGET_SW64 ++ DW_REG_SP = 30, ++ DW_REG_RA = 26, + #else + #error "Unsupported target architecture" + #endif +@@ -383,6 +386,8 @@ static const ELFheader elfhdr_template = { + .machine = 20, + #elif LJ_TARGET_MIPS + .machine = 8, ++#elif LJ_TARGET_SW64 ++ .machine = 0x9916, + #else + #error "Unsupported target architecture" + #endif +@@ -591,6 +596,10 @@ static void LJ_FASTCALL gdbjit_ehframe(GDBJITctx *ctx) + for (i = 23; i >= 16; i--) { DB(DW_CFA_offset|i); DUV(26-i); } + for (i = 30; i >= 20; i -= 2) { DB(DW_CFA_offset|32|i); DUV(42-i); } + } ++#elif LJ_TARGET_SW64 ++ { ++ /* TODO: setup saved register position */ ++ } + #else + #error "Unsupported target architecture" + #endif +diff --git a/src/lj_jit.h b/src/lj_jit.h +index 92054e3..c2a9f70 100644 +--- a/src/lj_jit.h ++++ b/src/lj_jit.h +@@ -55,6 +55,11 @@ + #else + #define JIT_F_CPUSTRING "\010MIPS64R2" + #endif ++ ++#elif LJ_TARGET_SW64 ++#define JIT_F_SW1621 0x00000010 
++#define JIT_F_CPU_FIRST JIT_F_SW1621 ++#define JIT_F_CPUSTRING "\4SW6A" + #else + #define JIT_F_CPU_FIRST 0 + #define JIT_F_CPUSTRING "" +diff --git a/src/lj_snap.c b/src/lj_snap.c +index bb063c2..33beb5c 100644 +--- a/src/lj_snap.c ++++ b/src/lj_snap.c +@@ -715,7 +715,7 @@ static void snap_restoredata(GCtrace *T, ExitState *ex, + #if !LJ_SOFTFP + if (r >= RID_MAX_GPR) { + src = (int32_t *)&ex->fpr[r-RID_MIN_FPR]; +-#if LJ_TARGET_PPC ++#if LJ_TARGET_PPC || LJ_TARGET_SW64 + if (sz == 4) { /* PPC FPRs are always doubles. */ + *(float *)dst = (float)*(double *)src; + return; +diff --git a/src/lj_target.h b/src/lj_target.h +index 8dcae95..c7bed12 100644 +--- a/src/lj_target.h ++++ b/src/lj_target.h +@@ -55,7 +55,7 @@ typedef uint32_t RegSP; + /* Bitset for registers. 32 registers suffice for most architectures. + ** Note that one set holds bits for both GPRs and FPRs. + */ +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_SW64 + typedef uint64_t RegSet; + #else + typedef uint32_t RegSet; +@@ -69,7 +69,7 @@ typedef uint32_t RegSet; + #define rset_set(rs, r) (rs |= RID2RSET(r)) + #define rset_clear(rs, r) (rs &= ~RID2RSET(r)) + #define rset_exclude(rs, r) (rs & ~RID2RSET(r)) +-#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 ++#if LJ_TARGET_PPC || LJ_TARGET_MIPS || LJ_TARGET_ARM64 || LJ_TARGET_SW64 + #define rset_picktop(rs) ((Reg)(__builtin_clzll(rs)^63)) + #define rset_pickbot(rs) ((Reg)__builtin_ctzll(rs)) + #else +@@ -144,6 +144,8 @@ typedef uint32_t RegCost; + #include "lj_target_ppc.h" + #elif LJ_TARGET_MIPS + #include "lj_target_mips.h" ++#elif LJ_TARGET_SW64 ++#include "lj_target_sw64.h" + #else + #error "Missing include for target CPU" + #endif +diff --git a/src/lj_target_sw64.h b/src/lj_target_sw64.h +new file mode 100644 +index 0000000..cb2f611 +--- /dev/null ++++ b/src/lj_target_sw64.h +@@ -0,0 +1,283 @@ ++/* ++** Definitions for SW64 CPUs. 
++** Copyright (C) 2019-2019 deepin inc. See Copyright Notice in luajit.h ++*/ ++ ++#ifndef _LJ_TARGET_SW64_H ++#define _LJ_TARGET_SW64_H ++ ++/* -- Registers IDs ------------------------------------------------------- */ ++ ++#define GPRDEF(_) \ ++ _(R0) _(R1) _(R2) _(R3) _(R4) _(R5) _(R6) _(R7) \ ++ _(R8) _(R9) _(R10) _(R11) _(R12) _(R13) _(R14) _(R15) \ ++ _(R16) _(R17) _(R18) _(R19) _(R20) _(R21) _(R22) _(R23) \ ++ _(R24) _(R25) _(RA) _(R27) _(R28) _(R29) _(SP) _(R31) ++#define FPRDEF(_) \ ++ _(F0) _(F1) _(F2) _(F3) _(F4) _(F5) _(F6) _(F7) \ ++ _(F8) _(F9) _(F10) _(F11) _(F12) _(F13) _(F14) _(F15) \ ++ _(F16) _(F17) _(F18) _(F19) _(F20) _(F21) _(F22) _(F23) \ ++ _(F24) _(F25) _(F26) _(F27) _(F28) _(F29) _(F30) _(F31) ++#define VRIDDEF(_) ++ ++#define RIDENUM(name) RID_##name, ++ ++enum { ++ GPRDEF(RIDENUM) /* General-purpose registers (GPRs). */ ++ FPRDEF(RIDENUM) /* Floating-point registers (FPRs). */ ++ RID_MAX, ++ RID_ZERO = RID_R31, ++ RID_FZERO = RID_F31, ++ RID_TMP = RID_RA, ++ RID_GP = RID_R29, ++ ++ /* Calling conventions. */ ++ RID_RET = RID_R0, ++ RID_FPRET = RID_F0, ++ RID_CFUNCADDR = RID_R27, ++ ++ /* These definitions must match with the *.dasc file(s): */ ++ RID_BASE = RID_R9, /* Interpreter BASE. */ ++ RID_LPC = RID_R11, /* Interpreter PC. */ ++ RID_DISPATCH = RID_R12, /* Interpreter DISPATCH table. */ ++ RID_LREG = RID_R13, /* Interpreter L. */ ++ RID_JGL = RID_R15, /* On-trace: global_State + 32768. */ ++ ++ /* Register ranges [min, max) and number of registers. */ ++ RID_MIN_GPR = RID_R0, ++ RID_MAX_GPR = RID_R31+1, ++ RID_MIN_FPR = RID_F0, ++ RID_MAX_FPR = RID_F31+1, ++ RID_NUM_GPR = RID_MAX_GPR - RID_MIN_GPR, ++ RID_NUM_FPR = RID_MAX_FPR - RID_MIN_FPR /* Only even regs are used. */ ++}; ++ ++#define RID_NUM_KREF RID_NUM_GPR ++#define RID_MIN_KREF RID_R0 ++ ++/* -- Register sets ------------------------------------------------------- */ ++ ++/* Make use of all registers, except ZERO, TMP, SP, JGL and GP. 
*/ ++#define RSET_FIXED \ ++ (RID2RSET(RID_ZERO)|RID2RSET(RID_TMP)|RID2RSET(RID_SP)|\ ++ RID2RSET(RID_JGL)|RID2RSET(RID_GP)|RID2RSET(RID_R28)) ++ ++#define RSET_GPR (RSET_RANGE(RID_MIN_GPR, RID_MAX_GPR) - RSET_FIXED) ++#define RSET_FPR (RSET_RANGE(RID_MIN_FPR, RID_MAX_FPR) - RID2RSET(RID_FZERO)) ++#define RSET_ALL (RSET_GPR|RSET_FPR) ++#define RSET_INIT RSET_ALL ++ ++ ++#define RSET_SCRATCH_GPR \ ++ (RSET_RANGE(RID_R0, RID_R8+1)|RSET_RANGE(RID_R16, RID_R25+1)| \ ++ RID2RSET(RID_R27)) ++#define RSET_SCRATCH_FPR (RSET_RANGE(RID_F0, RID_F1+1)|RSET_RANGE(RID_F10,RID_F30+1)) ++#define RSET_SCRATCH (RSET_SCRATCH_GPR|RSET_SCRATCH_FPR) ++ ++#define REGARG_FIRSTGPR RID_R16 ++#define REGARG_LASTGPR RID_R21 ++#define REGARG_NUMGPR 6 ++#define REGARG_FIRSTFPR RID_F16 ++#define REGARG_LASTFPR RID_F21 ++#define REGARG_NUMFPR 6 ++ ++ ++ ++/* -- Spill slots --------------------------------------------------------- */ ++ ++/* Spill slots are 32 bit wide. An even/odd pair is used for FPRs. ++** ++** SPS_FIXED: Available fixed spill slots in interpreter frame. ++** This definition must match with the *.dasc file(s). ++** ++** SPS_FIRST: First spill slot for general use. ++*/ ++#if LJ_32 ++#define SPS_FIXED 5 ++#else ++#define SPS_FIXED 4 ++#endif ++ ++#define SPS_FIRST 4 ++ ++#define SPOFS_TMP 0 ++ ++#define sps_scale(slot) (4 * (int32_t)(slot)) ++#define sps_align(slot) (((slot) - SPS_FIXED + 1) & ~1) ++ ++/* -- Exit state ---------------------------------------------------------- */ ++ ++/* This definition must match with the *.dasc file(s). */ ++typedef struct { ++ lua_Number fpr[RID_NUM_FPR]; /* Floating-point registers. */ ++ intptr_t gpr[RID_NUM_GPR]; /* General-purpose registers. */ ++ int32_t spill[256]; /* Spill slots. */ ++} ExitState; ++ ++/* Highest exit + 1 indicates stack check. */ ++#define EXITSTATE_CHECKEXIT 1 ++ ++/* Return the address of a per-trace exit stub. 
*/ ++static LJ_AINLINE uint32_t *exitstub_trace_addr_(uint32_t *p) ++{ ++ while (*p == 0x43ff075f) p++; /* Skip SW64_NOP. */ ++ return p; ++} ++/* Avoid dependence on lj_jit.h if only including lj_target.h. */ ++#define exitstub_trace_addr(T, exitno) \ ++ exitstub_trace_addr_((MCode *)((char *)(T)->mcode + (T)->szmcode)) ++ ++/* -- Instructions -------------------------------------------------------- */ ++ ++/* Instruction fields. */ ++#define SW64F_A(r) (((r) & 0x1f) << 21) ++#define SW64F_B(r) (((r) & 0x1f) << 16) ++#define SW64F_C(r) (((r) & 0x1f) << 5) ++#define SW64F_D(r) (((r) & 0x1f) << 0) ++ ++#define SW64F_F(r) (((r) & 0x1f) << 21) ++#define SW64F_G(r) (((r) & 0x1f) << 16) ++#define SW64F_H(r) (((r) & 0x1f) << 5) ++#define SW64F_I(r) (((r) & 0x1f) << 0) ++ ++ ++#define SW64F_IMM(i) (((i) & 0xff) << 13) ++#define SW64F_DISP(d, r) (((d) & 0xffff) | SW64F_B(r)) ++#define SW64F_DISPI(d) (((d) & 0xffff) | SW64F_B(RID_ZERO)) ++#define SW64F_BRANCH(d) ((d) & 0x1fffff) ++#define SW64F_j(i) SW64F_IMM(i) ++#define IS_SW64F_BRANCH_VALID(d) (((int32_t)d) < 0x1fffff) ++ ++#define SW64_OP(ins) ((ins) & 0x3f000000) ++ ++typedef enum SW64Ins { ++ ++ /* Load/store instructions. 
*/ ++ SW64I_LDL = 0x8c000000, ++ SW64I_STL = 0xac000000, ++ SW64I_LDW = 0x88000000, ++ SW64I_STW = 0xa8000000, ++ SW64I_LDHU = 0x84000000, ++ SW64I_STH = 0xa4000000, ++ SW64I_LDBU = 0x80000000, ++ SW64I_STB = 0xa0000000, ++ SW64I_FLDD = 0x9c000000, ++ SW64I_FSTD = 0xbc000000, ++ SW64I_FLDS = 0x98000000, ++ SW64I_FSTS = 0xb8000000, ++ ++ SW64I_LDIH = 0xfc000000, ++ SW64I_LDI = 0xf8000000, ++ SW64I_CALL= 0x04000000, ++ SW64I_BR = 0x10000000, ++ ++ SW64I_S4ADDL = 0x40000140, ++ SW64I_S4ADDLI = 0x48000140, ++ SW64I_S8ADDL = 0x40000180, ++ SW64I_S8ADDLI =0x48000180, ++ SW64I_ADDL = 0x40000100, ++ SW64I_ADDLI =0x48000100, ++ SW64I_ADDW = 0x40000000, ++ SW64I_ADDWI =0x48000000, ++ SW64I_SUBL = 0x40000120, ++ SW64I_SUBLI =0x48000120, ++ SW64I_SUBW = 0x40000020, ++ SW64I_MULL = 0x40000300, ++ SW64I_MULLI =0x48000300, ++ SW64I_MULW = 0x40000200, ++ SW64I_MULWI =0x48000200, ++ SW64I_UMULH = 0x40000320, ++ SW64I_UMULHI =0x48000320, ++ ++ SW64I_FADDS = 0x60000000, ++ SW64I_FADDD = 0x60000020, ++ SW64I_FSUBD = 0x60000060, ++ SW64I_FMULD = 0x600000a0, ++ SW64I_FDIVD = 0x600000e0, ++ ++ SW64I_SLL = 0x40000900, ++ SW64I_SLLI = 0x48000900, ++ SW64I_SRLI = 0x48000920, ++ SW64I_SRL = 0x40000920, ++ SW64I_SRAI = 0x48000940, ++ SW64I_SRA = 0x40000940, ++ ++ SW64I_AND = 0x40000700, ++ SW64I_ANDI = 0x48000700, ++ SW64I_XOR = 0x40000780, ++ SW64I_XORI = 0x48000780, ++ SW64I_BIS = 0x40000740, ++ SW64I_BISI = 0x48000740, ++ SW64I_ORNOT = 0x40000760, ++ SW64I_ORNOTI = 0x48000760, ++ SW64I_EQV = 0x400007a0, ++ SW64I_EQVI =0x480007a0, ++ ++ SW64I_BEQ = 0xc0000000, ++ SW64I_BNE = 0xc4000000, ++ SW64I_BLT = 0xc8000000, ++ SW64I_BLE = 0xcc000000, ++ SW64I_BGT = 0xd0000000, ++ SW64I_BGE = 0xd4000000, ++ ++ SW64I_CMPEQ = 0x40000500, ++ SW64I_CMPULE = 0x40000580, ++ SW64I_CMPLE = 0x40000540, ++ SW64I_CMPULT = 0x40000560, ++ SW64I_CMPULTI = 0x48000560, ++ SW64I_CMPLT = 0x40000520, ++ SW64I_CMPLTI = 0x48000520, ++ ++ SW64I_FCMPEQ = 0x60000200, ++ SW64I_FCMPLE = 0x60000220, ++ SW64I_FCMPLT = 0x60000240, 
++ SW64I_FCMPUN = 0x60000260, ++ SW64I_FBEQ = 0xe0000000, ++ SW64I_FBGE = 0xf4000000, ++ SW64I_FBGT = 0xf0000000, ++ SW64I_FBLE = 0xec000000, ++ SW64I_FBLT = 0xe8000000, ++ SW64I_FBNE = 0xe4000000, ++ ++ SW64I_BPT = 0x00000080, ++ SW64I_NOP = 0x43ff075f, //excb, same as gcc's asm("nop") ++ ++ SW64I_FCVTLW = 0x63e00520, ++ SW64I_FCVTWL = 0x63e00500, ++ SW64I_FCVTLS = 0x63e005a0, ++ SW64I_FCVTLD = 0x63e005e0, ++ SW64I_FCVTDL = 0x63e004e0, ++ SW64I_FCVTDL_Z = 0x63e00480, ++ SW64I_FIMOVD = 0x401f0f00, ++ SW64I_IFMOVD = 0x601f0820, ++ SW64I_IFMOVS = 0x601f0800, ++ SW64I_FCVTDS = 0x63e00420, ++ SW64I_FCVTSD = 0x63e00400, ++ ++ SW64I_MASKLLI = 0x48000c60, ++ SW64I_MASKLL = 0x40000c60, ++ ++ SW64I_FABS = 0x63e00600, ++ SW64I_FCPYSN = 0x60000640, ++ ++ SW64I_SEXTB = 0x43e00d40, ++ SW64I_SEXTH = 0x43e00d60, ++ ++ SW64I_EXTLBI = 0x48000a00, ++ SW64I_EXTLHI = 0x48000a20, ++ SW64I_EXTLWI = 0x48000a40, ++ ++ SW64I_SETFPEC1 = 0x60000aa0, ++ SW64I_SETFPEC3 = 0x60000ae0, ++ ++ SW64I_SELEQ = 0x44000000, ++ SW64I_SELNE = 0x44001400, ++ SW64I_FSELEQ = 0x64004000, ++ SW64I_FSELNE = 0x64004400, ++ ++ SW64I_AL = SW64I_LDL, ++ SW64I_AS = SW64I_STL, ++ ++} SW64Ins; ++ ++#endif +diff --git a/src/lj_trace.c b/src/lj_trace.c +index d85b47f..954d388 100644 +--- a/src/lj_trace.c ++++ b/src/lj_trace.c +@@ -827,8 +827,57 @@ static TraceNo trace_exit_find(jit_State *J, MCode *pc) + } + #endif + ++#if SW64_DEBUG_WI ++LUALIB_API const char *o_tostr(lua_State *L, TValue *o, ++ const char *def, size_t *len) ++{ ++ GCstr *s; ++ if (LJ_LIKELY(tvisstr(o))) { ++ s = strV(o); ++ } else if (tvisnil(o)) { ++ if (len != NULL) *len = def ? 
strlen(def) : 0; ++ return def; ++ } else if (tvisnumber(o)) { ++ lj_gc_check(L); ++ s = lj_strfmt_number(L, o); ++ setstrV(L, o, s); ++ } else { ++ return "Other"; ++ //lj_err_argt(L, 0, LUA_TSTRING); ++ } ++ if (len != NULL) ++ *len = s->len; ++ return strdata(s); ++} ++void dump_base(char* msg, lua_State* L) ++{ ++ return; ++ int n = L->top - L->base; ++ printf("%s N:%d\n", msg, n); ++ ++ for (int i=0; ibase+i; ++ char* t = lj_typename(o); ++ ++ if (tvisnum(o)) { ++ double n = numberVnum(o); ++ printf("%d\t%s\t%f\n", i, t, n); ++ } else if (tvisint(o)) { ++ printf("%d\t%s\t%d\n", i, t, intV(o)); ++ } else { ++ printf("%d\t%s\n", i, t); ++ } ++ } ++} ++#endif ++ + /* A trace exited. Restore interpreter state. */ ++#if SW64_DEBUG_WI ++int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr, unsigned long exit_addr) ++#else + int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) ++#endif + { + ERRNO_SAVE + lua_State *L = J->L; +@@ -851,6 +900,10 @@ int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr) + } + #endif + lua_assert(T != NULL && J->exitno < T->nsnap); ++#if SW64_DEBUG_WI ++ printf("-----------%s exitno:%d nsnap:%d traceno:%d exit_addr:0x%lx\n", __FUNCTION__, ++ J->exitno, T->nsnap, T->traceno, exit_addr); ++#endif + exd.J = J; + exd.exptr = exptr; + errcode = lj_vm_cpcall(L, NULL, &exd, trace_exit_cp); +diff --git a/src/lj_trace.h b/src/lj_trace.h +index 22cae74..4c20367 100644 +--- a/src/lj_trace.h ++++ b/src/lj_trace.h +@@ -36,7 +36,11 @@ LJ_FUNC void lj_trace_freestate(global_State *g); + LJ_FUNC void lj_trace_ins(jit_State *J, const BCIns *pc); + LJ_FUNCA void LJ_FASTCALL lj_trace_hot(jit_State *J, const BCIns *pc); + LJ_FUNCA void LJ_FASTCALL lj_trace_stitch(jit_State *J, const BCIns *pc); ++#if SW64_DEBUG_WI ++LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr, unsigned long exit_addr); ++#else + LJ_FUNCA int LJ_FASTCALL lj_trace_exit(jit_State *J, void *exptr); ++#endif + + /* Signal asynchronous abort of trace or end of trace. 
*/ + #define lj_trace_abort(g) (G2J(g)->state &= ~LJ_TRACE_ACTIVE) +diff --git a/src/lj_vmmath.c b/src/lj_vmmath.c +index b231d3e..4d4d744 100644 +--- a/src/lj_vmmath.c ++++ b/src/lj_vmmath.c +@@ -57,7 +57,7 @@ double lj_vm_foldarith(double x, double y, int op) + } + } + +-#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS ++#if (LJ_HASJIT && !(LJ_TARGET_ARM || LJ_TARGET_ARM64 || LJ_TARGET_PPC)) || LJ_TARGET_MIPS || LJ_TARGET_SW64 + int32_t LJ_FASTCALL lj_vm_modi(int32_t a, int32_t b) + { + uint32_t y, ua, ub; +diff --git a/src/vm_sw64.dasc b/src/vm_sw64.dasc +new file mode 100644 +index 0000000..38a103f +--- /dev/null ++++ b/src/vm_sw64.dasc +@@ -0,0 +1,4761 @@ ++|// Low-level VM code for SW64 CPUs. ++|// Bytecode interpreter, fast functions and helper functions. ++|// Copyright (C) 2023 Mike Pall. See Copyright Notice in luajit.h ++| ++|.arch sw64 ++|.section code_op, code_sub ++| ++|.actionlist build_actionlist ++|.globals GLOB_ ++|.globalnames globnames ++|.externnames extnames ++|// Fixed register assignments for the interpreter. ++|// Don't use: r31 = 0, r29 = gp, r30 = sp, r26 = ra ++| ++|// The following must be C callee-save (but BASE is often refetched). ++|.define BASE, r9 //s0 // Base of current Lua stack frame. ++|.define KBASE, r10 //s1 // Constants of current Lua function. ++|.define PC, r11 //s2 // Next PC. ++|.define DISPATCH, r12 //s3 // Opcode dispatch table. ++|.define LREG, r13 //s4 // Register holding lua_State (also in SAVE_L). ++|.define MULTRES, r21 //a5 // Size of multi-result: (nresults+1). ++| ++|.define JGL, r15 //fp // On-trace: global_State + 32768. ++| ++|// Constants for type-comparisons, stores and conversions. C callee-save. ++|.define TISNIL, r15 //fp ++|.define TISNUM, r8 //t7 ++|.define TOBIT, f8 // 2^52 + 2^51. ++| ++|// The following temporaries are not saved across C calls, except for RA. ++|.define RA, r14 //mips:s7 sw64:s5 // Callee-save. 
++|.define RB, r22 //t8 ++|.define RC, r23 //t9 ++|.define RD, r24 //t10 ++|.define INS, r25 //t11 ++| ++|.define AT, r28 //at // Assembler temporary. ++|.define FAT, f28 //at // Assembler temporary. ++|.define TMP0, r5 //t4 ++|.define TMP1, r6 //t5 ++|.define TMP2, r7 //t6 ++|.define TMP3, r3 //t2 ++|.define TMP4, r4 //t3 ++| ++|// Calling conventions. ++|.define CFUNCADDR, r27 //t12/pv ++|.define CARG1, r16 //a0 ++|.define CARG2, r17 //a1 ++|.define CARG3, r18 //a2 ++|.define CARG4, r19 //a3 ++|.define CARG5, r20 //a4 ++|.define CARG6, r21 //a5 ++| ++|.define CRET1, r0 //v0 ++|.define CRET2, r2 //t1 ++| ++|.define FCARG1, f16 ++|.define FCARG2, f17 ++|.define FCARG3, f18 ++|.define FCARG4, f19 ++|.define FCARG5, f20 ++|.define FCARG6, f21 ++| ++|.define FCRET1, f0 ++|.define FCRET2, f1 ++| ++|.define FTMP0, f10 ++|.define FTMP1, f11 ++|.define FTMP2, f12 ++|.define FTMP3, f13 ++|.define FTMP4, f14 ++| ++|// Stack layout while in interpreter. Must match with lj_frame.h. ++| ++|.define CFRAME_SPACE, 176 // Delta for sp. ++| ++|//----- 16 byte aligned, <-- sp entering interpreter ++|.define SAVE_ERRF, 172 // 32 bit values. ++|.define SAVE_NRES, 168 ++|.define SAVE_CFRAME, 160 // 64 bit values. ++|.define SAVE_L, 152 ++|.define SAVE_PC, 144 ++|//----- 16 byte aligned ++|.define SAVE_GPR_, 80 // .. 80+8*8: 64 bit GPR saves. s0-s5 ++|.define SAVE_FPR_, 16 // .. 16+8*8: 64 bit FPR saves. 
f2-f9 ++| ++|.define TMPX, 8 ++|.define TMPD, 0 ++|//----- 16 byte aligned ++| ++|.define TMPD_OFS, 0 ++| ++|.define SAVE_MULTRES, TMPD ++| ++|//----------------------------------------------------------------------- ++| ++|.macro saveregs ++| ldi sp, -CFRAME_SPACE(sp) //TODO ++| stl ra, SAVE_GPR_+7*8(sp) ++| stl r15, SAVE_GPR_+6*8(sp) ++| stl r14, SAVE_GPR_+5*8(sp) ++| stl r13, SAVE_GPR_+4*8(sp) ++| stl r12, SAVE_GPR_+3*8(sp) ++| stl r11, SAVE_GPR_+2*8(sp) ++| stl r10, SAVE_GPR_+1*8(sp) ++| stl r9, SAVE_GPR_+0*8(sp) ++| fstd f9, SAVE_FPR_+7*8(sp) ++| fstd f8, SAVE_FPR_+6*8(sp) ++| fstd f7, SAVE_FPR_+5*8(sp) ++| fstd f6, SAVE_FPR_+4*8(sp) ++| fstd f5, SAVE_FPR_+3*8(sp) ++| fstd f4, SAVE_FPR_+2*8(sp) ++| fstd f3, SAVE_FPR_+1*8(sp) ++| fstd f2, SAVE_FPR_+0*8(sp) ++|.endmacro ++| ++|.macro restoreregs_ret ++| ldl ra, SAVE_GPR_+7*8(sp) ++| ldl r15, SAVE_GPR_+6*8(sp) ++| ldl r14, SAVE_GPR_+5*8(sp) ++| ldl r13, SAVE_GPR_+4*8(sp) ++| ldl r12, SAVE_GPR_+3*8(sp) ++| ldl r11, SAVE_GPR_+2*8(sp) ++| ldl r10, SAVE_GPR_+1*8(sp) ++| ldl r9, SAVE_GPR_+0*8(sp) ++| fldd f9, SAVE_FPR_+7*8(sp) ++| fldd f8, SAVE_FPR_+6*8(sp) ++| fldd f7, SAVE_FPR_+5*8(sp) ++| fldd f6, SAVE_FPR_+4*8(sp) ++| fldd f5, SAVE_FPR_+3*8(sp) ++| fldd f4, SAVE_FPR_+2*8(sp) ++| fldd f3, SAVE_FPR_+1*8(sp) ++| fldd f2, SAVE_FPR_+0*8(sp) ++| ldi sp, CFRAME_SPACE(sp) ++| ret zero, 0(ra) ++|.endmacro ++| ++|// Type definitions. Some of these are only used for documentation. ++|.type L, lua_State, LREG ++|.type GL, global_State ++|.type TVALUE, TValue ++|.type GCOBJ, GCobj ++|.type STR, GCstr ++|.type TAB, GCtab ++|.type LFUNC, GCfuncL ++|.type CFUNC, GCfuncC ++|.type PROTO, GCproto ++|.type UPVAL, GCupval ++|.type NODE, Node ++|.type NARGS8, int ++|.type TRACE, GCtrace ++|.type SBUF, SBuf ++| ++|//----------------------------------------------------------------------- ++| ++|// Trap for not-yet-implemented parts. 
++|.macro NYI; syscall ; .endmacro //TODO ++| ++|//----------------------------------------------------------------------- ++| ++|// Access to frame relative to BASE. ++|.define FRAME_PC, -8 ++|.define FRAME_FUNC, -16 ++| ++|//----------------------------------------------------------------------- ++| ++|// Endian-specific defines. SW64 is little endian. ++|.define OFS_RD, 2 ++|.define OFS_RA, 1 ++|.define OFS_OP, 0 ++| ++|// Instruction decode. ++|.macro decode_BC4b, dst; slli dst, 2, dst; addwi dst, 0, dst; .endmacro ++|.macro decode_BC8b, dst; slli dst, 3, dst; addwi dst, 0, dst; .endmacro ++|.macro decode_OP, dst, ins; andi ins, 0xff, dst; .endmacro ++|.macro decode_RA, dst, ins; extlb ins, 0x1, dst; decode_BC8b dst; .endmacro ++|.macro decode_RB, dst, ins; extlb ins, 0x3, dst; decode_BC8b dst; .endmacro ++|.macro decode_RC, dst, ins; extlb ins, 0x2, dst; decode_BC8b dst; .endmacro ++|.macro decode_RD, dst, ins; extlh ins, 0x2, dst; decode_BC8b dst; .endmacro ++|.macro decode_RDtoRC8, dst, src; ldi dst, 0x7f8(zero); and src, dst, dst; .endmacro ++| ++|// Instruction fetch. ++|.macro ins_NEXT1 ++| ldw INS, 0(PC) ++| ldi PC, 4(PC) ++|.endmacro ++|// Instruction decode+dispatch. ++|.macro ins_NEXT2 ++| decode_OP TMP1, INS ++| decode_BC8b TMP1 ++| addl TMP1, DISPATCH, TMP0 ++| ldl TMP4, 0(TMP0) ++| decode_RD RD, INS ++| decode_RA RA, INS ++| jmp zero, 0(TMP4) ++|.endmacro ++|.macro ins_NEXT ++| ins_NEXT1 ++| ins_NEXT2 ++|.endmacro ++| ++|// Instruction footer. ++|.if 1 ++| // Replicated dispatch. Less unpredictable branches, but higher I-Cache use. ++| .define ins_next, ins_NEXT ++| .define ins_next_, ins_NEXT ++| .define ins_next1, ins_NEXT1 ++| .define ins_next2, ins_NEXT2 ++|.else ++| // Common dispatch. Lower I-Cache use, only one (very) unpredictable branch. ++| // Affects only certain kinds of benchmarks (and only with -j off). 
++| .macro ins_next ++| br zero, ->ins_next ++| .endmacro ++| .macro ins_next1 ++| .endmacro ++| .macro ins_next2 ++| br zero, ->ins_next ++| .endmacro ++| .macro ins_next_ ++| ->ins_next: ++| ins_NEXT ++| .endmacro ++|.endif ++| ++|// Call decode and dispatch. ++|.macro ins_callt ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, FRAME_PC(BASE) = PC ++| ldl PC, LFUNC:RB->pc ++| ldw INS, 0(PC) ++| ldi PC, 4(PC) ++| decode_OP TMP1, INS ++| decode_RA RA, INS ++| decode_BC8b TMP1 ++| addl TMP1, DISPATCH, TMP0 ++| ldl TMP0, 0(TMP0) ++| addl RA, BASE, RA ++| jmp zero, 0(TMP0) ++|.endmacro ++| ++|.macro ins_call ++| // BASE = new base, RB = LFUNC/CFUNC, RC = nargs*8, PC = caller PC ++| stl PC, FRAME_PC(BASE) ++| ins_callt ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++| ++|.macro branch_RD ++| zapi RD, 0xf0, RD; srli RD, 1, TMP0 ++| ldih TMP4, -0x2(zero) // -BCBIAS_J*4 ++| addw TMP0, TMP4, TMP0 // (jump - 0x8000)<<2 ++| addl PC, TMP0, PC ++|.endmacro ++| ++|// Assumes DISPATCH is relative to GL. 
++#define DISPATCH_GL(field) (GG_DISP2G + (int)offsetof(global_State, field)) ++#define DISPATCH_J(field) (GG_DISP2J + (int)offsetof(jit_State, field)) ++#define GG_DISP2GOT (GG_OFS(got) - GG_OFS(dispatch)) ++#define DISPATCH_GOT(name) (GG_DISP2GOT + sizeof(void*)*LJ_GOT_##name) ++| ++#define PC2PROTO(field) ((int)offsetof(GCproto, field)-(int)sizeof(GCproto)) ++| ++|.macro load_got, func ++| ldl CFUNCADDR, DISPATCH_GOT(func)(DISPATCH) ++|.endmacro ++| ++|.macro call_intern, func ++| stl MULTRES, TMPX(sp) ++| call ra, 0(CFUNCADDR) ++| ldl MULTRES, TMPX(sp) ++|.endmacro ++| ++|.macro call_extern ++| stl MULTRES, TMPX(sp) ++| call ra, 0(CFUNCADDR) ++| ldl MULTRES, TMPX(sp) ++| .endmacro ++| ++|.macro hotcheck, delta, target ++| srli PC, 1, TMP1 ++| andi TMP1, 126, TMP1 ++| addl TMP1, DISPATCH, TMP1 ++| ldhu TMP2, GG_DISP2HOT(TMP1) ++| subwi TMP2, delta, TMP2 ++| sth TMP2, GG_DISP2HOT(TMP1) ++| blt TMP2, target ++|.endmacro ++| ++|.macro hotloop ++| hotcheck HOTCOUNT_LOOP, ->vm_hotloop ++|.endmacro ++| ++|.macro hotcall ++| hotcheck HOTCOUNT_CALL, ->vm_hotcall ++|.endmacro ++| ++|// Set current VM state. Uses TMP0. ++|.macro li_vmstate, st; ldi TMP0, ~LJ_VMST_..st(zero); .endmacro ++|.macro st_vmstate; stw TMP0, DISPATCH_GL(vmstate)(DISPATCH); .endmacro ++| ++|// Move table write barrier back. Overwrites mark and tmp. 
++|.macro barrierback, tab, mark, tmp, target ++| ldl tmp, DISPATCH_GL(gc.grayagain)(DISPATCH) ++| andi mark, ~LJ_GC_BLACK & 255, mark // black2gray(tab) ++| stl tab, DISPATCH_GL(gc.grayagain)(DISPATCH) ++| stb mark, tab->marked ++| stl tmp, tab->gclist ++| br zero, target ++|.endmacro ++| ++|.macro .DEXTM, rt, rs, pos, size ++| slli rs, 64-pos-size, rt ++| srli rt, 64-size, rt ++|.endmacro ++| ++|.macro .DINS, rt, rs, pos, size ++| ldi CARG5, 1(zero); slli CARG5, size, CARG5; ldi CARG5, -1(CARG5); ++| and rs, CARG5, TMP4; ++| slli TMP4, pos, TMP4; ++| slli CARG5, pos, CARG5; ++| bic rt, CARG5, rt; ++| bis rt, TMP4, rt; ++|.endmacro ++| ++|// Clear type tag. Isolate lowest 47 bits of reg. ++|.macro cleartp, reg; .DEXTM reg, reg, 0, 47; .endmacro ++|.macro cleartp, dst, reg; .DEXTM dst, reg, 0, 47; .endmacro ++| ++|// Set type tag: Merge 17 type bits into bits [47, 63] of dst. ++|.macro settp, dst, tp; .DINS dst, tp, 47, 17; .endmacro ++| ++|// Extract (negative) type tag. ++|.macro gettp, dst, src; srai src, 47, dst; .endmacro ++| ++|// Macros to check the TValue type and extract the GCobj. Branch on failure. 
++|.macro checktp, reg, tp, target ++| gettp TMP4, reg ++| ldi TMP4, tp(TMP4) ++| cleartp reg ++| bne TMP4, target ++|.endmacro ++|.macro checktp, dst, reg, tp, target ++| gettp TMP4, reg ++| ldi TMP4, tp(TMP4) ++| cleartp dst, reg ++| bne TMP4, target ++|.endmacro ++|.macro checkstr, reg, target; checktp reg, -LJ_TSTR, target; .endmacro ++|.macro checktab, reg, target; checktp reg, -LJ_TTAB, target; .endmacro ++|.macro checkfunc, reg, target; checktp reg, -LJ_TFUNC, target; .endmacro ++|.macro checkint, reg, target ++| gettp TMP4, reg ++| ldi TISNUM, LJ_TISNUM(zero) ++| cmpeq TMP4, TISNUM, AT ++| beq AT, target ++|.endmacro ++|.macro checknum, reg, target ++| gettp TMP4, reg ++| ldi AT, LJ_TISNUM(zero) ++| cmpult TMP4, AT, TMP4 ++| beq TMP4, target ++|.endmacro ++| ++|.macro mov_false, reg ++| ldi reg, 0x0001(zero) ++| slli reg, 47, reg ++| ornot zero, reg, reg // ~reg ++|.endmacro ++|.macro mov_true, reg ++| ldi reg, 0x0001(zero) ++| slli reg, 48, reg ++| ornot zero, reg, reg // ~reg ++|.endmacro ++| ++|.macro fcmp, op, a, b, reg, tmp; ++| fcmp..op a, b, tmp ++| fcvtdl tmp, tmp ++| fimovd tmp, reg ++|.endmacro ++| ++|//----------------------------------------------------------------------- ++ ++/* Generate subroutines used by opcodes and other parts of the VM. */ ++/* The .code_sub section should be last to help static branch prediction. */ ++static void build_subroutines(BuildCtx *ctx) ++{ ++ |.code_sub ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Return handling ---------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_returnp: ++ | // See vm_return. Also: TMP2 = previous base. ++ | andi PC, FRAME_P, TMP0 ++ | ++ | // Return from pcall or xpcall fast func. ++ | mov_true TMP1 ++ | beq TMP0, ->cont_dispatch ++ | ldl PC, FRAME_PC(TMP2) // Fetch PC of previous frame. ++ | bis TMP2, zero, BASE // Restore caller base. 
++ | // Prepending may overwrite the pcall frame, so do it at the end. ++ | stl TMP1, -8(RA) // Prepend true to results. ++ | ldi RA, -8(RA) ++ | ++ |->vm_returnc: ++ | addwi RD, 8, RD // RD = (nresults+1)*8. ++ | andi PC, FRAME_TYPE, TMP0 ++ | addwi zero, LUA_YIELD, CRET1 ++ | beq RD, ->vm_unwind_c_eh ++ | bis RD, zero, MULTRES ++ | beq TMP0, ->BC_RET_Z // Handle regular return to Lua. ++ | ++ |->vm_return: ++ | // BASE = base, RA = resultptr, RD/MULTRES = (nresults+1)*8, PC = return ++ | // TMP0 = PC & FRAME_TYPE ++ | subwi zero, 8, TMP2 // TMP2 = 0xfffffff8 ++ | xori TMP0, FRAME_C, TMP0 ++ | and TMP2, PC, TMP2 ++ | subl BASE, TMP2, TMP2 // TMP2 = previous base. ++ | bne TMP0, ->vm_returnp ++ | ++ | subwi RD, 8, TMP1 ++ | stl TMP2, L->base ++ | li_vmstate C ++ | ldw TMP2, SAVE_NRES(sp) ++ | ldi BASE, -16(BASE) ++ | st_vmstate ++ | s8addwi TMP2, 0, TMP2 ++ | beq TMP1, >2 ++ |1: ++ | subwi TMP1, 8, TMP1 ++ | ldl CRET1, 0(RA) ++ | addli RA, 8, RA ++ | stl CRET1, 0(BASE) ++ | addli BASE, 8, BASE ++ | bne TMP1, <1 ++ | ++ |2: ++ | cmpeq TMP2, RD, AT ++ | beq AT, >6 ++ |3: ++ | stl BASE, L->top // Store new top. ++ | ++ |->vm_leave_cp: ++ | ldl TMP0, SAVE_CFRAME(sp) // Restore previous C frame. ++ | bis zero, zero, CRET1 // Ok return status for vm_pcall. ++ | stl TMP0, L->cframe ++ | ++ |->vm_leave_unw: ++ | restoreregs_ret ++ | ++ |6: ++ | ldl TMP1, L->maxstack ++ | cmplt TMP2, RD, TMP0 ++ | // More results wanted. Check stack size and fill up results with nil. ++ | cmplt BASE, TMP1, TMP1 ++ | bne TMP0, >7 ++ | beq TMP1, >8 ++ | stl TISNIL, 0(BASE) ++ | addwi RD, 8, RD ++ | addli BASE, 8, BASE ++ | br zero, <2 ++ | ++ |7: // Less results wanted. ++ | subw RD, TMP2, TMP0 ++ | subl BASE, TMP0, TMP0 // Either keep top or shrink it. ++ | selne TMP2, TMP0, BASE, BASE // LUA_MULTRET+1 case? ++ | br zero, <3 ++ | ++ |8: // Corner case: need to grow stack for filling up results. ++ | // This can happen if: ++ | // - A C function grows the stack (a lot). 
++ | // - The GC shrinks the stack in between. ++ | // - A return back from a lua_call() with (high) nresults adjustment. ++ | load_got lj_state_growstack ++ | stl BASE, L->top ++ | bis RD, zero, MULTRES ++ | zapi TMP2, 0xf0, CARG2; srli CARG2, 3, CARG2 ++ | bis L, zero, CARG1 ++ | call_intern lj_state_growstack // (lua_State *L, int n) ++ | ldw TMP2, SAVE_NRES(sp) ++ | ldl BASE, L->top // Need the (realloced) L->top in BASE. ++ | bis MULTRES, zero, RD ++ | s8addwi TMP2, 0, TMP2 ++ | br zero, <2 ++ | ++ |->vm_unwind_c: // Unwind C stack, return from vm_pcall. ++ | // (void *cframe, int errcode) ++ | bis CARG1, zero, sp ++ | bis CARG2, zero, CRET1 ++ |->vm_unwind_c_eh: // Landing pad for external unwinder. ++ | ldl L, SAVE_L(sp) ++ | ldi TMP0, ~LJ_VMST_C(zero) ++ | addwi TMP0, 0, TMP0 ++ | ldl GL:TMP1, L->glref ++ | stw TMP0, GL:TMP1->vmstate ++ | br zero, ->vm_leave_unw ++ | ++ |->vm_unwind_ff: // Unwind C stack, return from ff pcall. ++ | // (void *cframe) ++ | ldi AT, CFRAME_RAWMASK(zero) ++ | and CARG1, AT, sp ++ |->vm_unwind_ff_eh: // Landing pad for external unwinder. ++ | ldl L, SAVE_L(sp) ++ | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). ++ | ldi TISNIL, LJ_TNIL(zero) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | ldl BASE, L->base ++ | ldl DISPATCH, L->glref // Setup pointer to dispatch table. ++ | ifmovs TMP3, TOBIT ++ | mov_false TMP1 ++ | li_vmstate INTERP ++ | ldl PC, FRAME_PC(BASE) // Fetch PC of previous frame. ++ | fcvtsd TOBIT, TOBIT ++ | ldi RA, -8(BASE) // Results start at BASE-8. ++ | ldi DISPATCH, GG_G2DISP(DISPATCH) ++ | stl TMP1, 0(RA) // Prepend false to error message. ++ | st_vmstate ++ | ldi RD, 16(zero) // 2 results: false + error message. 
++ | br zero, ->vm_returnc ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Grow stack for calls ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_growstack_c: // Grow stack for C function. ++ | ldi CARG2, LUA_MINSTACK(zero) ++ | br zero, >2 ++ | ++ |->vm_growstack_l: // Grow stack for Lua function. ++ | // BASE = new base, RA = BASE+framesize*8, RC = nargs, PC = first PC ++ | addl BASE, RC, RC ++ | subl RA, BASE, RA ++ | stl BASE, L->base ++ | ldi PC, 4(PC) // Must point after first instruction. ++ | stl RC, L->top ++ | zapi RA, 0xf0, CARG2; srli CARG2, 3, CARG2 ++ |2: ++ | // L->base = new base, L->top = top ++ | load_got lj_state_growstack ++ | stl PC, SAVE_PC(sp) ++ | bis L, zero, CARG1 ++ | call_intern lj_state_growstack // (lua_State *L, int n) ++ | ldl BASE, L->base ++ | ldl RC, L->top ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | subl RC, BASE, RC ++ | cleartp LFUNC:RB ++ | // BASE = new base, RB = LFUNC/CFUNC, RC = nargs, FRAME_PC(BASE) = PC ++ | ins_callt // Just retry the call. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Entry points into the assembler VM --------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_resume: // Setup C frame and resume thread. ++ | // (lua_State *L, TValue *base, int nres1 = 0, ptrdiff_t ef = 0) ++ | saveregs ++ | bis CARG1, zero, L ++ | ldl DISPATCH, L->glref // Setup pointer to dispatch table. ++ | bis CARG2, zero, BASE ++ | ldbu TMP1, L->status ++ | stl L, SAVE_L(sp) ++ | ldi PC, FRAME_CP(zero) ++ | ldi TMP0, CFRAME_RESUME(sp) ++ | ldi DISPATCH, GG_G2DISP(DISPATCH) ++ | stw zero, SAVE_NRES(sp) ++ | stw zero, SAVE_ERRF(sp) ++ | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. 
++ | stl zero, SAVE_CFRAME(sp) ++ | stl TMP0, L->cframe ++ | beq TMP1, >3 ++ | ++ | // Resume after yield (like a return). ++ | stl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | bis BASE, zero, RA ++ | ldl BASE, L->base ++ | ldl TMP1, L->top ++ | ldl PC, FRAME_PC(BASE) ++ | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). ++ | subl TMP1, BASE, RD ++ | ifmovs TMP3, TOBIT ++ | stb zero, L->status ++ | fcvtsd TOBIT, TOBIT ++ | li_vmstate INTERP ++ | ldi RD, 8(RD) ++ | st_vmstate ++ | bis RD, zero, MULTRES ++ | andi PC, FRAME_TYPE, TMP0 ++ | ldi TISNIL, LJ_TNIL(zero) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | beq TMP0, ->BC_RET_Z ++ | br zero, ->vm_return ++ | ++ |->vm_pcall: // Setup protected C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1, ptrdiff_t ef) ++ | saveregs ++ | stw CARG4, SAVE_ERRF(sp) ++ | ldi PC, FRAME_CP(zero) ++ | br zero, >1 ++ | ++ |->vm_call: // Setup C frame and enter VM. ++ | // (lua_State *L, TValue *base, int nres1) ++ | saveregs ++ | ldi PC, FRAME_C(zero) ++ | ++ |1: // Entry point for vm_pcall above (PC = ftype). ++ | ldl TMP1, L:CARG1->cframe ++ | bis CARG1, zero, L ++ | stw CARG3, SAVE_NRES(sp) ++ | ldl DISPATCH, L->glref // Setup pointer to dispatch table. ++ | stl CARG1, SAVE_L(sp) ++ | bis CARG2, zero, BASE ++ | ldi DISPATCH, GG_G2DISP(DISPATCH) ++ | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | stl TMP1, SAVE_CFRAME(sp) ++ | stl sp, L->cframe // Add our C frame to cframe chain. ++ | ++ |3: // Entry point for vm_cpcall/vm_resume (BASE = base, PC = ftype). ++ | stl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | ldl TMP2, L->base // TMP2 = old base (used in vmeta_call). ++ | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). 
++ | ldl TMP1, L->top ++ | ifmovs TMP3, TOBIT ++ | addl PC, BASE, PC ++ | subl TMP1, BASE, NARGS8:RC ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | subl PC, TMP2, PC // PC = frame delta + frame type ++ | fcvtsd TOBIT, TOBIT ++ | li_vmstate INTERP ++ | ldi TISNIL, LJ_TNIL(zero) ++ | st_vmstate ++ | ++ |->vm_call_dispatch: ++ | // TMP2 = old base, BASE = new base, RC = nargs*8, PC = caller PC ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | checkfunc LFUNC:RB, ->vmeta_call ++ | ++ |->vm_call_dispatch_f: ++ | ins_call ++ | // BASE = new base, RB = func, RC = nargs*8, PC = caller PC ++ | ++ |->vm_cpcall: // Setup protected C frame, call C. ++ | // (lua_State *L, lua_CFunction func, void *ud, lua_CPFunction cp) ++ | saveregs ++ | bis CARG1, zero, L ++ | ldl TMP0, L:CARG1->stack ++ | stl CARG1, SAVE_L(sp) ++ | ldl TMP1, L->top ++ | ldl DISPATCH, L->glref // Setup pointer to dispatch table. ++ | stl CARG1, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | subl TMP0, TMP1, TMP0 // Compute -savestack(L, L->top). ++ | ldl TMP1, L->cframe ++ | ldi DISPATCH, GG_G2DISP(DISPATCH) ++ | stw TMP0, SAVE_NRES(sp) // Neg. delta means cframe w/o frame. ++ | stw zero, SAVE_ERRF(sp) // No error function. ++ | stl TMP1, SAVE_CFRAME(sp) ++ | stl sp, L->cframe // Add our C frame to cframe chain. ++ | stl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | ldi CFUNCADDR, 0(CARG4) ++ | call r26, 0(CFUNCADDR) // (lua_State *L, lua_CFunction func, void *ud) ++ | bis CRET1, zero, BASE ++ | ldi PC, FRAME_CP(zero) ++ | bne CRET1, <3 // Else continue with the call. ++ | br zero, ->vm_leave_cp // No base? Just remove C frame. ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Metamethod handling ------------------------------------------------ ++ |//----------------------------------------------------------------------- ++ | ++ |// The lj_meta_* functions (except for lj_meta_cat) don't reallocate the ++ |// stack, so BASE doesn't need to be reloaded across these calls. 
++ | ++ |//-- Continuation dispatch ---------------------------------------------- ++ | ++ |->cont_dispatch: ++ | // BASE = meta base, RA = resultptr, RD = (nresults+1) ++ | ldl TMP0, -32(BASE) // Continuation. ++ | bis BASE, zero, RB ++ | bis TMP2, zero, BASE // Restore caller BASE. ++ | ldl LFUNC:TMP1, FRAME_FUNC(TMP2) ++ |.if FFI ++ | cmpulti TMP0, 2, AT ++ |.endif ++ | ldl PC, -24(RB) // Restore PC from [cont|PC]. ++ | cleartp LFUNC:TMP1 ++ | addl RA, RD, TMP2 ++ | ldl TMP1, LFUNC:TMP1->pc ++ | stl TISNIL, -8(TMP2) // Ensure one valid arg. ++ |.if FFI ++ | bne AT, >1 ++ |.endif ++ | // BASE = base, RA = resultptr, RB = meta base ++ | ldl KBASE, PC2PROTO(k)(TMP1) ++ | jmp zero, 0(TMP0) // Jump to continuation. ++ | ++ |.if FFI ++ |1: ++ | ldi TMP1, -32(RB) ++ | bne TMP0, ->cont_ffi_callback // cont = 1: return from FFI callback. ++ | // cont = 0: tailcall from C function. ++ | subl TMP1, BASE, RC ++ | br zero, ->vm_call_tail ++ |.endif ++ | ++ |->cont_cat: // RA = resultptr, RB = meta base ++ | ldw INS, -4(PC) ++ | ldi CARG2, -32(RB) ++ | ldl CRET1, 0(RA) ++ | decode_RB MULTRES, INS ++ | decode_RA RA, INS ++ | addl BASE, MULTRES, TMP1 ++ | stl BASE, L->base ++ | subl CARG2, TMP1, CARG3 ++ | stl CRET1, 0(CARG2) ++ | cmpeq TMP1, CARG2, AT ++ | beq AT, ->BC_CAT_Z ++ | addl RA, BASE, RA ++ | stl CRET1, 0(RA) ++ | br zero, ->cont_nop ++ | ++ |//-- Table indexing metamethods ----------------------------------------- ++ | ++ |->vmeta_tgets1: ++ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TMP0, LJ_TSTR(zero) ++ | settp STR:RC, TMP0 ++ | stl STR:RC, 0(CARG3) ++ | br zero, >1 ++ | ++ |->vmeta_tgets: ++ | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TMP0, LJ_TTAB(zero) ++ | ldi TMP1, LJ_TSTR(zero) ++ | settp TAB:RB, TMP0 ++ | ldi CARG3, DISPATCH_GL(tmptv2)(DISPATCH) ++ | stl TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | stl STR:RC, 0(CARG3) ++ | br zero, >1 ++ | ++ |->vmeta_tgetb: ++ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | 
settp TMP0, TISNUM ++ | stl TMP0, 0(CARG3) ++ | ++ |->vmeta_tgetv: ++ |1: ++ | load_got lj_meta_tget ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_tget // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). ++ | ldi TMP1, -FRAME_CONT(BASE) ++ | beq CRET1, >3 ++ | ldl TMP0, 0(CRET1) ++ | stl TMP0, 0(RA) ++ | ins_next ++ | ++ |3: // Call __index metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k ++ | ldl BASE, L->top ++ | stl PC, -24(BASE) // [cont|PC] ++ | subl BASE, TMP1, PC ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | cleartp LFUNC:RB ++ | ldi NARGS8:RC, 16(zero) ++ | br zero, ->vm_call_dispatch_f ++ | ++ |->vmeta_tgetr: ++ | load_got lj_tab_getinth ++ | call_intern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. ++ | bis TISNIL, zero, CARG2 ++ | beq CRET1, ->BC_TGETR_Z ++ | ldl CARG2, 0(CRET1) ++ | br zero, ->BC_TGETR_Z ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->vmeta_tsets1: ++ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TMP0, LJ_TSTR(zero) ++ | settp STR:RC, TMP0 ++ | stl STR:RC, 0(CARG3) ++ | br zero, >1 ++ | ++ |->vmeta_tsets: ++ | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TMP0, LJ_TTAB(zero) ++ | ldi TMP1, LJ_TSTR(zero) ++ | settp TAB:RB, TMP0 ++ | ldi CARG3, DISPATCH_GL(tmptv2)(DISPATCH) ++ | stl TAB:RB, 0(CARG2) ++ | settp STR:RC, TMP1 ++ | stl STR:RC, 0(CARG3) ++ | br zero, >1 ++ | ++ |->vmeta_tsetb: // TMP0 = index ++ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp TMP0, TISNUM ++ | stl TMP0, 0(CARG3) ++ | ++ |->vmeta_tsetv: ++ |1: ++ | load_got lj_meta_tset ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_tset // (lua_State *L, TValue *o, TValue *k) ++ | // Returns TValue * (finished) or NULL (metamethod). 
++ | ldl TMP2, 0(RA) ++ | beq CRET1, >3 ++ | // NOBARRIER: lj_meta_tset ensures the table is not black. ++ | stl TMP2, 0(CRET1) ++ | ins_next ++ | ++ |3: // Call __newindex metamethod. ++ | // BASE = base, L->top = new base, stack = cont/func/t/k/(v) ++ | ldi TMP1, -FRAME_CONT(BASE) ++ | ldl BASE, L->top ++ | stl PC, -24(BASE) // [cont|PC] ++ | subl BASE, TMP1, PC ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | ldi NARGS8:RC, 24(zero) // 3 args for func(t, k, v) ++ | cleartp LFUNC:RB ++ | stl TMP2, 16(BASE) // Copy value to third argument. ++ | br zero, ->vm_call_dispatch_f ++ | ++ |->vmeta_tsetr: ++ | load_got lj_tab_setinth ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_tab_setinth // (lua_State *L, GCtab *t, int32_t key) ++ | // Returns TValue *. ++ | br zero, ->BC_TSETR_Z ++ | ++ |//-- Comparison metamethods --------------------------------------------- ++ | ++ |->vmeta_comp: ++ | // RA/RD point to o1/o2. ++ | bis RA, zero, CARG2 ++ | bis RD, zero, CARG3 ++ | load_got lj_meta_comp ++ | ldi PC, -4(PC) ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | decode_OP CARG4, INS ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_comp // (lua_State *L, TValue *o1, *o2, int op) ++ | // Returns 0/1 or TValue * (metamethod). 
++ |3: ++ | cmpulti CRET1, 2, TMP1 ++ | beq TMP1, ->vmeta_binop ++ | subw zero, CRET1, TMP2 ++ |4: ++ | ldhu RD, OFS_RD(PC) ++ | ldi PC, 4(PC) ++ | ldih TMP1, -0x2(zero) // -BCBIAS_J*4 ++ | s4addwi RD, 0, RD ++ | addw RD, TMP1, RD ++ | and RD, TMP2, RD ++ | addl PC, RD, PC ++ |->cont_nop: ++ | ins_next ++ | ++ |->cont_ra: // RA = resultptr ++ | ldbu TMP1, -4+OFS_RA(PC) ++ | ldl TMP2, 0(RA) ++ | s8addwi TMP1, 0, TMP1 ++ | addl TMP1, BASE, TMP1 ++ | stl TMP2, 0(TMP1) ++ | br zero, ->cont_nop ++ | ++ |->cont_condt: // RA = resultptr ++ | ldl TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | // cmpulti TMP0, LJ_TISTRUECOND, TMP1 ++ | ldi TMP1, LJ_TISTRUECOND(zero) ++ | cmpult TMP0, TMP1, TMP1 ++ | subw zero, TMP1, TMP2 // Branch if result is true. ++ | br zero, <4 ++ | ++ |->cont_condf: // RA = resultptr ++ | ldl TMP0, 0(RA) ++ | gettp TMP0, TMP0 ++ | // cmpulti TMP0, LJ_TISTRUECOND, TMP1 ++ | ldi TMP1, LJ_TISTRUECOND(zero) ++ | cmpult TMP0, TMP1, TMP1 ++ | subwi TMP1, 1, TMP2 // Branch if result is false. ++ | br zero, <4 ++ | ++ |->vmeta_equal: ++ | // CARG1/CARG2 point to o1/o2. TMP0 is set to 0/1. ++ | load_got lj_meta_equal ++ | cleartp LFUNC:CARG3, CARG2 ++ | cleartp LFUNC:CARG2, CARG1 ++ | bis TMP0, zero, CARG4 ++ | ldi PC, -4(PC) ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_equal // (lua_State *L, GCobj *o1, *o2, int ne) ++ | // Returns 0/1 or TValue * (metamethod). ++ | br zero, <3 ++ | ++ |->vmeta_equal_cd: ++ |.if FFI ++ | load_got lj_meta_equal_cd ++ | bis INS, zero, CARG2 ++ | ldi PC, -4(PC) ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_equal_cd // (lua_State *L, BCIns op) ++ | // Returns 0/1 or TValue * (metamethod). 
++ | br zero, <3 ++ |.endif ++ | ++ |->vmeta_istype: ++ | load_got lj_meta_istype ++ | ldi PC, -4(PC) ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | zapi RA, 0xf0, CARG2 ++ | srli CARG2, 3, CARG2 ++ | zapi RD, 0xf0, CARG3 ++ | srli CARG3, 3, CARG3 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_istype // (lua_State *L, BCReg ra, BCReg tp) ++ | br zero, ->cont_nop ++ | ++ |//-- Arithmetic metamethods --------------------------------------------- ++ | ++ |->vmeta_unm: ++ | bis RB, zero, RC ++ | ++ |->vmeta_arith: ++ | load_got lj_meta_arith ++ | stl BASE, L->base ++ | bis RA, zero, CARG2 ++ | stl PC, SAVE_PC(sp) ++ | bis RB, zero, CARG3 ++ | bis RC, zero, CARG4 ++ | decode_OP CARG5, INS ++ | bis L, zero, CARG1 ++ | call_intern lj_meta_arith // (lua_State *L, TValue *ra,*rb,*rc, BCReg op) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | beq CRET1, ->cont_nop ++ | ++ | // Call metamethod for binary op. ++ |->vmeta_binop: ++ | // BASE = old base, CRET1 = new base, stack = cont/func/o1/o2 ++ | subl CRET1, BASE, TMP1 ++ | stl PC, -24(CRET1) // [cont|PC] ++ | bis BASE, zero, TMP2 ++ | ldi PC, FRAME_CONT(TMP1) ++ | bis CRET1, zero, BASE ++ | ldi NARGS8:RC, 16(zero) // 2 args for func(o1, o2). ++ | br zero, ->vm_call_dispatch ++ | ++ |->vmeta_len: ++ | // CARG2 already set by BC_LEN. ++#if LJ_52 ++ | bis CARG1, zero, MULTRES ++#endif ++ | load_got lj_meta_len ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | stl PC, SAVE_PC(sp) ++ | call_intern lj_meta_len // (lua_State *L, TValue *o) ++ | // Returns NULL (retry) or TValue * (metamethod base). ++#if LJ_52 ++ | bne CRET1, ->vmeta_binop // Binop call for compatibility. ++ | bis MULTRES, zero, CARG1 ++ | br zero, ->BC_LEN_Z ++#else ++ | br zero, ->vmeta_binop // Binop call for compatibility. ++#endif ++ | ++ |//-- Call metamethod ---------------------------------------------------- ++ | ++ |->vmeta_call: // Resolve and call __call metamethod. 
++ | // TMP2 = old base, BASE = new base, RC = nargs*8 ++ | load_got lj_meta_call ++ | stl TMP2, L->base // This is the callers base! ++ | ldi CARG2, -16(BASE) ++ | stl PC, SAVE_PC(sp) ++ | addl BASE, RC, CARG3 ++ | bis L, zero, CARG1 ++ | bis NARGS8:RC, zero, MULTRES ++ | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) // Guaranteed to be a function here. ++ | ldi NARGS8:RC, 8(MULTRES) // Got one more argument now. ++ | cleartp LFUNC:RB ++ | ins_call ++ | ++ |->vmeta_callt: // Resolve __call for BC_CALLT. ++ | // BASE = old base, RA = new base, RC = nargs*8 ++ | load_got lj_meta_call ++ | stl BASE, L->base ++ | subli RA, 16, CARG2 ++ | stl PC, SAVE_PC(sp) ++ | addl RA, RC, CARG3 ++ | bis L, zero, CARG1 ++ | bis NARGS8:RC, zero, MULTRES ++ | call_intern lj_meta_call // (lua_State *L, TValue *func, TValue *top) ++ | ldl RB, FRAME_FUNC(RA) // Guaranteed to be a function here. ++ | ldl TMP1, FRAME_PC(BASE) ++ | addli MULTRES, 8, NARGS8:RC // Got one more argument now. ++ | cleartp LFUNC:CARG3, RB ++ | br zero, ->BC_CALLT_Z ++ | ++ |//-- Argument coercion for 'for' statement ------------------------------ ++ | ++ |->vmeta_for: ++ | load_got lj_meta_for ++ | stl BASE, L->base ++ | bis RA, zero, CARG2 ++ | stl PC, SAVE_PC(sp) ++ | bis INS, zero, MULTRES ++ | bis L, zero, CARG1 ++ | call_intern lj_meta_for // (lua_State *L, TValue *base) ++ |.if JIT ++ | decode_OP TMP0, MULTRES ++ | ldi TMP1, BC_JFORI(zero) ++ |.endif ++ | decode_RA RA, MULTRES ++ | decode_RD RD, MULTRES ++ |.if JIT ++ | cmpeq TMP0, TMP1, AT ++ | bne AT, =>BC_JFORI ++ |.endif ++ | br zero, =>BC_FORI ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Fast functions ----------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro .ffunc, name ++ |->ff_ .. name: ++ |.endmacro ++ | ++ |.macro .ffunc_1, name ++ |->ff_ .. 
name: ++ | ldl CARG1, 0(BASE) ++ | beq NARGS8:RC, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_2, name ++ |->ff_ .. name: ++ | cmpulti NARGS8:RC, 16, TMP0 ++ | ldl CARG1, 0(BASE) ++ | ldl CARG2, 8(BASE) ++ | bne TMP0, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_n, name ++ |->ff_ .. name: ++ | ldl CARG1, 0(BASE) ++ | fldd FCARG1, 0(BASE) ++ | beq NARGS8:RC, ->fff_fallback ++ | checknum CARG1, ->fff_fallback ++ |.endmacro ++ | ++ |.macro .ffunc_nn, name ++ |->ff_ .. name: ++ | ldl CARG1, 0(BASE) ++ | ldl CARG2, 8(BASE) ++ | cmpulti NARGS8:RC, 16, TMP0 ++ | gettp TMP1, CARG1 ++ | bne TMP0, ->fff_fallback ++ | gettp TMP2, CARG2 ++ | // cmpulti TMP1, LJ_TISNUM, TMP1 ++ | // cmpulti TMP2, LJ_TISNUM, TMP2 ++ | ldi AT, LJ_TISNUM(zero) ++ | cmpult TMP1, AT, TMP1 ++ | cmpult TMP2, AT, TMP2 ++ | fldd FCARG1, 0(BASE) ++ | and TMP1, TMP2, TMP1 ++ | fldd FCARG2, 8(BASE) ++ | beq TMP1, ->fff_fallback ++ |.endmacro ++ | ++ |// Inlined GC threshold check. ++ |.macro ffgccheck ++ | ldl TMP0, DISPATCH_GL(gc.total)(DISPATCH) ++ | ldl TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) ++ | cmpult TMP0, TMP1, AT ++ | bne AT, >1 ++ | br ra, ->fff_gcstep ++ |1: ++ |.endmacro ++ | ++ |//-- Base library: checks ----------------------------------------------- ++ |.ffunc_1 assert ++ | gettp TMP1, CARG1 ++ | // cmpulti TMP1, LJ_TISTRUECOND, TMP1 ++ | ldi AT, LJ_TISTRUECOND(zero) ++ | cmpult TMP1, AT, TMP1 ++ | ldi RA, -16(BASE) ++ | beq TMP1, ->fff_fallback ++ | ldl PC, FRAME_PC(BASE) ++ | addwi NARGS8:RC, 8, RD // Compute (nresults+1)*8. ++ | ldi TMP1, 8(BASE) ++ | addl RA, RD, TMP2 ++ | stl CARG1, 0(RA) ++ | cmpeq BASE, TMP2, AT ++ | bne AT, ->fff_res // Done if exactly 1 argument. 
++ |1: ++ | ldl TMP0, 0(TMP1) ++ | stl TMP0, -16(TMP1) ++ | bis TMP1, zero, AT ++ | ldi TMP1, 8(TMP1) ++ | cmpeq AT, TMP2, AT ++ | beq AT, <1 ++ | br zero, ->fff_res ++ | ++ |.ffunc_1 type ++ | gettp TMP0, CARG1 ++ | ldi TMP1, ~LJ_TISNUM(zero) ++ | addwi TMP1, 0, TMP1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TISNUM, TMP0, TMP2 ++ | ornot zero, TMP0, AT // ~TMP0 ++ | seleq TMP2, TMP1, AT, AT ++ | s8addl AT, CFUNC:RB, AT ++ | ldl CARG1, CFUNC:AT->upvalue ++ | br zero, ->fff_restv ++ | ++ |//-- Base library: getters and setters --------------------------------- ++ | ++ |.ffunc_1 getmetatable ++ | gettp TMP2, CARG1 ++ | ldi TMP0, -LJ_TTAB(TMP2) ++ | ldi TMP1, -LJ_TUDATA(TMP2) ++ | seleq TMP0, zero, TMP1, TMP0 ++ | cleartp TAB:CARG1 ++ | bne TMP0, >6 ++ |1: // Field metatable must be at same offset for GCtab and GCudata! ++ | ldl TAB:RB, TAB:CARG1->metatable ++ |2: ++ | ldl STR:RC, DISPATCH_GL(gcroot[GCROOT_MMNAME+MM_metatable])(DISPATCH) ++ | ldi CARG1, LJ_TNIL(zero) ++ | beq TAB:RB, ->fff_restv ++ | ldw TMP0, TAB:RB->hmask ++ | ldw TMP1, STR:RC->hash ++ | ldl NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP0, TMP1 // idx = str->hash & tab->hmask ++ | slli TMP1, 5, TMP0 ++ | slli TMP1, 3, TMP1 ++ | subl TMP0, TMP1, TMP1 ++ | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8) ++ | ldi CARG4, LJ_TSTR(zero) ++ | addwi CARG4, 0, CARG4 ++ | settp STR:RC, CARG4 // Tagged key to look for. ++ |3: // Rearranged logic, because we expect _not_ to find the key. ++ | ldl TMP0, NODE:TMP2->key ++ | ldl CARG1, NODE:TMP2->val ++ | ldl NODE:TMP2, NODE:TMP2->next ++ | ldi TMP3, LJ_TTAB(zero) ++ | cmpeq RC, TMP0, AT ++ | bne AT, >5 ++ | bne NODE:TMP2, <3 ++ |4: ++ | bis RB, zero, CARG1 ++ | settp CARG1, TMP3 ++ | br zero, ->fff_restv // Not found, keep default result. ++ |5: ++ | cmpeq CARG1, TISNIL, AT ++ | beq AT, ->fff_restv ++ | br zero, <4 // Ditto for nil value. 
++ | ++ |6: ++ | // cmpulti TMP2, LJ_TISNUM, AT ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TMP2, TISNUM, AT ++ | selne AT, TISNUM, TMP2, TMP2 ++ | slli TMP2, 3, TMP2 ++ | subl DISPATCH, TMP2, TMP0 ++ | ldl TAB:RB, DISPATCH_GL(gcroot[GCROOT_BASEMT])-8(TMP0) ++ | br zero, <2 ++ | ++ |.ffunc_2 setmetatable ++ | // Fast path: no mt for table yet and not clearing the mt. ++ | checktp TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | gettp TMP3, CARG2 ++ | ldl TAB:TMP0, TAB:TMP1->metatable ++ | ldbu TMP2, TAB:TMP1->marked ++ | ldi AT, -LJ_TTAB(TMP3) ++ | cleartp TAB:CARG2 ++ | bis AT, TAB:TMP0, AT ++ | bne AT, ->fff_fallback ++ | andi TMP2, LJ_GC_BLACK, AT // isblack(table) ++ | stl TAB:CARG2, TAB:TMP1->metatable ++ | beq AT, ->fff_restv ++ | barrierback TAB:TMP1, TMP2, TMP0, ->fff_restv ++ | ++ |.ffunc rawget ++ | ldl CARG2, 0(BASE) ++ | cmpulti NARGS8:RC, 16, TMP0 ++ | load_got lj_tab_get ++ | gettp TMP1, CARG2 ++ | cleartp CARG2 ++ | ldi TMP1, -LJ_TTAB(TMP1) ++ | bis TMP0, TMP1, TMP0 ++ | ldi CARG3, 8(BASE) ++ | bne TMP0, ->fff_fallback ++ | bis L, zero, CARG1 ++ | call_intern lj_tab_get // (lua_State *L, GCtab *t, cTValue *key) ++ | // Returns cTValue *. ++ | ldl CARG1, 0(CRET1) ++ | br zero, ->fff_restv ++ | ++ |//-- Base library: conversions ------------------------------------------ ++ | ++ |.ffunc tonumber ++ | // Only handles the number case inline (without a base argument). ++ | ldl CARG1, 0(BASE) ++ | xori NARGS8:RC, 8, TMP0 // Exactly one number argument. ++ | gettp TMP1, CARG1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TISNUM, TMP1, TMP1 ++ | bis TMP0, TMP1, TMP0 ++ | bne TMP0, ->fff_fallback // No args or CARG1 is not number ++ | br zero, ->fff_restv ++ | ++ |.ffunc_1 tostring ++ | // Only handles the string or number case inline. ++ | gettp TMP0, CARG1 ++ | ldi AT, -LJ_TSTR(TMP0) ++ | // A __tostring method in the string base metatable is ignored. ++ | ldl TMP1, DISPATCH_GL(gcroot[GCROOT_BASEMT_NUM])(DISPATCH) ++ | beq AT, ->fff_restv // String key? 
++ | // Handle numbers inline, unless a number base metatable is present. ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TISNUM, TMP0, TMP0 ++ | stl BASE, L->base // Add frame since C call can throw. ++ | bis TMP0, TMP1, TMP0 ++ | bne TMP0, ->fff_fallback ++ | stl PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | ffgccheck ++ | load_got lj_strfmt_number ++ | bis L, zero, CARG1 ++ | bis BASE, zero, CARG2 ++ | call_intern lj_strfmt_number // (lua_State *L, cTValue *o) ++ | // Returns GCstr *. ++ | ldi AT, LJ_TSTR(zero) ++ | settp CRET1, AT ++ | bis CRET1, zero, CARG1 ++ | br zero, ->fff_restv ++ | ++ |//-- Base library: iterators ------------------------------------------- ++ | ++ |.ffunc_1 next ++ | checktp CARG2, CARG1, -LJ_TTAB, ->fff_fallback ++ | addl BASE, NARGS8:RC, TMP2 ++ | ldl PC, FRAME_PC(BASE) ++ | stl TISNIL, 0(TMP2) // Set missing 2nd arg to nil. ++ | load_got lj_tab_next ++ | stl BASE, L->base // Add frame since C call can throw. ++ | stl BASE, L->top // Dummy frame length is ok. ++ | ldi CARG3, 8(BASE) ++ | stl PC, SAVE_PC(sp) ++ | bis L, zero, CARG1 ++ | call_intern lj_tab_next // (GCtab *t, cTValue *key, TValue *o) ++ | // Returns 1=found, 0=end, -1=error. ++ | bis TISNIL, zero, CARG1 ++ | beq CRET1, ->fff_restv // End of traversal: return nil. 
++ | ldl TMP0, 8(BASE) ++ | ldi RA, -16(BASE) ++ | ldl TMP2, 16(BASE) ++ | stl TMP0, 0(RA) ++ | stl TMP2, 8(RA) ++ | ldi RD, (2+1)*8(zero) ++ | br zero, ->fff_res ++ | ++ |.ffunc_1 pairs ++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback ++ | ldl PC, FRAME_PC(BASE) ++#if LJ_52 ++ | ldl TAB:TMP2, TAB:TMP1->metatable ++ | ldl TMP0, CFUNC:RB->upvalue[0] ++ | ldi RA, -16(BASE) ++ | bne TAB:TMP2, ->fff_fallback ++#else ++ | ldl TMP0, CFUNC:RB->upvalue[0] ++ | ldi RA, -16(BASE) ++#endif ++ | stl TISNIL, 0(BASE) ++ | stl CARG1, -8(BASE) ++ | stl TMP0, 0(RA) ++ | ldi RD, (3+1)*8(zero) ++ | br zero, ->fff_res ++ | ++ |.ffunc_2 ipairs_aux ++ | checktab CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | ldw TMP0, TAB:CARG1->asize ++ | ldl TMP1, TAB:CARG1->array ++ | ldl PC, FRAME_PC(BASE) ++ | addwi CARG2, 0, TMP2 ++ | addwi TMP2, 1, TMP2 ++ | cmpult TMP2, TMP0, AT ++ | ldi RA, -16(BASE) ++ | zapi TMP2, 0xf0, TMP0 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp TMP0, TISNUM ++ | stl TMP0, 0(RA) ++ | beq AT, >2 // Not in array part? ++ | s8addl TMP2, TMP1, TMP3 ++ | ldl TMP1, 0(TMP3) ++ |1: ++ | ldi RD, (0+1)*8(zero) ++ | cmpeq TMP1, TISNIL, AT ++ | bne AT, ->fff_res // End of iteration, return 0 results. ++ | stl TMP1, -8(BASE) ++ | ldi RD, (2+1)*8(zero) ++ | br zero, ->fff_res ++ |2: // Check for empty hash part first. Otherwise call C function. ++ | ldw TMP0, TAB:CARG1->hmask ++ | ldi RD, (0+1)*8(zero) ++ | load_got lj_tab_getinth ++ | beq TMP0, ->fff_res ++ | bis TMP2, zero, CARG2 ++ | call_intern lj_tab_getinth // (GCtab *t, int32_t key) ++ | // Returns cTValue * or NULL. 
++ | ldi RD, (0+1)*8(zero)
++ | beq CRET1, ->fff_res
++ | ldl TMP1, 0(CRET1)
++ | br zero, <1
++ |
++ |.ffunc_1 ipairs
++ | checktp TAB:TMP1, CARG1, -LJ_TTAB, ->fff_fallback
++ | ldl PC, FRAME_PC(BASE)
++#if LJ_52
++ | ldl TAB:TMP2, TAB:TMP1->metatable
++#endif
++ | ldl CFUNC:TMP0, CFUNC:RB->upvalue[0]
++ | ldi RA, -16(BASE)
++#if LJ_52
++ | bne TAB:TMP2, ->fff_fallback
++#endif
++ | ldi TISNUM, LJ_TISNUM(zero)
++ | slli TISNUM, 47, TMP1
++ | stl CARG1, -8(BASE)
++ | stl TMP1, 0(BASE)
++ | stl CFUNC:TMP0, 0(RA)
++ | ldi RD, (3+1)*8(zero)
++ | br zero, ->fff_res
++ |
++ |//-- Base library: catch errors ----------------------------------------
++ |
++ |.ffunc pcall
++ | ldi NARGS8:RC, -8(NARGS8:RC)
++ | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH)
++ | bis BASE, zero, TMP2
++ | cmplt NARGS8:RC, zero, AT
++ | bne AT, ->fff_fallback
++ | ldi BASE, 16(BASE)
++ | // Remember active hook before pcall.
++ | zapi TMP3, 0xf0, TMP3
++ | srli TMP3, HOOK_ACTIVE_SHIFT, TMP3
++ | andi TMP3, 1, TMP3
++ | ldi PC, 16+FRAME_PCALL(TMP3)
++ | beq NARGS8:RC, ->vm_call_dispatch
++ |1:
++ | addl BASE, NARGS8:RC, TMP0
++ |2:
++ | ldl TMP1, -16(TMP0)
++ | stl TMP1, -8(TMP0)
++ | ldi TMP0, -8(TMP0)
++ | cmpeq TMP0, BASE, AT
++ | beq AT, <2
++ | br zero, ->vm_call_dispatch
++ |
++ |.ffunc xpcall
++ | ldi NARGS8:TMP0, -16(NARGS8:RC)
++ | ldl CARG1, 0(BASE)
++ | ldl CARG2, 8(BASE)
++ | ldbu TMP1, DISPATCH_GL(hookmask)(DISPATCH)
++ | cmplt NARGS8:TMP0, zero, AT
++ | bne AT, ->fff_fallback
++ | gettp TMP2, CARG2
++ | ldi TMP2, -LJ_TFUNC(TMP2)
++ | bne TMP2, ->fff_fallback // Traceback must be a function.
++ | bis BASE, zero, TMP2
++ | bis NARGS8:TMP0, zero, NARGS8:RC
++ | ldi BASE, 24(BASE)
++ | // Remember active hook before pcall.
++ | zapi TMP1, 0xf0, TMP3
++ | srli TMP3, HOOK_ACTIVE_SHIFT, TMP3
++ | stl CARG2, 0(TMP2) // Swap function and traceback.
++ | andi TMP3, 1, TMP3 ++ | stl CARG1, 8(TMP2) ++ | ldi PC, 24+FRAME_PCALL(TMP3) ++ | beq NARGS8:RC, ->vm_call_dispatch ++ | br zero, <1 ++ | ++ |//-- Coroutine library -------------------------------------------------- ++ | ++ |.macro coroutine_resume_wrap, resume ++ |.if resume ++ |.ffunc_1 coroutine_resume ++ | checktp CARG1, CARG1, -LJ_TTHREAD, ->fff_fallback ++ |.else ++ |.ffunc coroutine_wrap_aux ++ | ldl L:CARG1, CFUNC:RB->upvalue[0].gcr ++ | cleartp L:CARG1 ++ |.endif ++ | ldbu TMP0, L:CARG1->status ++ | ldl TMP1, L:CARG1->cframe ++ | ldl CARG2, L:CARG1->top ++ | ldl TMP2, L:CARG1->base ++ | subwi TMP0, LUA_YIELD, CARG4 ++ | addl CARG2, TMP0, CARG3 ++ | ldi TMP3, 8(CARG2) ++ | seleq CARG4, CARG2, TMP3, CARG2 ++ | cmplt zero, CARG4, AT ++ | bne AT, ->fff_fallback // st > LUA_YIELD? ++ | xor TMP2, CARG3, TMP2 ++ | bis TMP2, TMP0, CARG4 ++ | bne TMP1, ->fff_fallback // cframe != 0? ++ | ldl TMP0, L:CARG1->maxstack ++ | ldl PC, FRAME_PC(BASE) ++ | beq CARG4, ->fff_fallback // base == top && st == 0? ++ | addl CARG2, NARGS8:RC, TMP2 ++ | cmpult TMP0, TMP2, CARG4 ++ | stl BASE, L->base ++ | stl PC, SAVE_PC(sp) ++ | bne CARG4, ->fff_fallback // Stack overflow? ++ |1: ++ |.if resume ++ | ldi BASE, 8(BASE) // Keep resumed thread in stack for GC. ++ | ldi NARGS8:RC, -8(NARGS8:RC) ++ | ldi TMP2, -8(TMP2) ++ |.endif ++ | stl TMP2, L:CARG1->top ++ | stl BASE, L->top ++ | addl BASE, NARGS8:RC, TMP1 ++ | bis CARG2, zero, CARG3 ++ |2: // Move args to coroutine. ++ | ldl TMP0, 0(BASE) ++ | cmpult BASE, TMP1, TMP3 ++ | ldi BASE, 8(BASE) ++ | beq TMP3, >3 ++ | stl TMP0, 0(CARG3) ++ | ldi CARG3, 8(CARG3) ++ | br zero, <2 ++ |3: ++ | bis L:CARG1, zero, L:RA ++ | br ra, ->vm_resume // (lua_State *L, TValue *base, 0, 0) ++ | // Returns thread status. 
++ |4: ++ | ldl TMP2, L:RA->base ++ | // cmpulti CRET1, LUA_YIELD+1, TMP1 ++ | ldi TMP1, LUA_YIELD+1(zero) ++ | cmpult CRET1, TMP1, TMP1 ++ | ldl TMP3, L:RA->top ++ | li_vmstate INTERP ++ | ldl BASE, L->base ++ | stl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | st_vmstate ++ | subl TMP3, TMP2, RD ++ | beq TMP1, >8 ++ | ldl TMP0, L->maxstack ++ | addl BASE, RD, TMP1 ++ | beq RD, >6 // No results? ++ | addl TMP2, RD, TMP3 ++ | cmpult TMP0, TMP1, AT ++ | bne AT, >9 // Need to grow stack? ++ | stl TMP2, L:RA->top // Clear coroutine stack. ++ | bis BASE, zero, TMP1 ++ |5: // Move results from coroutine. ++ | ldl TMP0, 0(TMP2) ++ | ldi TMP2, 8(TMP2) ++ | stl TMP0, 0(TMP1) ++ | ldi TMP1, 8(TMP1) ++ | cmpult TMP2, TMP3, AT ++ | bne AT, <5 ++ |6: ++ |.if resume ++ | mov_true TMP1 ++ | ldi RD, 16(RD) ++ |7: ++ | stl TMP1, -8(BASE) // Prepend true/false to results. ++ | ldi RA, -8(BASE) ++ |.else ++ | bis BASE, zero, RA ++ | ldi RD, 8(RD) ++ |.endif ++ | andi PC, FRAME_TYPE, TMP0 ++ | stl PC, SAVE_PC(sp) ++ | bis RD, zero, MULTRES ++ | beq TMP0, ->BC_RET_Z ++ | br zero, ->vm_return ++ | ++ |8: // Coroutine returned with error (at co->top-1). ++ |.if resume ++ | ldi TMP3, -8(TMP3) ++ | mov_false TMP1 ++ | addwi zero, (2+1)*8, RD ++ | ldl TMP0, 0(TMP3) ++ | stl TMP3, L:RA->top // Remove error from coroutine stack. ++ | stl TMP0, 0(BASE) // Copy error message. ++ | br zero, <7 ++ |.else ++ | load_got lj_ffh_coroutine_wrap_err ++ | bis L, zero, CARG1 ++ | bis L:RA, zero, CARG2 ++ | call_intern lj_ffh_coroutine_wrap_err // (lua_State *L, lua_State *co) ++ |.endif ++ | ++ |9: // Handle stack expansion on return from yield. 
++ | load_got lj_state_growstack ++ | bis L, zero, CARG1 ++ | zapi RD, 0xf0, CARG2 ++ | srli CARG2, 3, CARG2 ++ | call_intern lj_state_growstack // (lua_State *L, int n) ++ | ldi CRET1, 0(zero) ++ | br zero, <4 ++ |.endmacro ++ | ++ | coroutine_resume_wrap 1 // coroutine.resume ++ | coroutine_resume_wrap 0 // coroutine.wrap ++ | ++ |.ffunc coroutine_yield ++ | ldl TMP0, L->cframe ++ | addl BASE, NARGS8:RC, TMP1 ++ | addwi zero, LUA_YIELD, CRET1 ++ | stl BASE, L->base ++ | andi TMP0, CFRAME_RESUME, TMP0 ++ | stl TMP1, L->top ++ | beq TMP0, ->fff_fallback ++ | stl zero, L->cframe ++ | stb CRET1, L->status ++ | br zero, ->vm_leave_unw ++ | ++ |//-- Math library ------------------------------------------------------- ++ | ++ |.macro math_round, func ++ |->ff_math_ .. func: ++ | ldl CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | beq NARGS8:RC, ->fff_fallback ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | bne AT, ->fff_restv ++ | fldd FCARG1, 0(BASE) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TMP0, TISNUM, AT ++ | beq AT, ->fff_fallback ++ | br ra, ->vm_ .. func ++ | br zero, ->fff_resn ++ |.endmacro ++ | ++ | math_round floor ++ | math_round ceil ++ | ++ |.ffunc_1 math_abs ++ | gettp CARG2, CARG1 ++ | ldi TMP2, -LJ_TISNUM(CARG2) ++ | addwi CARG1, 0, TMP1 ++ | bne TMP2, >1 ++ | addwi TMP1, 0, TMP0 ++ | srai TMP0, 31, TMP0 // Extract sign. int ++ | xor TMP1, TMP0, TMP1 ++ | subl TMP1, TMP0, CARG1 ++ | slli CARG1, 32, TMP3 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CARG1, TISNUM ++ | cmplt TMP3, zero, AT ++ | beq AT, ->fff_restv ++ | ldi CARG1, 0x41e(zero) // 2^31 as a double. ++ | slli CARG1, 4, CARG1 // 0x41e0 ++ | addwi CARG1, 0, CARG1 ++ | slli CARG1, 48, CARG1 ++ | br zero, ->fff_restv ++ |1: ++ | // cmpulti CARG2, LJ_TISNUM, TMP2 ++ | ldi TMP2, LJ_TISNUM(zero) ++ | cmpult CARG2, TMP2, TMP2 ++ | .DEXTM CARG1, CARG1, 0, 63 ++ | beq TMP2, ->fff_fallback // int ++ |// fallthrough ++ | ++ |->fff_restv: ++ | // CARG1 = TValue result. 
++ | ldl PC, FRAME_PC(BASE) ++ | ldi RA, -16(BASE) ++ | stl CARG1, -16(BASE) ++ |->fff_res1: ++ | // RA = results, PC = return. ++ | ldi RD, (1+1)*8(zero) ++ |->fff_res: ++ | // RA = results, RD = (nresults+1)*8, PC = return. ++ | andi PC, FRAME_TYPE, TMP0 ++ | bis RD, zero, MULTRES ++ | ldi RA, -16(BASE) ++ | bne TMP0, ->vm_return ++ | ldw INS, -4(PC) ++ | decode_RB RB, INS ++ |5: ++ | cmpult RD, RB, TMP2 ++ | decode_RA TMP0, INS ++ | bne TMP2, >6 // More results expected? ++ | // Adjust BASE. KBASE is assumed to be set for the calling frame. ++ | subl RA, TMP0, BASE ++ | ins_next ++ | ++ |6: // Fill up results with nil. ++ | addl RA, RD, TMP1 ++ | ldi RD, 8(RD) ++ | stl TISNIL, -8(TMP1) ++ | br zero, <5 ++ | ++ |.macro math_extern, func ++ | .ffunc_n math_ .. func ++ | load_got func ++ | call_extern ++ | br zero, ->fff_resn ++ |.endmacro ++ | ++ |.macro math_extern2, func ++ | .ffunc_nn math_ .. func ++ | load_got func ++ | call_extern ++ | br zero, ->fff_resn ++ |.endmacro ++ | ++ |.ffunc_n math_sqrt ++ | fsqrtd FCARG1, FCRET1 ++ |->fff_resn: ++ | ldl PC, FRAME_PC(BASE) ++ | fstd FCRET1, -16(BASE) ++ | br zero, ->fff_res1 ++ | ++ |.ffunc math_log ++ | ldi TMP1, 8(zero) ++ | ldl CARG1, 0(BASE) ++ | fldd FCARG1, 0(BASE) ++ | cmpeq NARGS8:RC, TMP1, AT ++ | beq AT, ->fff_fallback // Need exactly 1 argument. 
++ | checknum CARG1, ->fff_fallback ++ | load_got log ++ | call_extern ++ | br zero, ->fff_resn ++ | ++ | math_extern log10 ++ | math_extern exp ++ | math_extern sin ++ | math_extern cos ++ | math_extern tan ++ | math_extern asin ++ | math_extern acos ++ | math_extern atan ++ | math_extern sinh ++ | math_extern cosh ++ | math_extern tanh ++ | math_extern2 pow ++ | math_extern2 atan2 ++ | math_extern2 fmod ++ | ++ |.ffunc_2 math_ldexp ++ | checknum CARG1, ->fff_fallback ++ | checkint CARG2, ->fff_fallback ++ | load_got ldexp ++ | fldd FCARG1, 0(BASE) ++ | ldw CARG2, 8(BASE) // (double x, int exp) ++ | call_extern ++ | br zero, ->fff_resn ++ | ++ |.ffunc_n math_frexp ++ | load_got frexp ++ | ldl PC, FRAME_PC(BASE) ++ | ldi CARG2, DISPATCH_GL(tmptv)(DISPATCH) ++ | call_extern ++ | ldw TMP1, DISPATCH_GL(tmptv)(DISPATCH) ++ | ldi RA, -16(BASE) ++ | ifmovs TMP1, FCARG2 ++ | fstd FCRET1, 0(RA) ++ | fcvtwl FCARG2, FCARG2 ++ | fcvtld FCARG2, FCARG2 ++ | fstd FCARG2, 8(RA) ++ | ldi RD, (2+1)*8(zero) ++ | br zero, ->fff_res ++ | ++ |.ffunc_n math_modf ++ | load_got modf ++ | ldl PC, FRAME_PC(BASE) ++ | ldi CARG2, -16(BASE) ++ | ldi RA, -16(BASE) ++ | call_extern ++ | fstd FCRET1, -8(BASE) ++ | ldi RD, (2+1)*8(zero) ++ | br zero, ->fff_res ++ | ++ | ++ |.macro math_minmax, name, intins, fpins ++ | .ffunc_1 name ++ | addl BASE, NARGS8:RC, TMP3 ++ | addli BASE, 8, TMP2 ++ | checkint CARG1, >5 ++ |1: // Handle integers. ++ | ldl CARG2, 0(TMP2) ++ | cmpeq TMP2, TMP3, AT ++ | bne AT, ->fff_restv ++ | addwi CARG1, 0, CARG1 ++ | checkint CARG2, >3 ++ | addwi CARG2, 0, CARG2 ++ | cmplt CARG1, CARG2, AT ++ | intins AT, CARG2, CARG1, CARG1 ++ | ldi TMP2, 8(TMP2) ++ | zapi CARG1, 0xf0, CARG1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CARG1, TISNUM ++ | br zero, <1 ++ | ++ |3: // Convert intermediate result to number and continue with number loop. 
++ | ifmovs CARG1, FCRET1 ++ | checknum CARG2, ->fff_fallback ++ | fcvtwl FCRET1, FCRET1 ++ | fcvtld FCRET1, FCRET1 ++ | fldd FCARG1, 0(TMP2) ++ | br zero, >7 ++ | ++ |5: ++ | fldd FCRET1, 0(BASE) ++ | ldl CARG2, 0(TMP2) ++ | checknum CARG1, ->fff_fallback ++ |6: // Handle numbers. ++ | cmpeq TMP2, TMP3, AT ++ | bne AT, ->fff_resn ++ | fldd FCARG1, 0(TMP2) ++ | checknum CARG2, >8 ++ |7: ++ | fcmplt FCRET1, FCARG1, FAT ++ | fpins FAT, FCARG1, FCRET1, FCRET1 ++ | ldi TMP2, 8(TMP2) ++ | br zero, <6 ++ | ++ |8: // Convert integer to number and continue with number loop. ++ | flds FCARG1, 0(TMP2) ++ | checkint CARG2, ->fff_fallback ++ | fcvtwl FCARG1, FCARG1 ++ | fcvtld FCARG1, FCARG1 ++ | br zero, <7 ++ | ++ |.endmacro ++ | ++ | math_minmax math_min, seleq, fseleq ++ | math_minmax math_max, selne, fselne ++ | ++ |//-- String library ----------------------------------------------------- ++ | ++ |.ffunc string_byte // Only handle the 1-arg case here. ++ | ldl CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori NARGS8:RC, 8, TMP1 ++ | ldi TMP0, -LJ_TSTR(TMP0) ++ | bis TMP1, TMP0, TMP1 ++ | cleartp STR:CARG1 ++ | bne TMP1, ->fff_fallback // Need exactly 1 string argument. ++ | ldw TMP0, STR:CARG1->len ++ | ldl PC, FRAME_PC(BASE) ++ | cmpult zero, TMP0, RD ++ | ldbu TMP2, STR:CARG1[1] // Access is always ok (NUL at end). ++ | addwi RD, 1, RD ++ | s8addwi RD, 0, RD // RD = ((str->len != 0)+1)*8 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp TMP2, TISNUM ++ | stl TMP2, -16(BASE) ++ | br zero, ->fff_res ++ | ++ |.ffunc string_char // Only handle the 1-arg case here. ++ | ffgccheck ++ | ldl CARG1, 0(BASE) ++ | gettp TMP0, CARG1 ++ | xori NARGS8:RC, 8, TMP1 // Need exactly 1 argument. ++ | ldi TMP0, -LJ_TISNUM(TMP0) // Integer. ++ | ldi TMP2, 255(zero) ++ | addwi CARG1, 0, CARG1 ++ | bis TMP1, TMP0, TMP1 ++ | cmpult TMP2, CARG1, TMP2 // !(255 < n). 
++ | bis TMP1, TMP2, TMP1 ++ | ldi CARG3, 1(zero) ++ | bne TMP1, ->fff_fallback ++ | ldi CARG2, TMPD_OFS(sp) ++ | stb CARG1, TMPD(sp) ++ |->fff_newstr: ++ | load_got lj_str_new ++ | stl BASE, L->base ++ | stl PC, SAVE_PC(sp) ++ | bis L, zero, CARG1 ++ | call_intern lj_str_new // (lua_State *L, char *str, size_t l) ++ | // Returns GCstr *. ++ | ldl BASE, L->base ++ |->fff_resstr: ++ | ldi AT, LJ_TSTR(zero) ++ | settp CRET1, AT ++ | bis CRET1, zero, CARG1 ++ | br zero, ->fff_restv ++ | ++ |.ffunc string_sub ++ | ffgccheck ++ | ldl CARG1, 0(BASE) ++ | ldl CARG2, 8(BASE) ++ | ldl CARG3, 16(BASE) ++ | subwi NARGS8:RC, 16, TMP0 ++ | gettp TMP1, CARG1 ++ | cmplt TMP0, zero, AT ++ | bne AT, ->fff_fallback ++ | cleartp STR:CARG1, CARG1 ++ | subwi zero, 1, CARG4 ++ | beq TMP0, >1 ++ | addwi CARG3, 0, CARG4 ++ | checkint CARG3, ->fff_fallback ++ |1: ++ | checkint CARG2, ->fff_fallback ++ | ldi TMP0, -LJ_TSTR(TMP1) ++ | addwi CARG2, 0, CARG3 ++ | bne TMP0, ->fff_fallback ++ | ldw CARG2, STR:CARG1->len ++ | // STR:CARG1 = str, CARG2 = str->len, CARG3 = start, CARG4 = end ++ | addwi CARG2, 1, TMP0 ++ | cmplt CARG4, zero, TMP3 ++ | addw CARG4, TMP0, TMP2 ++ | cmplt CARG3, zero, TMP1 ++ | selne TMP3, TMP2, CARG4, CARG4 // if (end < 0) end += len+1 ++ | addw CARG3, TMP0, TMP2 ++ | selne TMP1, TMP2, CARG3, CARG3 // if (start < 0) start += len+1 ++ | ldi TMP3, 1(zero) ++ | cmplt CARG4, zero, TMP2 ++ | cmplt zero, CARG3, TMP1 ++ | selne TMP2, zero, CARG4, CARG4 // if (end < 0) end = 0 ++ | selne TMP1, CARG3, TMP3, CARG3 // if (start < 1) start = 1 ++ | cmplt CARG2, CARG4, TMP2 ++ | seleq TMP2, CARG4, CARG2, CARG4 // if (end > len) end = len ++ | addl STR:CARG1, CARG3, CARG2 ++ | subl CARG4, CARG3, CARG3 // len = end - start ++ | ldi CARG2, sizeof(GCstr)-1(CARG2) ++ | addwi CARG3, 1, CARG3 // len += 1 ++ | cmplt CARG3, zero, AT ++ | beq AT, ->fff_newstr ++ |->fff_emptystr: // Return empty string. 
++ | ldi TMP1, LJ_TSTR(zero) ++ | ldi STR:CARG1, DISPATCH_GL(strempty)(DISPATCH) ++ | settp CARG1, TMP1 ++ | br zero, ->fff_restv ++ | ++ |.macro ffstring_op, name ++ | .ffunc string_ .. name ++ | ffgccheck ++ | ldl CARG2, 0(BASE) ++ | beq NARGS8:RC, ->fff_fallback ++ | checkstr STR:CARG2, ->fff_fallback ++ | ldi SBUF:CARG1, DISPATCH_GL(tmpbuf)(DISPATCH) ++ | load_got lj_buf_putstr_ .. name ++ | ldl TMP0, SBUF:CARG1->b ++ | stl L, SBUF:CARG1->L ++ | stl BASE, L->base ++ | stl TMP0, SBUF:CARG1->p ++ | stl PC, SAVE_PC(sp) ++ | call_intern extern lj_buf_putstr_ .. name ++ |// or SBUF:CARG1, SBUF:CRET1, zero ++ | load_got lj_buf_tostr ++ | bis SBUF:CRET1, zero, SBUF:CARG1 ++ | call_intern lj_buf_tostr ++ | ldl BASE, L->base ++ | br zero, ->fff_resstr ++ |.endmacro ++ | ++ |ffstring_op reverse ++ |ffstring_op lower ++ |ffstring_op upper ++ | ++ |//-- Bit library -------------------------------------------------------- ++ | ++ |->vm_tobit_fb: ++ | fldd FCARG1, 0(BASE) ++ | beq TMP1, ->fff_fallback ++ | faddd FCARG1, TOBIT, FCARG1 ++ | fimovd FCARG1, CRET1 ++ | zapi CRET1, 0xf0, CRET1 ++ | ret zero, 0(ra) ++ | ++ |.macro .ffunc_bit, name ++ | .ffunc_1 bit_..name ++ | gettp TMP0, CARG1 ++ | zapi CARG1, 0xf0, CRET1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | bne AT, >1 ++ | ldi TMP1, LJ_TISNUM(zero) ++ | cmpult TMP0, TMP1, TMP1 ++ | br ra, ->vm_tobit_fb ++ |1: ++ |.endmacro ++ | ++ |.macro .ffunc_bit_op, name, bins ++ | .ffunc_bit name ++ | ldi TMP2, 8(BASE) ++ | addl BASE, NARGS8:RC, TMP3 ++ |1: ++ | ldl TMP1, 0(TMP2) ++ | cmpeq TMP2, TMP3, AT ++ | bne AT, ->fff_resi ++ | gettp TMP0, TMP1 ++ | ldi TMP2, 8(TMP2) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | beq AT, >2 ++ | zapi TMP1, 0xf0, TMP1 ++ | bins CRET1, TMP1, CRET1 ++ | br zero, <1 ++ |2: ++ | fldd FCARG1, -8(TMP2) ++ | // cmpulti TMP0, LJ_TISNUM, TMP0 ++ | ldi AT, LJ_TISNUM(zero) ++ | cmpult TMP0, AT, TMP0 ++ | faddd FCARG1, TOBIT, FCARG1 ++ | beq TMP0, ->fff_fallback ++ | 
fimovd FCARG1, TMP1 ++ | zapi TMP1, 0xf0, TMP1 ++ | bins CRET1, TMP1, CRET1 ++ | br zero, <1 ++ |.endmacro ++ | ++ |.ffunc_bit_op band, and ++ |.ffunc_bit_op bor, bis ++ |.ffunc_bit_op bxor, xor ++ | ++ |.ffunc_bit bswap ++ | srli CRET1, 8, TMP0 ++ | srli CRET1, 24, TMP1 ++ | srli TMP0, 8, TMP2 ++ | andi TMP2, 0xff, TMP3 ++ | slli TMP3, 8, TMP3 ++ | .DINS TMP1, CRET1, 24, 8 ++ | .DINS TMP3, TMP0, 16, 8 ++ | bis TMP1, TMP3, CRET1 ++ | br zero, ->fff_resi ++ | ++ |.ffunc_bit tobit ++ |->fff_resi: ++ | ldl PC, FRAME_PC(BASE) ++ | ldi RA, -16(BASE) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CRET1, TISNUM ++ | stl CRET1, -16(BASE) ++ | br zero, ->fff_res1 ++ | ++ |.ffunc_bit bnot ++ | ornot zero, CRET1, CRET1 // ~CRET1 ++ | zapi CRET1, 0xf0, CRET1 ++ | br zero, ->fff_resi ++ | ++ |.macro .ffunc_bit_sh, name, shins, shmod ++ | .ffunc_2 bit_..name ++ | gettp TMP0, CARG1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | bne AT, >1 ++ | // cmpulti TMP0, LJ_TISNUM, TMP1 ++ | ldi AT, LJ_TISNUM(zero) ++ | cmpult TMP0, AT, TMP1 ++ | br ra, ->vm_tobit_fb ++ | bis CRET1, zero, CARG1 ++ |1: ++ | gettp TMP0, CARG2 ++ | zapi CARG2, 0xf0, CARG2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | beq AT, ->fff_fallback ++ | addwi CARG1, 0, CARG1 ++ |.if shmod == 1 ++ | subw zero, CARG2, CARG2 ++ |.endif ++ | shins CRET1, CARG1, CARG2 ++ | zapi CRET1, 0xf0, CRET1 ++ | br zero, ->fff_resi ++ |.endmacro ++ | ++ |.macro .SLLW, rd, rs, rt ++ | andi rt, 0x1f, AT ++ | sll rs, AT, rd ++ | addwi rd, 0x0, rd ++ |.endmacro ++ | ++ |.macro .SRLW, rd, rs, rt ++ | andi rt, 0x1f, AT ++ | zapi rs, 0xf0, rd ++ | srl rd, AT, rd ++ | addwi rd, 0, rd ++ |.endmacro ++ | ++ |.macro .SRAW, rd, rs, rt ++ | andi rt, 0x1f, AT ++ | addwi rs, 0x0, rd ++ | sra rd, AT, rd ++ |.endmacro ++ | ++ |.macro .RORW, rd, rs, rt //TODO CHECK ++ | andi rt, 0x1f, TMP0 ++ | ldi TMP1, 32(zero) ++ | subw TMP1, TMP0, TMP1 ++ | andi TMP0, 0x1f, TMP2 ++ | zapi rs, 0xf0, TMP0 ++ | srl TMP0, TMP2, TMP0 
++ | addwi TMP0, 0, TMP0 ++ | andi TMP1, 0x1f, TMP2 ++ | sll rs, TMP2, rd ++ | addwi rd, 0x0, rd ++ | bis TMP0, rd, rd ++ | addwi rd, 0x0, rd ++ |.endmacro ++ | ++ |.ffunc_bit_sh lshift, .SLLW, 0 ++ |.ffunc_bit_sh rshift, .SRLW, 0 ++ |.ffunc_bit_sh arshift, .SRAW, 0 ++ |.ffunc_bit_sh rol, .RORW, 1 ++ |.ffunc_bit_sh ror, .RORW, 0 ++ | ++ |//----------------------------------------------------------------------- ++ | ++ |->fff_fallback: // Call fast function fallback handler. ++ | // BASE = new base, RB = CFUNC, RC = nargs*8 ++ | ldl PC, FRAME_PC(BASE) // Fallback may overwrite PC. ++ | ldl CARG3, CFUNC:RB->f ++ | addl BASE, NARGS8:RC, TMP1 ++ | stl BASE, L->base ++ | ldi TMP0, 8*LUA_MINSTACK(TMP1) ++ | ldl TMP2, L->maxstack ++ | stl PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | stl TMP1, L->top ++ | bis L, zero, CARG1 ++ | cmpult TMP2, TMP0, AT ++ | bne AT, >5 // Need to grow stack. ++ | ldi CFUNCADDR, 0(CARG3) ++ | call r26, 0(CFUNCADDR) // (lua_State *L) ++ | // Either throws an error, or recovers and returns -1, 0 or nresults+1. ++ | ldl BASE, L->base ++ | s8addwi CRET1, 0, RD ++ | ldi RA, -16(BASE) ++ | cmplt zero, CRET1, AT ++ | bne AT, ->fff_res // Returned nresults+1? ++ |1: // Returned 0 or -1: retry fast path. ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | ldl TMP0, L->top ++ | subl TMP0, BASE, NARGS8:RC ++ | cleartp LFUNC:RB ++ | bne CRET1, ->vm_call_tail // Returned -1? ++ | ins_callt // Returned 0: retry fast path. ++ | ++ |// Reconstruct previous base for vmeta_call during tailcall. ++ |->vm_call_tail: ++ | andi PC, FRAME_TYPE, TMP0 ++ | ldi TMP2, ~FRAME_TYPEP(zero) ++ | and TMP2, PC, TMP1 ++ | bne TMP0, >3 ++ | ldbu TMP1, OFS_RA(PC) ++ | s8addwi TMP1, 16, TMP1 ++ |3: ++ | subl BASE, TMP1, TMP2 ++ | br zero, ->vm_call_dispatch // Resolve again for tailcall. ++ | ++ |5: // Grow stack for fallback handler. 
++ | load_got lj_state_growstack ++ | ldi CARG2, LUA_MINSTACK(zero) ++ | bis L, zero, CARG1 ++ | call_intern lj_state_growstack // (lua_State *L, int n) ++ | ldl BASE, L->base ++ | ldi CRET1, 0(zero) // Set zero-flag to force retry. ++ | br zero, <1 ++ | ++ |->fff_gcstep: // Call GC step function. ++ | // BASE = new base, RC = nargs*8 ++ | bis ra, zero, MULTRES ++ | load_got lj_gc_step ++ | addl BASE, NARGS8:RC, TMP0 // Calculate L->top. ++ | stl BASE, L->base ++ | stl PC, SAVE_PC(sp) // Redundant (but a defined value). ++ | bis L, zero, CARG1 ++ | stl TMP0, L->top ++ | call_intern lj_gc_step // (lua_State *L) ++ | ldl BASE, L->base ++ | ldl TMP0, L->top ++ | ldl CFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp CFUNC:RB ++ | subl TMP0, BASE, NARGS8:RC ++ | jmp zero, 0(MULTRES) ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Special dispatch targets ------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |->vm_record: // Dispatch target for recording phase. ++ |.if JIT ++ | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) ++ | andi TMP3, HOOK_VMEVENT, TMP1 // No recording while in vmevent. ++ | ldw TMP2, DISPATCH_GL(hookcount)(DISPATCH) ++ | bne TMP1, >5 ++ | // Decrement the hookcount for consistency, but always do the call. ++ | andi TMP3, HOOK_ACTIVE, TMP1 ++ | subwi TMP2, 1, TMP2 ++ | bne TMP1, >1 ++ | andi TMP3, LUA_MASKLINE|LUA_MASKCOUNT, TMP1 ++ | beq TMP1, >1 ++ | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) ++ | br zero, >1 ++ |.endif ++ | ++ |->vm_rethook: // Dispatch target for return hooks. ++ | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) ++ | andi TMP3, HOOK_ACTIVE, TMP1 // Hook already active? ++ | beq TMP1, >1 ++ |5: // Re-dispatch to static ins. ++ | ldl TMP1, GG_DISP2STATIC(TMP0) // Assumes TMP0 holds DISPATCH+OP*4. ++ | jmp zero, 0(TMP1) ++ | ++ |->vm_inshook: // Dispatch target for instr/line hooks. 
++ | ldbu TMP3, DISPATCH_GL(hookmask)(DISPATCH) ++ | ldw TMP2, DISPATCH_GL(hookcount)(DISPATCH) ++ | andi TMP3, HOOK_ACTIVE, TMP1 // Hook already active? ++ | bne TMP1, <5 ++ | andi TMP3, LUA_MASKLINE|LUA_MASKCOUNT, TMP1 ++ | subwi TMP2, 1, TMP2 ++ | beq TMP1, <5 ++ | stw TMP2, DISPATCH_GL(hookcount)(DISPATCH) ++ | beq TMP2, >1 ++ | andi TMP3, LUA_MASKLINE, TMP1 ++ | load_got lj_dispatch_ins ++ | beq TMP1, <5 ++ |1: ++ | load_got lj_dispatch_ins ++ | stw MULTRES, TMPD(sp) ++ | bis PC, zero, CARG2 ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | // SAVE_PC must hold the _previous_ PC. The callee updates it with PC. ++ | call_intern lj_dispatch_ins // (lua_State *L, const BCIns *pc) ++ |3: ++ | ldl BASE, L->base ++ |4: // Re-dispatch to static ins. ++ | ldw INS, -4(PC) ++ | decode_OP TMP1, INS ++ | decode_BC8b TMP1 ++ | addl TMP1, DISPATCH, TMP0 ++ | decode_RD RD, INS ++ | ldl TMP1, GG_DISP2STATIC(TMP0) ++ | decode_RA RA, INS ++ | jmp zero, 0(TMP1) ++ | ++ |->cont_hook: // Continue from hook yield. ++ | ldi PC, 4(PC) ++ | ldw MULTRES, -24(RB) // Restore MULTRES for *M ins. ++ | br zero, <4 ++ | ++ |->vm_hotloop: // Hot loop counter underflow. ++ |.if JIT ++ | ldl LFUNC:TMP1, FRAME_FUNC(BASE) ++ | ldi CARG1, GG_DISP2J(DISPATCH) ++ | cleartp LFUNC:TMP1 ++ | stl PC, SAVE_PC(sp) ++ | ldl TMP1, LFUNC:TMP1->pc ++ | bis PC, zero, CARG2 ++ | stl L, DISPATCH_J(L)(DISPATCH) ++ | ldbu TMP1, PC2PROTO(framesize)(TMP1) ++ | load_got lj_trace_hot ++ | stl BASE, L->base ++ | s8addl TMP1, BASE, TMP1 ++ | stl TMP1, L->top ++ | call_intern lj_trace_hot // (jit_State *J, const BCIns *pc) ++ | br zero, <3 ++ |.endif ++ | ++ | ++ |->vm_callhook: // Dispatch target for call hooks. ++ | bis PC, zero, CARG2 ++ |.if JIT ++ | br zero, >1 ++ |.endif ++ | ++ |->vm_hotcall: // Hot call counter underflow. 
++ |.if JIT ++ | bisi PC, 1, CARG2 ++ |1: ++ |.endif ++ | load_got lj_dispatch_call ++ | addl BASE, RC, TMP0 ++ | stl PC, SAVE_PC(sp) ++ | stl BASE, L->base ++ | subl RA, BASE, RA ++ | stl TMP0, L->top ++ | bis L, zero, CARG1 ++ | call_intern lj_dispatch_call // (lua_State *L, const BCIns *pc) ++ | // Returns ASMFunction. ++ | ldl BASE, L->base ++ | ldl TMP0, L->top ++ | stl zero, SAVE_PC(sp) // Invalidate for subsequent line hook. ++ | addl RA, BASE, RA ++ | subl TMP0, BASE, NARGS8:RC ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | cleartp LFUNC:RB ++ | ldw INS, -4(PC) ++ | jmp zero, 0(CRET1) ++ | ++ |->cont_stitch: // Trace stitching. ++ |.if JIT ++ | // RA = resultptr, RB = meta base ++ | ldw INS, -4(PC) ++ | ldl TRACE:TMP2, -40(RB) // Save previous trace. ++ | decode_RA RC, INS ++ | ldi TMP1, -8(MULTRES) ++ | cleartp TRACE:TMP2 ++ | addl RC, BASE, RC // Call base. ++ | beq TMP1, >2 ++ |1: // Move results down. ++ | ldl CARG1, 0(RA) ++ | ldi TMP1, -8(TMP1) ++ | ldi RA, 8(RA) ++ | stl CARG1, 0(RC) ++ | ldi RC, 8(RC) ++ | bne TMP1, <1 ++ |2: ++ | decode_RA RA, INS ++ | decode_RB RB, INS ++ | addl RA, RB, RA ++ | addl RA, BASE, RA ++ |3: ++ | cmpult RC, RA, TMP1 ++ | bne TMP1, >9 // More results wanted? ++ | ++ | ldhu TMP3, TRACE:TMP2->traceno ++ | ldhu RD, TRACE:TMP2->link ++ | load_got lj_dispatch_stitch ++ | cmpeq RD, TMP3, AT ++ | bne AT, ->cont_nop // Blacklisted. ++ | s8addwi RD, 0, RD ++ | bne RD, =>BC_JLOOP // Jump to stitched trace. ++ | ++ | // Stitch a new trace to the previous trace. ++ | stw TMP3, DISPATCH_J(exitno)(DISPATCH) ++ | stl L, DISPATCH_J(L)(DISPATCH) ++ | stl BASE, L->base ++ | ldi CARG1, GG_DISP2J(DISPATCH) ++ | bis PC, zero, CARG2 ++ | call_intern lj_dispatch_stitch // (jit_State *J, const BCIns *pc) ++ | ldl BASE, L->base ++ | br zero, ->cont_nop ++ | ++ |9: ++ | stl TISNIL, 0(RC) ++ | ldi RC, 8(RC) ++ | br zero, <3 ++ |.endif ++ | ++ |->vm_profhook: // Dispatch target for profiler hook. 
++#if LJ_HASPROFILE ++ | load_got lj_dispatch_profile ++ | bis L, zero, CARG1 ++ | bis PC, zero, CARG2 ++ | stl BASE, L->base ++ | stw MULTRES, TMPD(sp) ++ | call_intern lj_dispatch_profile // (lua_State *L, const BCIns *pc) ++ | // HOOK_PROFILE is off again, so re-dispatch to dynamic instruction. ++ | ldi PC, -4(PC) ++ | ldl BASE, L->base ++ | br zero, ->cont_nop ++#endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Trace exit handler ------------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.macro savex_, a, b ++ | fstd f..a, a*8(sp) ++ | fstd f..b, b*8(sp) ++ | stl r..a, 32*8+a*8(sp) ++ | stl r..b, 32*8+b*8(sp) ++ |.endmacro ++ | ++ |->vm_exit_handler: ++ |.if JIT ++ |//Save all registers except RA and SP.On SW64 is r30 and r26. ++ | ldi sp, -(32*8+32*8)(sp) ++ | savex_ 0, 1 ++ | savex_ 2, 3 ++ | savex_ 4, 5 ++ | savex_ 6, 7 ++ | savex_ 8, 9 ++ | savex_ 10, 11 ++ | savex_ 12, 13 ++ | savex_ 14, 15 ++ | savex_ 16, 17 ++ | savex_ 18, 19 ++ | savex_ 20, 21 ++ | savex_ 22, 23 ++ | savex_ 24, 25 ++ | savex_ 27, 28 ++ | savex_ 29, 31 ++ | fstd f26, 26*8(sp) ++ | fstd f30, 30*8(sp) ++ | stl zero, 32*8+26*8(sp) // Clear RID_TMP. ++ | ldi TMP2, 32*8+32*8(sp) // Recompute original value of sp. ++ | stl TMP2, 32*8+30*8(sp) // Store sp in RID_SP ++ | li_vmstate EXIT ++ | ldi DISPATCH, -GG_DISP2G-32768(JGL) ++ | ldw TMP1, 0(TMP2) // Load exit number. ++ | st_vmstate ++ | ldl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | ldl BASE, DISPATCH_GL(jit_base)(DISPATCH) ++ | load_got lj_trace_exit ++ | stl L, DISPATCH_J(L)(DISPATCH) ++ | stw ra, DISPATCH_J(parent)(DISPATCH) // Store trace number. ++ | stl BASE, L->base ++ | stw TMP1, DISPATCH_J(exitno)(DISPATCH) // Store exit number. 
++ | ldi CARG1, GG_DISP2J(DISPATCH) ++ | stl zero, DISPATCH_GL(jit_base)(DISPATCH) ++ | bis sp, zero, CARG2 ++ | call_intern lj_trace_exit // (jit_State *J, ExitState *ex) ++ | // Returns MULTRES (unscaled) or negated error code. ++ | ldl TMP1, L->cframe ++ | ldi TMP2, -4(zero) ++ | ldl BASE, L->base ++ | and TMP1, TMP2, sp ++ | ldl PC, SAVE_PC(sp) // Get SAVE_PC. ++ | stl L, SAVE_L(sp) // Set SAVE_L (on-trace resume/yield). ++ | br zero, >1 ++ |.endif ++ | ++ |->vm_exit_interp: ++ |.if JIT ++ | // CRET1 = MULTRES or negated error code, BASE, PC and JGL set. ++ | ldl L, SAVE_L(sp) ++ | ldi DISPATCH, -GG_DISP2G-32768(JGL) ++ | stl BASE, L->base ++ |1: ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | cmplt CRET1, zero, AT ++ | bne AT, >9 // Check for error from exit. ++ | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). ++ | slli CRET1, 3, MULTRES ++ | cleartp LFUNC:RB ++ | stw MULTRES, TMPD(sp) ++ | ldi TISNIL, LJ_TNIL(zero) ++ | ldi TISNUM, LJ_TISNUM(zero) // Setup type comparison constants. ++ | ifmovs TMP3, TOBIT ++ | ldl TMP1, LFUNC:RB->pc ++ | stl zero, DISPATCH_GL(jit_base)(DISPATCH) ++ | ldl KBASE, PC2PROTO(k)(TMP1) ++ | fcvtsd TOBIT, TOBIT ++ | // Modified copy of ins_next which handles function header dispatch, too. ++ | ldw INS, 0(PC) ++ | ldi PC, 4(PC) ++ | // Assumes TISNIL == ~LJ_VMST_INTERP == -1 ++ | stw TISNIL, DISPATCH_GL(vmstate)(DISPATCH) ++ | decode_OP TMP1, INS ++ | decode_BC8b TMP1 ++ | // cmpulti TMP1, BC_FUNCF*8, TMP2 ++ | ldi TMP2, BC_FUNCF*8(zero) ++ | cmpult TMP1, TMP2, TMP2 ++ | addl DISPATCH, TMP1, TMP0 ++ | decode_RD RD, INS ++ | ldl TMP3, 0(TMP0) ++ | decode_RA RA, INS ++ | beq TMP2, >2 ++ | jmp zero, 0(TMP3) ++ |2: ++ | // cmpulti TMP1, (BC_FUNCC+2)*8, TMP2 // Fast function? ++ | ldi TMP2, (BC_FUNCF+2)*8(zero) ++ | cmpult TMP1, TMP2, TMP2 ++ | ldl TMP1, FRAME_PC(BASE) ++ | bne TMP2, >3 ++ | // Check frame below fast function. ++ | andi TMP1, FRAME_TYPE, TMP0 ++ | bne TMP0, >3 // Trace stitching continuation? 
++ | // Otherwise set KBASE for Lua function below fast function. ++ | ldw TMP2, -4(TMP1) ++ | decode_RA TMP0, TMP2 ++ | subl BASE, TMP0, TMP1 ++ | ldl LFUNC:TMP2, -32(TMP1) ++ | cleartp LFUNC:TMP2 ++ | ldl TMP1, LFUNC:TMP2->pc ++ | ldl KBASE, PC2PROTO(k)(TMP1) ++ |3: ++ | ldi RC, -8(MULTRES) ++ | addl RA, BASE, RA ++ | jmp zero, 0(TMP3) ++ | ++ |9: // Rethrow error from the right C frame. ++ | load_got lj_err_throw ++ | subw zero, CRET1, CARG2 //TODO LA: sub.w no trap ++ | bis L, zero, CARG1 ++ | call_intern lj_err_throw // (lua_State *L, int errcode) ++ |.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Math helper functions ---------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Modifies AT, TMP0, FCRET1, FCRET2, FCARG1. Keeps all others incl. f2. ++ |.macro vm_round, func ++ | // skip NaN && Inf ++ | // 0 * NaN == NaN ++ | // 0 * Inf == NaN ++ | // 0 * Other == 0 ++ | fmuld fzero, FCARG1, FAT ++ | fcmpun fzero, FAT, FAT ++ | fbeq FAT, >1 ++ | faddd fzero, FCARG1, FCRET1 ++ | ret zero, 0(ra) ++ |1: ++ |.if "func"=="floor" ++ | fcvtdln FCARG1, FAT; ++ |.endif ++ |.if "func"=="ceil" ++ | fcvtdlp FCARG1, FAT; ++ |.endif ++ |.if "func"=="trunc" ++ | fcvtdlz FCARG1, FAT; ++ |.endif ++ | fcvtld FAT, FCRET1 ++ | ret zero, 0(ra) ++ |.endmacro ++ | ++ | ++ |->vm_floor: ++ | vm_round floor ++ |->vm_ceil: ++ | vm_round ceil ++ |->vm_trunc: ++ |.if JIT ++ | vm_round trunc ++ |.endif ++ | ++ | ++ |//----------------------------------------------------------------------- ++ |//-- Miscellaneous functions -------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |.define NEXT_TAB, TAB:CARG1 ++ |.define NEXT_IDX, CARG2 ++ |.define NEXT_ASIZE, CARG3 ++ |.define NEXT_NIL, CARG4 ++ |.define NEXT_TMP0, TMP0 ++ |.define NEXT_TMP1, TMP1 ++ |.define NEXT_TMP2, TMP2 ++ |.define 
NEXT_RES_VK, CRET1 ++ |.define NEXT_RES_IDX, CRET2 ++ |.define NEXT_RES_PTR, sp ++ |.define NEXT_RES_VAL, 0(sp) ++ |.define NEXT_RES_KEY, 8(sp) ++ | ++ |// TValue *lj_vm_next(GCtab *t, uint32_t idx) ++ |// Next idx returned in CRET2. ++ |//->vm_next: ++ |//.if JIT ++ |// ldw NEXT_ASIZE, NEXT_TAB->asize ++ |// ldl NEXT_TMP0, NEXT_TAB->array ++ |// ldi NEXT_NIL, LJ_TNIL(zero) ++ |//1: // Traverse array part. ++ |// cmpult NEXT_IDX, NEXT_ASIZE, TMP3 ++ |// slli NEXT_IDX, 3, NEXT_TMP1 ++ |// addwi NEXT_TMP1, 0, NEXT_TMP1 ++ |// addl NEXT_TMP1, NEXT_TMP0, NEXT_TMP1 ++ |// beq TMP3, >5 ++ |// ldi TMP3, LJ_TISNUM(zero) ++ |// ldl NEXT_TMP2, 0(NEXT_TMP1) ++ |// slli TMP3, 47, TMP3 ++ |// bis NEXT_IDX, TMP3, NEXT_TMP1 ++ |// addwi NEXT_IDX, 1, NEXT_IDX ++ |// cmpeq NEXT_TMP2, NEXT_NIL, NEXT_TMP2 ++ |// bne NEXT_TMP2, <1 ++ |// stl NEXT_TMP2, NEXT_RES_VAL ++ |// stl NEXT_TMP1, NEXT_RES_KEY ++ |// bis NEXT_RES_PTR, zero, NEXT_RES_VK ++ |// bis NEXT_IDX, zero, NEXT_RES_IDX ++ |// ret zero, 0(ra) ++ | ++ |//5: // Traverse hash part. ++ |// subw NEXT_IDX, NEXT_ASIZE, NEXT_RES_IDX ++ |// ldw NEXT_TMP0, NEXT_TAB->hmask ++ |// ldl NODE:NEXT_RES_VK, NEXT_TAB->node ++ |// slli NEXT_RES_IDX, 5, NEXT_TMP2 ++ |// addwi NEXT_TMP2, 0, NEXT_TMP2 ++ |// slli NEXT_RES_IDX, 3, TMP3 ++ |// addwi TMP3, 0, TMP3 ++ |// subw NEXT_TMP2, TMP3, TMP3 ++ |// addl NODE:NEXT_RES_VK, TMP3, NODE:NEXT_RES_VK ++ |//6: ++ |// cmpult NEXT_TMP0, NEXT_RES_IDX, TMP3 ++ |// bne TMP3, >8 ++ |// ldl NEXT_TMP2, NODE:NEXT_RES_VK->val ++ |// addwi NEXT_RES_IDX, 1, NEXT_RES_IDX ++ |// cmpeq NEXT_TMP2, NEXT_NIL, NEXT_TMP2 ++ |// beq NEXT_TMP2, >9 ++ | // Skip holes in hash part. ++ |// ldi NODE:NEXT_RES_VK, sizeof(Node)(NODE:NEXT_RES_VK) ++ |// br zero, <6 ++ | ++ |//8: // End of iteration. Set the key to nil (not the value). 
++ |// stl NEXT_NIL, NEXT_RES_KEY ++ |// bis NEXT_RES_PTR, zero, NEXT_RES_VK ++ |//9: ++ |// addw NEXT_RES_IDX, NEXT_ASIZE, NEXT_RES_IDX ++ |// ret zero, 0(ra) ++ |//.endif ++ | ++ |//----------------------------------------------------------------------- ++ |//-- FFI helper functions ----------------------------------------------- ++ |//----------------------------------------------------------------------- ++ | ++ |// Handler for callback functions. Callback slot number in r1, g in r2. ++ |->vm_ffi_callback: ++ |.if FFI ++ |.type CTSTATE, CTState, PC ++ | saveregs ++ | ldl CTSTATE, GL:r2->ctype_state ++ | ldi DISPATCH, GG_G2DISP(r2) ++ | load_got lj_ccallback_enter ++ | stw r1, CTSTATE->cb.slot ++ | stl CARG1, CTSTATE->cb.gpr[0] ++ | fstd FCARG1, CTSTATE->cb.fpr[0] ++ | stl CARG2, CTSTATE->cb.gpr[1] ++ | fstd FCARG2, CTSTATE->cb.fpr[1] ++ | stl CARG3, CTSTATE->cb.gpr[2] ++ | fstd FCARG3, CTSTATE->cb.fpr[2] ++ | stl CARG4, CTSTATE->cb.gpr[3] ++ | fstd FCARG4, CTSTATE->cb.fpr[3] ++ | stl CARG5, CTSTATE->cb.gpr[4] ++ | fstd FCARG5, CTSTATE->cb.fpr[4] ++ | stl CARG6, CTSTATE->cb.gpr[5] ++ | fstd FCARG6, CTSTATE->cb.fpr[5] ++ | ldi TMP0, CFRAME_SPACE(sp) ++ | stl TMP0, CTSTATE->cb.stack ++ | stl zero, SAVE_PC(sp) // Any value outside of bytecode is ok. ++ | bis CTSTATE, zero, CARG1 ++ | bis sp, zero, CARG2 ++ | call_intern lj_ccallback_enter // (CTState *cts, void *cf) ++ | // Returns lua_State *. ++ | ldl BASE, L:CRET1->base ++ | ldl RC, L:CRET1->top ++ | bis CRET1, zero, L ++ | ldih TMP3, 0x59c0(zero) // TOBIT = 2^52 + 2^51 (float). ++ | ldl LFUNC:RB, FRAME_FUNC(BASE) ++ | ifmovs TMP3, TOBIT ++ | ldi TISNIL, LJ_TNIL(zero) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | li_vmstate INTERP ++ | subw RC, BASE, RC ++ | cleartp LFUNC:RB ++ | st_vmstate ++ | fcvtsd TOBIT, TOBIT ++ | ins_callt ++ |.endif ++ | ++ |->cont_ffi_callback: // Return from FFI callback. 
++ |.if FFI ++ | load_got lj_ccallback_leave ++ | ldl CTSTATE, DISPATCH_GL(ctype_state)(DISPATCH) ++ | stl BASE, L->base ++ | stl RB, L->top ++ | stl L, CTSTATE->L ++ | bis CTSTATE, zero, CARG1 ++ | bis RA, zero, CARG2 ++ | call_intern lj_ccallback_leave // (CTState *cts, TValue *o) ++ | fldd FCRET1, CTSTATE->cb.fpr[0] ++ | ldl CRET1, CTSTATE->cb.gpr[0] ++ | fldd FCRET2, CTSTATE->cb.fpr[1] ++ | ldl CRET2, CTSTATE->cb.gpr[1] ++ | br zero, ->vm_leave_unw ++ |.endif ++ | ++ |->vm_ffi_call: // Call C function via FFI. ++ | // Caveat: needs special frame unwinding, see below. ++ |.if FFI ++ | .type CCSTATE, CCallState, CARG1 ++ | ldw TMP1, CCSTATE->spadj ++ | ldbu CARG2, CCSTATE->nsp ++ | bis sp, zero, TMP2 ++ | subl sp, TMP1, sp ++ | stl ra, -8(TMP2) ++ | s8addwi CARG2, 0, CARG2 ++ | stl r9, -16(TMP2) ++ | stl CCSTATE, -24(TMP2) ++ | bis TMP2, zero, r9 ++ | ldi TMP1, offsetof(CCallState, stack)(CCSTATE) ++ | bis sp, zero, TMP2 ++ | addl TMP1, CARG2, TMP3 ++ | beq CARG2, >2 ++ |1: ++ | ldl TMP0, 0(TMP1) ++ | ldi TMP1, 8(TMP1) ++ | cmpult TMP1, TMP3, TMP4 ++ | stl TMP0, 0(TMP2) ++ | ldi TMP2, 8(TMP2) ++ | bne TMP4, <1 ++ |2: ++ | ldl CFUNCADDR, CCSTATE->func ++ | fldd FCARG1, CCSTATE->gpr[0] ++ | fldd FCARG2, CCSTATE->gpr[1] ++ | fldd FCARG3, CCSTATE->gpr[2] ++ | fldd FCARG4, CCSTATE->gpr[3] ++ | fldd FCARG5, CCSTATE->gpr[4] ++ | fldd FCARG6, CCSTATE->gpr[5] ++ | ldl CARG2, CCSTATE->gpr[1] ++ | ldl CARG3, CCSTATE->gpr[2] ++ | ldl CARG4, CCSTATE->gpr[3] ++ | ldl CARG5, CCSTATE->gpr[4] ++ | ldl CARG6, CCSTATE->gpr[5] ++ | ldl CARG1, CCSTATE->gpr[0] // Do this last, since CCSTATE is CARG1. 
++ | call r26, 0(CFUNCADDR) ++ | ldl CCSTATE:TMP1, -24(r9) ++ | ldl TMP2, -16(r9) ++ | ldl ra, -8(r9) ++ | stl CRET1, CCSTATE:TMP1->gpr[0] ++ | stl CRET2, CCSTATE:TMP1->gpr[1] ++ | fstd FCRET1, CCSTATE:TMP1->fpr[0] ++ | fstd FCRET2, CCSTATE:TMP1->fpr[1] ++ | bis r9, zero, sp ++ | bis TMP2, zero, r9 ++ | ret zero, 0(ra) ++ |.endif ++ |// Note: vm_ffi_call must be the last function in this object file! ++ | ++ |//----------------------------------------------------------------------- ++} ++ ++//TODO cmx ++/* Generate the code for a single instruction. */ ++static void build_ins(BuildCtx *ctx, BCOp op, int defop) ++{ ++ int vk = 0; ++ |=>defop: ++ ++ switch (op) { ++ ++ /* -- Comparison ops ---------------------------------------------------- */ ++ ++ /* Remember: all ops branch for a true comparison, fall through otherwise. */ ++ ++ case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT: ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | addl RA, BASE, RA ++ | addl RD, BASE, RD ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ldl CARG1, 0(RA) ++ | ldl CARG2, 0(RD) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ } else { ++ | ldl CARG2, 0(RA) ++ | ldl CARG1, 0(RD) ++ | gettp CARG3, CARG2 ++ | gettp CARG4, CARG1 ++ } ++ | ldhu TMP2, OFS_RD(PC) // TMP2=jump ++ | ldi PC, 4(PC) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG3, TISNUM, AT ++ | beq AT, >2 ++ | decode_BC4b TMP2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG4, TISNUM, AT ++ | beq AT, >5 ++ | addwi CARG1, 0, CARG1 ++ | addwi CARG2, 0, CARG2 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | cmplt CARG1, CARG2, TMP1 ++ | addw TMP2, TMP3, TMP2 // TMP2=(jump-0x8000)<<2 ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | seleq TMP1, zero, TMP2, TMP2 ++ } else { ++ | selne TMP1, zero, TMP2,TMP2 ++ } ++ |1: ++ | addl PC, TMP2, PC ++ | ins_next ++ | ++ |2: // RA is not an integer. 
++ | // cmpulti CARG3, LJ_TISNUM, TMP1 ++ | ldi TMP1, LJ_TISNUM(zero) ++ | cmpult CARG3, TMP1, TMP1 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | beq TMP1, ->vmeta_comp ++ | // cmpulti CARG4, LJ_TISNUM, TMP1 ++ | ldi TMP1, LJ_TISNUM(zero) ++ | cmpult CARG4, TMP1, TMP1 ++ | decode_BC4b TMP2 ++ | beq TMP1, >4 ++ | ifmovd CARG1, FTMP0 ++ | ifmovd CARG2, FTMP2 ++ |3: // RA and RD are both numbers. ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | fcmplt FTMP0, FTMP2, FTMP3 ++ } else { ++ | fcmplt FTMP0, FTMP2, FTMP3 ++ | fcmpun FTMP0, FTMP2, FTMP4 //TODO FCC CHECK ++ | faddd FTMP3, FTMP4, FTMP3 ++ } ++ | addw TMP2, TMP3, TMP2 ++ | fimovd FTMP3, TMP3 //TODO CHECK ++ if (op == BC_ISLT || op == BC_ISGT) { ++ | seleq TMP3, zero, TMP2, TMP2 ++ } else { ++ | selne TMP3, zero, TMP2, TMP2 ++ } ++ | br zero, <1 ++ | ++ |4: // RA is a number, RD is not a number. ++ | // RA is a number, RD is an integer. Convert RD to a number. ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG4, TISNUM, AT ++ | beq AT, ->vmeta_comp ++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ifmovs CARG2, FTMP2 ++ | ifmovd CARG1, FTMP0 ++ | fcvtwl FTMP2, FTMP2 ++ | fcvtld FTMP2, FTMP2 ++ } else { ++ | ifmovs CARG1, FTMP0 ++ | ifmovd CARG2, FTMP2 ++ | fcvtwl FTMP0, FTMP0 ++ | fcvtld FTMP0, FTMP0 ++ } ++ | br zero, <3 ++ | ++ |5: // RA is an integer, RD is not an integer ++ | // cmpulti CARG4, LJ_TISNUM, TMP1 ++ | ldi TMP1, LJ_TISNUM(zero) ++ | cmpult CARG4, TMP1, TMP1 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | beq TMP1, ->vmeta_comp ++ | // RA is an integer, RD is a number. Convert RA to a number. 
++ if (op == BC_ISLT || op == BC_ISGE) { ++ | ifmovs CARG1, FTMP0 ++ | ifmovd CARG2, FTMP2 ++ | fcvtwl FTMP0, FTMP0 ++ | fcvtld FTMP0, FTMP0 ++ } else { ++ | ifmovs CARG2, FTMP2 ++ | ifmovd CARG1, FTMP0 ++ | fcvtwl FTMP2, FTMP2 ++ | fcvtld FTMP2, FTMP2 ++ } ++ | br zero, <3 ++ break; ++ ++ case BC_ISEQV: case BC_ISNEV: ++ vk = op == BC_ISEQV; ++ | // RA = src1*8, RD = src2*8, JMP with RD = target ++ | addl RA, BASE, RA ++ | addl RD, BASE, RD ++ | ldi PC, 4(PC) ++ | ldl CARG1, 0(RA) ++ | ldl CARG2, 0(RD) ++ | ldhu TMP2, -4+OFS_RD(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult TISNUM, CARG3, TMP0 ++ | cmpult TISNUM, CARG4, TMP1 ++ | bis TMP0, TMP1, TMP0 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ if (vk) { ++ | beq TMP0, ->BC_ISEQN_Z ++ } else { ++ | beq TMP0, ->BC_ISNEN_Z ++ } ++ |// Either or both types are not numbers. ++ |.if FFI ++ | // Check if RA or RD is a cdata. ++ | ldi TMP0, LJ_TCDATA(zero) ++ | addwi TMP0, 0, TMP0 ++ | cmpeq CARG3, TMP0, AT ++ | bne AT, ->vmeta_equal_cd ++ | cmpeq CARG4, TMP0, AT ++ | bne AT, ->vmeta_equal_cd ++ |.endif ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | decode_BC4b TMP2 ++ | addw TMP2, TMP3, TMP2 // (jump-0x8000)<<2 ++ | cmpeq CARG1, CARG2, AT ++ | beq AT, >2 ++ | // Tag and value are equal. ++ if (vk) { ++ |->BC_ISEQV_Z: ++ | addl PC, TMP2, PC ++ } ++ |1: ++ | ins_next ++ | ++ |2: // Check if the tags are the same and it's a table or userdata. ++ | xor CARG3, CARG4, TMP3 // Same type? ++ | // cmpulti CARG3, LJ_TISTABUD+1, TMP0 // Table or userdata? TMP0=1 ++ | ldi TMP0, LJ_TISTABUD+1(zero) ++ | cmpult CARG3, TMP0, TMP0 ++ | selne TMP3, zero, TMP0, TMP0 // TMP0=0: not same type, or same type table/userdata ++ | cleartp TAB:TMP1, CARG1 ++ if (vk) { ++ | beq TMP0, <1 ++ } else { ++ | beq TMP0, ->BC_ISEQV_Z // Reuse code from opposite instruction. ++ } ++ | // Different tables or userdatas. Need to check __eq metamethod. 
++ | // Field metatable must be at same offset for GCtab and GCudata! ++ | ldl TAB:TMP3, TAB:TMP1->metatable ++ if (vk) { ++ | beq TAB:TMP3, <1 // No metatable? ++ | ldbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, 1<BC_ISEQV_Z // No metatable? ++ | ldbu TMP3, TAB:TMP3->nomm ++ | andi TMP3, 1<BC_ISEQV_Z // Or 'no __eq' flag set? ++ } ++ | br zero, ->vmeta_equal // Handle __eq metamethod. ++ break; ++ ++ case BC_ISEQS: case BC_ISNES: ++ vk = op == BC_ISEQS; ++ | // RA = src*8, RD = str_const*8 (~), JMP with RD = target ++ | addl RA, BASE, RA ++ | ldi PC, 4(PC) ++ | ldl CARG1, 0(RA) ++ | subl KBASE, RD, RD ++ | ldhu TMP2, -4+OFS_RD(PC) ++ | ldl CARG2, -8(RD) // KBASE-8-str_const*8 ++ |.if FFI ++ | gettp CARG3, CARG1 ++ | ldi TMP1, LJ_TCDATA(zero) ++ | addwi TMP1, 0, TMP1 ++ |.endif ++ | ldi TMP0, LJ_TSTR(zero) ++ | addwi TMP0, 0, TMP0 ++ | decode_BC4b TMP2 ++ | settp CARG2, TMP0 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ |.if FFI ++ | cmpeq CARG3, TMP1, AT ++ | bne AT, ->vmeta_equal_cd ++ |.endif ++ | xor CARG1, CARG2, TMP0 // TMP2=0: A==D; TMP2!=0: A!=D ++ | addw TMP2, TMP3, TMP2 ++ if (vk) { ++ | selne TMP0, zero, TMP2, TMP2 ++ } else { ++ | seleq TMP0, zero, TMP2, TMP2 ++ } ++ | addl PC, TMP2, PC ++ | ins_next ++ break; ++ ++ case BC_ISEQN: case BC_ISNEN: ++ vk = op == BC_ISEQN; ++ | // RA = src*8, RD = num_const*8, JMP with RD = target ++ | addl RA, BASE, RA ++ | addl RD, KBASE, RD ++ | ldl CARG1, 0(RA) ++ | ldl CARG2, 0(RD) ++ | ldhu TMP2, OFS_RD(PC) ++ | ldi PC, 4(PC) ++ | gettp CARG3, CARG1 ++ | gettp CARG4, CARG2 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ if (vk) { ++ |->BC_ISEQN_Z: ++ } else { ++ |->BC_ISNEN_Z: ++ } ++ | decode_BC4b TMP2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG3, TISNUM, AT ++ | beq AT, >4 ++ | addw TMP2, TMP3, TMP2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG4, TISNUM, AT ++ | beq AT, >6 ++ | xor CARG1, CARG2, TMP0 // TMP0=0: A==D; TMP0!=0: A!=D ++ if (vk) { ++ | selne TMP0, zero, TMP2, TMP2 ++ |1: ++ | addl PC, TMP2, PC ++ |2: ++ 
} else { ++ | seleq TMP0, zero, TMP2, TMP2 ++ |1: ++ |2: ++ | addl PC, TMP2, PC ++ } ++ |3: ++ | ins_next ++ | ++ |4: // RA is not an integer. ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult CARG3, TISNUM, TMP0 ++ | addw TMP2, TMP3, TMP2 ++ |.if FFI ++ | beq TMP0, >7 ++ |.else ++ | beq TMP0, <2 ++ |.endif ++ | ifmovd CARG1, FTMP0 ++ | ifmovd CARG2, FTMP2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG4, TISNUM, AT ++ | beq AT, >5 ++ |// RA is a number, RD is an integer. ++ | ldl TMP3, 0(RD) ++ | addw TMP3, zero, TMP3 //get [0:31] of RD ++ | ifmovd TMP3, FTMP2 ++ | fcvtld FTMP2, FTMP2 ++ | ++ |5: // RA and RD are both numbers. ++ | fcmpun FTMP0, FTMP2, FTMP3 ++ | fimovd FTMP3, TMP4 //tmp4=2:is NaN; tmp0=0:isnot NaN ++ | bne TMP4, >9 ++ | fcmpeq FTMP0, FTMP2, FTMP4 ++ | fimovd FTMP4, TMP1 //tmp1=0:is eq ++ if (vk) { ++ | seleq TMP1, zero, TMP2, TMP2 ++ } else { ++ | selne TMP1, zero, TMP2, TMP2 ++ } ++ | br zero, <1 ++ | ++ |6: // RA is an integer, RD is a number. ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult CARG4, TISNUM, TMP0 ++ |.if FFI ++ | beq TMP0, >8 ++ |.else ++ | beq TMP0, <2 ++ |.endif ++ | ifmovs CARG1, FTMP0 ++ | ifmovd CARG2, FTMP2 ++ | fcvtwl FTMP0, FTMP0 ++ | fcvtld FTMP0, FTMP0 ++ | br zero, <5 ++ | ++ |.if FFI ++ |7: // RA not int, not number ++ | ldi TMP0, LJ_TCDATA(zero) ++ | addwi TMP0, 0, TMP0 ++ | cmpeq CARG3, TMP0, AT ++ | beq AT, <2 ++ | br zero, ->vmeta_equal_cd ++ | ++ |8: // RD not int, not number ++ | ldi TMP0, LJ_TCDATA(zero) ++ | addwi TMP0, 0, TMP0 ++ | cmpeq CARG4, TMP0, AT ++ | beq AT, <2 ++ | br zero, ->vmeta_equal_cd ++ |.endif ++ | ++ |9: //is NaN ++ if (vk) { ++ | selne TMP4, zero, TMP2, TMP2 ++ } else { ++ | seleq TMP4, zero, TMP2, TMP2 ++ } ++ | br zero, <1 ++ break; ++ ++ case BC_ISEQP: case BC_ISNEP: ++ vk = op == BC_ISEQP; ++ | // RA = src*8, RD = primitive_type*8 (~), JMP with RD = target ++ | addl RA, BASE, RA ++ | zapi RD, 0xf0, TMP0 ++ | srli TMP0, 3, TMP0 ++ | ldl TMP1, 0(RA) ++ | ornot zero, TMP0, TMP0 // ~TMP0: ~0 ~1 ~2 
++ | ldhu TMP2, OFS_RD(PC) // TMP2: RD in next INS, branch target ++ | gettp TMP1, TMP1 ++ | ldi PC, 4(PC) ++ | xor TMP0, TMP1, TMP0 // TMP0=0 A=D; TMP0!=0 A!=D ++ |.if FFI ++ | ldi TMP3, LJ_TCDATA(zero) ++ | addwi TMP3, 0, TMP3 ++ | cmpeq TMP1, TMP3, AT ++ | bne AT, ->vmeta_equal_cd ++ |.endif ++ | decode_BC4b TMP2 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | addw TMP2, TMP3, TMP2 // TMP2=(jump-0x8000)<<2 ++ if (vk) { ++ | selne TMP0, zero, TMP2, TMP2 ++ } else { ++ | seleq TMP0, zero, TMP2, TMP2 ++ } ++ | addl PC, TMP2, PC ++ | ins_next ++ break; ++ ++ /* -- Unary test and copy ops ------------------------------------------- */ ++ ++ case BC_ISTC: case BC_ISFC: case BC_IST: case BC_ISF: ++ | // RA = dst*8 or unused, RD = src*8, JMP with RD = target ++ | addl RD, BASE, RD ++ | ldhu TMP2, OFS_RD(PC) ++ | ldl TMP0, 0(RD) ++ | ldi PC, 4(PC) ++ | gettp TMP0, TMP0 ++ | addl RA, BASE, RA ++ | // cmpulti TMP0, LJ_TISTRUECOND, TMP0 // TMP0=1 true; TMP0=0 false ++ | ldi AT, LJ_TISTRUECOND(zero) ++ | cmpult TMP0, AT, TMP0 ++ | decode_BC4b TMP2 ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | ldl CRET1, 0(RD) ++ | addw TMP2, TMP3, TMP2 // (jump-0x8000)<<2 ++ if (op == BC_IST || op == BC_ISTC) { ++ | beq TMP0, >1 ++ if (op == BC_ISTC) { ++ | stl CRET1, 0(RA) ++ } ++ } else { ++ | bne TMP0, >1 ++ if (op == BC_ISFC) { ++ | stl CRET1, 0(RA) ++ } ++ } ++ | addl PC, TMP2, PC ++ |1: ++ | ins_next ++ break; ++ ++ case BC_ISTYPE: ++ | // RA = src*8, RD = -type*8 ++ | addl BASE, RA, TMP0 ++ | zapi RD, 0xf0, TMP1 ++ | srli TMP1, 3, TMP1 ++ | ldl TMP0, 0(TMP0) ++ | gettp TMP0, TMP0 ++ | addl TMP0, TMP1, TMP0 // if itype of RA == type, then TMP0=0 ++ | bne TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ case BC_ISNUM: ++ | // RA = src*8, RD = -(TISNUM-1)*8 ++ | addl BASE, RA, TMP0 ++ | ldl TMP0, 0(TMP0) ++ | checknum TMP0, ->vmeta_istype ++ | ins_next ++ break; ++ ++ /* -- Unary ops --------------------------------------------------------- */ ++ ++ case BC_MOV: ++ | // RA = dst*8, RD = 
src*8 ++ | addl RD, BASE, RD ++ | addl RA, BASE, RA ++ | ldl TMP0, 0(RD) ++ | ins_next1 ++ | stl TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_NOT: ++ | // RA = dst*8, RD = src*8 ++ | addl RD, BASE, RD ++ | addl RA, BASE, RA ++ | ldl TMP0, 0(RD) ++ | ldi TMP1, LJ_TTRUE(zero) ++ | ins_next1 ++ | gettp TMP0, TMP0 ++ | cmpult TMP1, TMP0, TMP0 ++ | addwi TMP0, 1, TMP0 ++ | slli TMP0, 47, TMP0 ++ | ornot zero, TMP0, TMP0 // ~TMP0 ++ | stl TMP0, 0(RA) ++ | ins_next2 ++ break; ++ case BC_UNM: ++ | // RA = dst*8, RD = src*8 ++ | addl BASE, RD, RB ++ | addl BASE, RA, RA ++ | ldl TMP0, 0(RB) ++ | ldih TMP1, -32768(zero) ++ | gettp CARG3, TMP0 ++ | ldi TISNUM, LJ_TISNUM(zero); cmpeq CARG3, TISNUM, AT; beq AT, >1 ++ | subw zero, TMP0, TMP0 ++ | cmpeq TMP0, TMP1, AT; bne AT, ->vmeta_unm // Meta handler deals with -2^31. ++ | zapi TMP0, 0xf0, TMP0 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp TMP0, TISNUM ++ | br zero, >2 ++ |1: ++ | ldi AT, LJ_TISNUM(zero); cmpult CARG3, AT, TMP3 ++ | slli TMP1, 32, TMP1 ++ | beq TMP3, ->vmeta_unm ++ | xor TMP0, TMP1, TMP0 // sign => ~sign ++ |2: ++ | stl TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_LEN: ++ | // RA = dst*8, RD = src*8 ++ | addl BASE, RD, CARG2 ++ | ldl TMP0, 0(CARG2) ++ | addl BASE, RA, RA ++ | gettp TMP1, TMP0 ++ | ldi TMP2, -LJ_TSTR(TMP1) ++ | cleartp STR:CARG1, TMP0 ++ | bne TMP2, >2 ++ | ldw CRET1, STR:CARG1->len ++ |1: ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CRET1, TISNUM ++ | stl CRET1, 0(RA) ++ | ins_next ++ |2: ++ | ldi TMP2, -LJ_TTAB(TMP1) ++ | bne TMP2, ->vmeta_len ++#if LJ_52 ++ | ldl TAB:TMP2, TAB:CARG1->metatable ++ | bne TAB:TMP2, >9 ++ |3: ++#endif ++ |->BC_LEN_Z: ++ | load_got lj_tab_len ++ | call_intern lj_tab_len // (GCtab *t) ++ | // Returns uint32_t (but less than 2^31). 
++ | br zero, <1 ++#if LJ_52 ++ |9: ++ | ldbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, 1<vmeta_len ++#endif ++ break; ++ ++ /* -- Binary ops -------------------------------------------------------- */ ++ ++ |.macro fpmod, b, c, a ++ | fdivd b, c, FCARG1 ++ | br ra, ->vm_floor // floor(b/c) ++ | fmuld FCRET1, c, a ++ | fsubd b, a, a // b - floor(b/c)*c ++ |.endmacro ++ | ++ |.macro ins_arithpre ++ ||vk = ((int)op - BC_ADDVN) / (BC_ADDNV-BC_ADDVN); ++ | // RA = dst*8, RB = src1*8, RC = src2*8 | num_const*8 ++ ||if (vk == 1) { ++ | // RA = dst*8, RB = num_const*8, RC = src1*8 ++ | decode_RB RC, INS ++ | decode_RDtoRC8 RB, RD ++ ||} else { ++ | // RA = dst*8, RB = src1*8, RC = num_const*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ ||} ++ ||switch (vk) { ++ ||case 0: // suffix is VN ++ | addl RB, BASE, RB ++ | addl RC, KBASE, RC ++ || break; ++ ||case 1: // suffix is NV ++ | addl RC, BASE, RC ++ | addl RB, KBASE, RB ++ || break; ++ ||default: // CAT or suffix is VV ++ | addl RB, BASE, RB ++ | addl RC, BASE, RC ++ || break; ++ ||} ++ |.endmacro ++ | ++ |.macro ins_arithfp, fpins, itype1, itype2 ++ | fldd FTMP0, 0(RB) ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpult itype1, TISNUM, itype1 ++ | cmpult itype2, TISNUM, itype2 ++ | fldd FTMP2, 0(RC) ++ | and itype1, itype2, itype1 ++ | addl RA, BASE, RA ++ | beq itype1, ->vmeta_arith ++ | fpins FTMP0, FTMP2, FCRET1 ++ | ins_next1 ++ | fstd FCRET1, 0(RA) ++ | ins_next2 ++ |.endmacro ++ | ++ |.macro ins_arithead, itype1, itype2, tval1, tval2 ++ | ldl tval1, 0(RB) ++ | ldl tval2, 0(RC) ++ | // Check for two integers. 
++ | gettp itype1, tval1 ++ | gettp itype2, tval2 ++ |.endmacro ++ | ++ |.macro ins_arithdn, intins, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | beq AT, >1 ++ | cmpeq TMP1, TISNUM, AT ++ | beq AT, >1 ++ | addwi CARG1, 0, CARG3 ++ | addwi CARG2, 0, CARG4 ++ |.if "intins" == "addw" ++ | intins CARG3, CARG4, CRET1 ++ | xor CRET1, CARG3, TMP1 // ((y^a) & (y^b)) < 0: overflow. ++ | xor CRET1, CARG4, TMP2 ++ | and TMP1, TMP2, TMP1 ++ | addl RA, BASE, RA ++ | cmplt TMP1, zero, AT ++ | bne AT, ->vmeta_arith ++ |.elif "intins" == "subw" ++ | intins CARG3, CARG4, CRET1 ++ | xor CRET1, CARG3, TMP1 // ((y^a) & (a^b)) < 0: overflow. ++ | xor CARG3, CARG4, TMP2 ++ | and TMP1, TMP2, TMP1 ++ | addl RA, BASE, RA ++ | cmplt TMP1, zero, AT ++ | bne AT, ->vmeta_arith ++ |.elif "intins" == "mulw" //TODO CHECK ++ | mulw CARG3, CARG4, CRET1 ++ | mull CARG3, CARG4, TMP2 ++ | zapi TMP2, 0xf, TMP2 ++ | addwi CRET1, 0, CRET1 ++ | srai CRET1, 31, TMP1 // 63-32bit not all 0 or 1: overflow. ++ | addl RA, BASE, RA ++ | cmpeq TMP1, TMP2, AT ++ | beq AT, ->vmeta_arith ++ |.endif ++ | zapi CRET1, 0xf0, CRET1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CRET1, TISNUM ++ | stl CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. 
++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithdiv, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ | ++ |.macro ins_arithmod, fpins ++ | ins_arithpre ++ | ins_arithead TMP0, TMP1, CARG1, CARG2 ++ | load_got lj_vm_modi ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP0, TISNUM, AT ++ | beq AT, >1 ++ | cmpeq TMP1, TISNUM, AT ++ | beq AT, >1 ++ | addwi CARG1, 0, CARG1 ++ | addwi CARG2, 0, CARG2 ++ | addl RA, BASE, RA ++ | beq CARG2, ->vmeta_arith ++ | call_intern lj_vm_modi ++ | zapi CRET1, 0xf0, CRET1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CRET1, TISNUM ++ | stl CRET1, 0(RA) ++ | ins_next ++ |1: // Check for two numbers. ++ | ins_arithfp, fpins, TMP0, TMP1 ++ |.endmacro ++ ++ case BC_ADDVN: case BC_ADDNV: case BC_ADDVV: ++ | ins_arithdn addw, faddd ++ break; ++ case BC_SUBVN: case BC_SUBNV: case BC_SUBVV: ++ | ins_arithdn subw, fsubd ++ break; ++ case BC_MULVN: case BC_MULNV: case BC_MULVV: ++ | ins_arithdn mulw, fmuld ++ break; ++ case BC_DIVVN: case BC_DIVNV: case BC_DIVVV: ++ | ins_arithdiv fdivd ++ break; ++ case BC_MODVN: case BC_MODNV: case BC_MODVV: ++ | ins_arithmod fpmod ++ break; ++ case BC_POW: ++ | ins_arithpre ++ | ldl CARG1, 0(RB) ++ | ldl CARG2, 0(RC) ++ | gettp TMP0, CARG1 ++ | gettp TMP1, CARG2 ++ | // cmpulti TMP0, LJ_TISNUM, TMP0 ++ | // cmpulti TMP1, LJ_TISNUM, TMP1 ++ | ldi AT, LJ_TISNUM(zero) ++ | cmpult TMP0, AT, TMP0 ++ | cmpult TMP1, AT, TMP1 ++ | and TMP0, TMP1, TMP0 ++ | addl RA, BASE, RA ++ | load_got pow ++ | beq TMP0, ->vmeta_arith ++ | fldd FCARG1, 0(RB) ++ | fldd FCARG2, 0(RC) ++ | call_extern ++ | ins_next1 ++ | fstd FCRET1, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_CAT: ++ | // RA = dst*8, RB = src_start*8, RC = src_end*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | subl RC, RB, CARG3 ++ | stl BASE, L->base ++ | addl BASE, RC, CARG2 ++ | bis RB, zero, MULTRES ++ |->BC_CAT_Z: ++ | load_got lj_meta_cat ++ | zapi CARG3, 
0xf0, CARG3 ++ | srli CARG3, 3, CARG3 ++ | stl PC, SAVE_PC(sp) ++ | bis L, zero, CARG1 ++ | call_intern lj_meta_cat // (lua_State *L, TValue *top, int left) ++ | // Returns NULL (finished) or TValue * (metamethod). ++ | ldl BASE, L->base ++ | bne CRET1, ->vmeta_binop ++ | addl BASE, MULTRES, RB ++ | ldl TMP0, 0(RB) ++ | addl RA, BASE, RA ++ | stl TMP0, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Constant ops ------------------------------------------------------ */ ++ ++ case BC_KSTR: ++ | // RA = dst*8, RD = str_const*8 (~) ++ | subl KBASE, RD, TMP1 ++ | ldi TMP2, LJ_TSTR(zero) ++ | addwi TMP2, 0, TMP2 ++ | ldl TMP0, -8(TMP1) // KBASE-8-str_const*8 ++ | addl RA, BASE, RA ++ | settp TMP0, TMP2 ++ | stl TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KCDATA: ++ |.if FFI ++ | // RA = dst*8, RD = cdata_const*8 (~) ++ | subl KBASE, RD, TMP1 ++ | ldl TMP0, -8(TMP1) // KBASE-8-cdata_const*8 ++ | ldi TMP2, LJ_TCDATA(zero) ++ | addwi TMP2, 0, TMP2 ++ | addl RA, BASE, RA ++ | settp TMP0, TMP2 ++ | stl TMP0, 0(RA) ++ | ins_next ++ |.endif ++ break; ++ case BC_KSHORT: ++ | // RA = dst*8, RD = int16_literal*8 ++ | addwi INS, 0, RD ++ | srai RD, 16, RD ++ | addl RA, BASE, RA ++ | zapi RD, 0xf0, RD ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp RD, TISNUM ++ | stl RD, 0(RA) ++ | ins_next ++ break; ++ case BC_KNUM: ++ | // RA = dst*8, RD = num_const*8 ++ | addl RD, KBASE, RD ++ | addl RA, BASE, RA ++ | ldl TMP0, 0(RD) ++ | stl TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KPRI: ++ | // RA = dst*8, RD = primitive_type*8 (~) ++ | addl RA, BASE, RA ++ | slli RD, 44, TMP0 // 44+3 ++ | ornot zero, TMP0, TMP0 // ~TMP0 ++ | stl TMP0, 0(RA) ++ | ins_next ++ break; ++ case BC_KNIL: ++ | // RA = base*8, RD = end*8 ++ | addl RA, BASE, RA ++ | stl TISNIL, 0(RA) ++ | ldi RA, 8(RA) ++ | addl RD, BASE, RD ++ |1: ++ | stl TISNIL, 0(RA) ++ | cmplt RA, RD, TMP0 ++ | ldi RA, 8(RA) ++ | bne TMP0, <1 ++ | ins_next ++ break; ++ ++ /* -- Upvalue and function ops ------------------------------------------ */ 
++ ++ case BC_UGET: ++ | // RA = dst*8, RD = uvnum*8 ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | addl RA, BASE, RA ++ | cleartp LFUNC:TMP0 ++ | addl RD, LFUNC:TMP0, RD ++ | ldl UPVAL:TMP0, LFUNC:RD->uvptr ++ | ldl TMP1, UPVAL:TMP0->v ++ | ldl TMP2, 0(TMP1) ++ | ins_next1 ++ | stl TMP2, 0(RA) ++ | ins_next2 ++ break; ++ case BC_USETV: ++ | // RA = uvnum*8, RD = src*8 ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | addl RD, BASE, RD ++ | cleartp LFUNC:TMP0 ++ | addl RA, LFUNC:TMP0, RA ++ | ldl UPVAL:TMP0, LFUNC:RA->uvptr ++ | ldl CRET1, 0(RD) ++ | ldbu TMP3, UPVAL:TMP0->marked ++ | ldl CARG2, UPVAL:TMP0->v ++ | andi TMP3, LJ_GC_BLACK, TMP3 // isblack(uv) ++ | ldbu TMP0, UPVAL:TMP0->closed ++ | gettp TMP2, CRET1 ++ | stl CRET1, 0(CARG2) ++ | bis TMP3, TMP0, TMP3 ++ | ldi TMP0, LJ_GC_BLACK|1(zero) ++ | ldi TMP2, -(LJ_TNUMX+1)(TMP2) ++ | cmpeq TMP3, TMP0, AT ++ | bne AT, >2 // Upvalue is closed and black? ++ |1: ++ | ins_next ++ | ++ |2: // Check if new value is collectable. ++ | // cmpulti TMP2, LJ_TISGCV - (LJ_TNUMX+1), TMP0 ++ | ldi TMP0, (LJ_TISGCV-(LJ_TNUMX+1))(zero) ++ | cmpult TMP2, TMP0, TMP0 ++ | cleartp GCOBJ:CRET1, CRET1 ++ | beq TMP0, <1 // tvisgcv(v) ++ | ldbu TMP3, GCOBJ:CRET1->gch.marked ++ | andi TMP3, LJ_GC_WHITES, TMP3 // iswhite(v) ++ | load_got lj_gc_barrieruv ++ | beq TMP3, <1 ++ | // Crossed a write barrier. Move the barrier forward. 
++ | ldi CARG1, GG_DISP2G(DISPATCH) ++ | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | br zero, <1 ++ break; ++ case BC_USETS: ++ | // RA = uvnum*8, RD = str_const*8 (~) ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | subl KBASE, RD, TMP1 ++ | cleartp LFUNC:TMP0 ++ | addl RA, LFUNC:TMP0, RA ++ | ldl UPVAL:TMP0, LFUNC:RA->uvptr ++ | ldl STR:TMP1, -8(TMP1) // KBASE-8-str_const*8 ++ | ldbu TMP2, UPVAL:TMP0->marked ++ | ldl CARG2, UPVAL:TMP0->v ++ | ldbu TMP3, STR:TMP1->marked ++ | andi TMP2, LJ_GC_BLACK, TMP4 // isblack(uv) ++ | ldbu TMP2, UPVAL:TMP0->closed ++ | ldi TMP0, LJ_TSTR(zero) ++ | settp TMP1, TMP0 ++ | stl TMP1, 0(CARG2) ++ | bne TMP4, >2 ++ |1: ++ | ins_next ++ | ++ |2: // Check if string is white and ensure upvalue is closed. ++ | andi TMP3, LJ_GC_WHITES, TMP0 // iswhite(str) ++ | beq TMP2, <1 ++ | load_got lj_gc_barrieruv ++ | beq TMP0, <1 ++ | // Crossed a write barrier. Move the barrier forward. ++ | ldi CARG1, GG_DISP2G(DISPATCH) ++ | call_intern lj_gc_barrieruv // (global_State *g, TValue *tv) ++ | br zero, <1 ++ break; ++ case BC_USETN: ++ | // RA = uvnum*8, RD = num_const*8 ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | addl RD, KBASE, RD ++ | cleartp LFUNC:TMP0 ++ | addl RA, LFUNC:TMP0, TMP0 ++ | ldl UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ldl TMP1, 0(RD) ++ | ldl TMP0, UPVAL:TMP0->v ++ | stl TMP1, 0(TMP0) ++ | ins_next ++ break; ++ case BC_USETP: ++ | // RA = uvnum*8, RD = primitive_type*8 (~) ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | slli RD, 44, TMP2 ++ | cleartp LFUNC:TMP0 ++ | addl RA, LFUNC:TMP0, TMP0 ++ | ornot zero, TMP2, TMP2 // ~TMP2 ++ | ldl UPVAL:TMP0, LFUNC:TMP0->uvptr ++ | ldl TMP1, UPVAL:TMP0->v ++ | stl TMP2, 0(TMP1) ++ | ins_next ++ break; ++ ++ case BC_UCLO: ++ | // RA = level*8, RD = target ++ | ldl TMP2, L->openupval ++ | branch_RD // Do this first since RD is not saved. 
++ | load_got lj_func_closeuv ++ | stl BASE, L->base ++ | bis L, zero, CARG1 ++ | beq TMP2, >1 ++ | addl BASE, RA, CARG2 ++ | call_intern lj_func_closeuv // (lua_State *L, TValue *level) ++ | ldl BASE, L->base ++ |1: ++ | ins_next ++ break; ++ ++ case BC_FNEW: ++ | // RA = dst*8, RD = proto_const*8 (~) (holding function prototype) ++ | load_got lj_func_newL_gc ++ | subl KBASE, RD, TMP1 ++ | ldl CARG3, FRAME_FUNC(BASE) ++ | ldl CARG2, -8(TMP1) // KBASE-8-tab_const*8 ++ | stl BASE, L->base ++ | stl PC, SAVE_PC(sp) ++ | cleartp CARG3 ++ | bis L, zero, CARG1 ++ | // (lua_State *L, GCproto *pt, GCfuncL *parent) ++ | call_intern lj_func_newL_gc ++ | // Returns GCfuncL *. ++ | ldi TMP0, LJ_TFUNC(zero) ++ | ldl BASE, L->base ++ | settp CRET1, TMP0 ++ | addl RA, BASE, RA ++ | stl CRET1, 0(RA) ++ | ins_next ++ break; ++ ++ /* -- Table ops --------------------------------------------------------- */ ++ ++ case BC_TNEW: ++ case BC_TDUP: ++ | // RA = dst*8, RD = (hbits|asize)*8 | tab_const*8 (~) ++ | ldl TMP0, DISPATCH_GL(gc.total)(DISPATCH) ++ | ldl TMP1, DISPATCH_GL(gc.threshold)(DISPATCH) ++ | stl BASE, L->base ++ | cmpult TMP0, TMP1, TMP2 ++ | stl PC, SAVE_PC(sp) ++ | beq TMP2, >5 ++ |1: ++ if (op == BC_TNEW) { ++ | load_got lj_tab_new ++ | zapi RD, 0xf0, CARG2 ++ | srli CARG2, 3, CARG2 ++ | ldi AT, 0x7ff(zero) ++ | and CARG2, AT, CARG2 ++ | ldi TMP0, 0x801(zero) ++ | subw CARG2, AT, TMP2 ++ | zapi RD, 0xf0, CARG3 ++ | srli CARG3, 14, CARG3 ++ | seleq TMP2, TMP0, CARG2, CARG2 ++ | // (lua_State *L, int32_t asize, uint32_t hbits) ++ | bis L, zero, CARG1 ++ | call_intern lj_tab_new ++ | // Returns Table *. ++ } else { ++ | load_got lj_tab_dup ++ | subl KBASE, RD, TMP1 ++ | bis L, zero, CARG1 ++ | ldl CARG2, -8(TMP1) // KBASE-8-str_const*8 ++ | call_intern lj_tab_dup // (lua_State *L, Table *kt) ++ | // Returns Table *. 
++ } ++ | ldi TMP0, LJ_TTAB(zero) ++ | ldl BASE, L->base ++ | ins_next1 ++ | settp CRET1, TMP0 ++ | addl RA, BASE, RA ++ | stl CRET1, 0(RA) ++ | ins_next2 ++ |5: ++ | load_got lj_gc_step_fixtop ++ | bis RD, zero, MULTRES ++ | bis L, zero, CARG1 ++ | call_intern lj_gc_step_fixtop // (lua_State *L) ++ | bis MULTRES, zero, RD ++ | br zero, <1 ++ break; ++ ++ case BC_GGET: ++ | // RA = dst*8, RD = str_const*8 (~) ++ case BC_GSET: ++ | // RA = src*8, RD = str_const*8 (~) ++ | ldl LFUNC:TMP0, FRAME_FUNC(BASE) ++ | subl KBASE, RD, TMP1 ++ | ldl STR:RC, -8(TMP1) // KBASE-8-str_const*8 ++ | cleartp LFUNC:TMP0 ++ | ldl TAB:RB, LFUNC:TMP0->env ++ | addl RA, BASE, RA ++ if (op == BC_GGET) { ++ | br zero, ->BC_TGETS_Z ++ } else { ++ | br zero, ->BC_TSETS_Z ++ } ++ break; ++ ++ case BC_TGETV: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl BASE, RB, CARG2 ++ | addl BASE, RC, CARG3 ++ | ldl TAB:RB, 0(CARG2) ++ | ldl TMP2, 0(CARG3) ++ | addl RA, BASE, RA ++ | checktab TAB:RB, ->vmeta_tgetv ++ | gettp TMP3, TMP2 ++ | ldw TMP0, TAB:RB->asize ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq TMP3, TISNUM, AT ++ | beq AT, >5 // Integer key? ++ | addwi TMP2, 0, TMP2 ++ | ldl TMP1, TAB:RB->array ++ | cmpult TMP2, TMP0, TMP3 //array part (keys = [0, asize-1]) ++ | s8addwi TMP2, 0, TMP2 ++ | beq TMP3, ->vmeta_tgetv // Integer key and in array part? ++ | addl TMP2, TMP1, TMP2 ++ | ldl AT, 0(TMP2) ++ | ldl CRET1, 0(TMP2) ++ | cmpeq AT, TISNIL, AT ++ | bne AT, >2 ++ |1: ++ | ins_next1 ++ | stl CRET1, 0(RA) ++ | ins_next2 ++ | ++ |2: // Check for __index if table value is nil. ++ | ldl TAB:TMP2, TAB:RB->metatable ++ | beq TAB:TMP2, <1 // No metatable: done. ++ | ldbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, 1<vmeta_tgetv ++ | ++ |5: ++ | ldi TMP0, LJ_TSTR(zero) ++ | cleartp RC, TMP2 ++ | cmpeq TMP3, TMP0, AT ++ | beq AT, ->vmeta_tgetv // String key? 
++ | br zero, ->BC_TGETS_Z ++ break; ++ case BC_TGETS: ++ | // RA = dst*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD //TODO CHECK ++ | addl BASE, RB, CARG2 ++ | subl KBASE, RC, CARG3 ++ | ldl TAB:RB, 0(CARG2) ++ | addl RA, BASE, RA ++ | ldl STR:RC, -8(CARG3) // KBASE-8-str_const*8 ++ | checktab TAB:RB, ->vmeta_tgets1 ++ |->BC_TGETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = dst*8 ++ | ldw TMP0, TAB:RB->hmask ++ | ldw TMP1, STR:RC->hash ++ | ldl NODE:TMP2, TAB:RB->node ++ | and TMP1, TMP0, TMP1 // idx = str->hash & tab->hmask ++ | slli TMP1, 5, TMP0 ++ | addwi TMP0, 0, TMP0 ++ | s8addwi TMP1, 0, TMP1 ++ | subw TMP0, TMP1, TMP1 ++ | ldi TMP3, LJ_TSTR(zero) ++ | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ |1: ++ | ldl CARG1, NODE:TMP2->key ++ | ldl CRET1, NODE:TMP2->val ++ | ldl NODE:TMP1, NODE:TMP2->next ++ | ldl TAB:TMP3, TAB:RB->metatable ++ | cmpeq CARG1, RC, TMP4 ++ | beq TMP4, >4 ++ | cmpeq CRET1, TISNIL, TMP4 ++ | bne TMP4, >5 // Key found, but nil value? ++ |3: ++ | ins_next1 ++ | stl CRET1, 0(RA) ++ | ins_next2 ++ | ++ |4: // Follow hash chain. ++ | bis NODE:TMP1, zero, NODE:TMP2 ++ | bne NODE:TMP1, <1 ++ | // End of hash chain: key not found, nil result. ++ | ++ |5: // Check for __index if table value is nil. ++ | bis TISNIL, zero, CRET1 ++ | beq TAB:TMP3, <3 // No metatable: done. 
++ | ldbu TMP0, TAB:TMP3->nomm ++ | andi TMP0, 1<vmeta_tgets ++ break; ++ case BC_TGETB: ++ | // RA = dst*8, RB = table*8, RC = index*8 ++ | decode_RB RB, INS ++ | addl BASE, RB, CARG2 ++ | decode_RDtoRC8 RC, RD ++ | ldl TAB:RB, 0(CARG2) ++ | addl RA, BASE, RA ++ | zapi RC, 0xf0, TMP0 ++ | srli TMP0, 3, TMP0 ++ | checktab TAB:RB, ->vmeta_tgetb ++ | ldw TMP1, TAB:RB->asize ++ | ldl TMP2, TAB:RB->array ++ | cmpult TMP0, TMP1, TMP1 ++ | addl RC, TMP2, RC ++ | beq TMP1, ->vmeta_tgetb ++ | ldl CRET1, 0(RC) ++ | cmpeq CRET1, TISNIL, AT ++ | bne AT, >5 ++ |1: ++ | ins_next1 ++ | stl CRET1, 0(RA) ++ | ins_next2 ++ | ++ |5: // Check for __index if table value is nil. ++ | ldl TAB:TMP2, TAB:RB->metatable ++ | beq TAB:TMP2, <1 // No metatable: done. ++ | ldbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, 1<vmeta_tgetb // Caveat: preserve TMP0 and CARG2! ++ break; ++ case BC_TGETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl RB, BASE, RB ++ | addl RC, BASE, RC ++ | ldl TAB:CARG1, 0(RB) ++ | ldw CARG2, 0(RC) ++ | addl RA, BASE, RA ++ | cleartp TAB:CARG1 ++ | ldw TMP0, TAB:CARG1->asize ++ | ldl TMP1, TAB:CARG1->array ++ | cmpult CARG2, TMP0, TMP0 ++ | s8addwi CARG2, 0, TMP2 ++ | addl TMP1, TMP2, CRET1 ++ | beq TMP0, ->vmeta_tgetr // In array part? ++ | ldl CARG2, 0(CRET1) ++ |->BC_TGETR_Z: ++ | ins_next1 ++ | stl CARG2, 0(RA) ++ | ins_next2 ++ break; ++ ++ case BC_TSETV: ++ | // RA = src*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl BASE, RB, CARG2 ++ | addl BASE, RC, CARG3 ++ | ldl RB, 0(CARG2) ++ | ldl TMP2, 0(CARG3) ++ | addl RA, BASE, RA ++ | checktab RB, ->vmeta_tsetv ++ | addwi TMP2, 0, RC ++ | checkint TMP2, >5 ++ | ldw TMP0, TAB:RB->asize ++ | ldl TMP1, TAB:RB->array ++ | cmpult RC, TMP0, TMP0 ++ | s8addwi RC, 0, TMP2 ++ | beq TMP0, ->vmeta_tsetv // Integer key and in array part? 
++ | addl TMP1, TMP2, TMP1 ++ | ldbu TMP3, TAB:RB->marked ++ | ldl TMP0, 0(TMP1) ++ | ldl CRET1, 0(RA) ++ | cmpeq TMP0, TISNIL, AT ++ | bne AT, >3 ++ |1: ++ | andi TMP3, LJ_GC_BLACK, TMP2 // isblack(table) ++ | stl CRET1, 0(TMP1) ++ | bne TMP2, >7 ++ |2: ++ | ins_next ++ | ++ |3: // Check for __newindex if previous value is nil. ++ | ldl TAB:TMP2, TAB:RB->metatable ++ | beq TAB:TMP2, <1 // No metatable: done. ++ | ldbu TMP2, TAB:TMP2->nomm ++ | andi TMP2, 1<vmeta_tsetv ++ |5: ++ | gettp TMP0, TMP2 ++ | ldi TMP0, -LJ_TSTR(TMP0) ++ | bne TMP0, ->vmeta_tsetv ++ | cleartp STR:RC, TMP2 ++ | br zero, ->BC_TSETS_Z // String key? ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETS: ++ | // RA = src*8, RB = table*8, RC = str_const*8 (~) ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl BASE, RB, CARG2 ++ | subl KBASE, RC, CARG3 ++ | ldl TAB:RB, 0(CARG2) ++ | ldl RC, -8(CARG3) // KBASE-8-str_const*8 ++ | addl RA, BASE, RA ++ | cleartp STR:RC ++ | checktab TAB:RB, ->vmeta_tsets1 ++ |->BC_TSETS_Z: ++ | // TAB:RB = GCtab *, STR:RC = GCstr *, RA = BASE+src*8 ++ | ldw TMP0, TAB:RB->hmask ++ | ldw TMP1, STR:RC->hash ++ | ldl NODE:TMP2, TAB:RB->node ++ | stb zero, TAB:RB->nomm // Clear metamethod cache. ++ | and TMP1, TMP0, TMP1 // idx = str->hash & tab->hmask ++ | slli TMP1, 5, TMP0 ++ | addwi TMP0, 0, TMP0 ++ | s8addwi TMP1, 0, TMP1 ++ | subw TMP0, TMP1, TMP1 ++ | ldi TMP3, LJ_TSTR(zero) ++ | addl NODE:TMP2, TMP1, NODE:TMP2 // node = tab->node + (idx*32-idx*8) ++ | settp STR:RC, TMP3 // Tagged key to look for. ++ | fldd f9, 0(RA) ++ |1: ++ | ldl TMP0, NODE:TMP2->key ++ | ldl CARG2, NODE:TMP2->val ++ | ldl NODE:TMP1, NODE:TMP2->next ++ | ldbu TMP3, TAB:RB->marked ++ | cmpeq TMP0, RC, AT ++ | beq AT, >5 ++ | ldl TAB:TMP0, TAB:RB->metatable ++ | cmpeq CARG2, TISNIL, AT ++ | bne AT, >4 // Key found, but nil value? 
++ |2: ++ | andi TMP3, LJ_GC_BLACK, TMP3 // isblack(table) ++ | fstd f9, NODE:TMP2->val ++ | bne TMP3, >7 ++ |3: ++ | ins_next ++ | ++ |4: // Check for __newindex if previous value is nil. ++ | beq TAB:TMP0, <2 // No metatable: done. ++ | ldbu TMP0, TAB:TMP0->nomm ++ | andi TMP0, 1<vmeta_tsets ++ | ++ |5: // Follow hash chain. ++ | bis NODE:TMP1, zero, NODE:TMP2 ++ | bne NODE:TMP1, <1 ++ | // End of hash chain: key not found, add a new one ++ | ++ | // But check for __newindex first. ++ | ldl TAB:TMP2, TAB:RB->metatable ++ | ldi CARG3, DISPATCH_GL(tmptv)(DISPATCH) ++ | beq TAB:TMP2, >6 // No metatable: continue. ++ | ldbu TMP0, TAB:TMP2->nomm ++ | andi TMP0, 1<vmeta_tsets // 'no __newindex' flag NOT set: check. ++ |6: ++ | load_got lj_tab_newkey ++ | stl RC, 0(CARG3) ++ | stl BASE, L->base ++ | bis TAB:RB, zero, CARG2 ++ | stl PC, SAVE_PC(sp) ++ | bis L, zero, CARG1 ++ | call_intern lj_tab_newkey // (lua_State *L, GCtab *t, TValue *k ++ | // Returns TValue *. ++ | ldl BASE, L->base ++ | fstd f9, 0(CRET1) ++ | br zero, <3 // No 2nd write barrier needed. ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <3 ++ break; ++ case BC_TSETB: ++ | // RA = src*8, RB = table*8, RC = index*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl BASE, RB, CARG2 ++ | addl RA, BASE, RA ++ | ldl TAB:RB, 0(CARG2) ++ | zapi RC, 0xf0, TMP0 ++ | srli TMP0, 3, TMP0 ++ | checktab RB, ->vmeta_tsetb ++ | ldw TMP1, TAB:RB->asize ++ | ldl TMP2, TAB:RB->array ++ | cmpult TMP0, TMP1, TMP1 ++ | addl RC, TMP2, RC ++ | beq TMP1, ->vmeta_tsetb ++ | ldl TMP1, 0(RC) ++ | ldbu TMP3, TAB:RB->marked ++ | cmpeq TMP1, TISNIL, AT ++ | bne AT, >5 ++ |1: ++ | ldl CRET1, 0(RA) ++ | andi TMP3, LJ_GC_BLACK, TMP1 // isblack(table) ++ | stl CRET1, 0(RC) ++ | bne TMP1, >7 ++ |2: ++ | ins_next ++ | ++ |5: // Check for __newindex if previous value is nil. ++ | ldl TAB:TMP2, TAB:RB->metatable ++ | beq TAB:TMP2, <1 // No metatable: done. 
++ | ldbu TMP1, TAB:TMP2->nomm ++ | andi TMP1, 1<vmeta_tsetb // Caveat: preserve TMP0 and CARG2! ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:RB, TMP3, TMP0, <2 ++ break; ++ case BC_TSETR: ++ | // RA = dst*8, RB = table*8, RC = key*8 ++ | decode_RB RB, INS ++ | decode_RDtoRC8 RC, RD ++ | addl BASE, RB, CARG1 ++ | addl BASE, RC, CARG3 ++ | ldl TAB:CARG2, 0(CARG1) ++ | ldw CARG3, 0(CARG3) ++ | cleartp TAB:CARG2 ++ | ldbu TMP3, TAB:CARG2->marked ++ | ldw TMP0, TAB:CARG2->asize ++ | ldl TMP1, TAB:CARG2->array ++ | andi TMP3, LJ_GC_BLACK, TMP2 // isblack(table) ++ | addl RA, BASE, RA ++ | bne TMP2, >7 ++ |2: ++ | cmpult CARG3, TMP0, TMP0 ++ | s8addwi CARG3, 0, TMP2 ++ | addl TMP1, TMP2, CRET1 ++ | beq TMP0, ->vmeta_tsetr // In array part? ++ |->BC_TSETR_Z: ++ | ldl TMP1, 0(RA) ++ | ins_next1 ++ | stl TMP1, 0(CRET1) ++ | ins_next2 ++ | ++ |7: // Possible table write barrier for the value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, CRET1, <2 ++ break; ++ ++ case BC_TSETM: ++ | // RA = base*8 (table at base-1), RD = num_const*8 (start index) ++ | addl RA, BASE, RA ++ |1: ++ | addl KBASE, RD, TMP3 ++ | ldl TAB:CARG2, -8(RA) // Guaranteed to be a table. ++ | subwi MULTRES, 8, TMP0 ++ | ldw TMP3, 0(TMP3) // Integer constant is in lo-word. ++ | zapi TMP0, 0xf0, CARG3 ++ | srli CARG3, 3, CARG3 ++ | beq TMP0, >4 // Nothing to copy? ++ | cleartp TAB:CARG2 ++ | addw CARG3, TMP3, CARG3 ++ | ldw TMP2, TAB:CARG2->asize ++ | s8addwi TMP3, 0, TMP1 ++ | ldbu TMP3, TAB:CARG2->marked ++ | ldl CARG1, TAB:CARG2->array ++ | cmpult TMP2, CARG3, TMP4 ++ | addl TMP0, RA, TMP2 ++ | bne TMP4, >5 ++ | addl TMP1, CARG1, TMP1 ++ | andi TMP3, LJ_GC_BLACK, TMP0 // isblack(table) ++ |3: // Copy result slots to table. ++ | ldl CRET1, 0(RA) ++ | ldi RA, 8(RA) ++ | cmpult RA, TMP2, TMP4 ++ | stl CRET1, 0(TMP1) ++ | ldi TMP1, 8(TMP1) ++ | bne TMP4, <3 ++ | bne TMP0, >7 ++ |4: ++ | ins_next ++ | ++ |5: // Need to resize array part. 
++ | load_got lj_tab_reasize ++ | stl BASE, L->base ++ | stl PC, SAVE_PC(sp) ++ | bis RD, zero, BASE ++ | bis L, zero, CARG1 ++ | call_intern lj_tab_reasize // (lua_State *L, GCtab *t, int nasize) ++ | // Must not reallocate the stack. ++ | bis BASE, zero, RD ++ | ldl BASE, L->base // Reload BASE for lack of a saved register. ++ | br zero, <1 ++ | ++ |7: // Possible table write barrier for any value. Skip valiswhite check. ++ | barrierback TAB:CARG2, TMP3, TMP0, <4 ++ break; ++ ++ /* -- Calls and vararg handling ----------------------------------------- */ ++ ++ case BC_CALLM: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = extra_nargs*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ | addw NARGS8:RC, MULTRES, NARGS8:RC ++ | br zero, ->BC_CALL_Z ++ break; ++ case BC_CALL: ++ | // RA = base*8, (RB = (nresults+1)*8,) RC = (nargs+1)*8 ++ | decode_RDtoRC8 NARGS8:RC, RD ++ |->BC_CALL_Z: ++ | bis BASE, zero, TMP2 ++ | addl BASE, RA, BASE ++ | ldl LFUNC:RB, 0(BASE) ++ | ldi BASE, 16(BASE) ++ | subwi NARGS8:RC, 8, NARGS8:RC ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_CALLMT: ++ | // RA = base*8, (RB = 0,) RC = extra_nargs*8 ++ | addw NARGS8:RD, MULTRES, NARGS8:RD ++ | br zero, ->BC_CALLT_Z1 ++ break; ++ case BC_CALLT: ++ | // RA = base*8, (RB = 0,) RC = (nargs+1)*8 ++ |->BC_CALLT_Z1: ++ | addl RA, BASE, RA ++ | ldl LFUNC:RB, 0(RA) ++ | bis RD, zero, NARGS8:RC ++ | ldl TMP1, FRAME_PC(BASE) ++ | ldi RA, 16(RA) ++ | subwi NARGS8:RC, 8, NARGS8:RC ++ | checktp CARG3, LFUNC:RB, -LJ_TFUNC, ->vmeta_callt ++ |->BC_CALLT_Z: ++ | andi TMP1, FRAME_TYPE, TMP0 // Caveat: preserve TMP0 until the 'or'. ++ | ldbu TMP3, LFUNC:CARG3->ffid ++ | xori TMP1, FRAME_VARG, TMP2 ++ | bne TMP0, >7 ++ |1: ++ | stl LFUNC:RB, FRAME_FUNC(BASE) // Copy function down, but keep PC. ++ | cmpulti TMP3, 2, CARG4 // (> FF_C) Calling a fast function? 
++ | bis BASE, zero, TMP2 ++ | bis CARG3, zero, RB ++ | bis NARGS8:RC, zero, TMP3 ++ | beq NARGS8:RC, >3 ++ |2: ++ | ldl CRET1, 0(RA) ++ | ldi RA, 8(RA) ++ | subwi TMP3, 8, TMP3 ++ | stl CRET1, 0(TMP2) ++ | ldi TMP2, 8(TMP2) ++ | bne TMP3, <2 ++ |3: ++ | bis TMP0, CARG4, TMP0 ++ | beq TMP0, >5 ++ |4: ++ | ins_callt ++ | ++ |5: // Tailcall to a fast function with a Lua frame below. ++ | ldw INS, -4(TMP1) ++ | decode_RA RA, INS ++ | subl BASE, RA, TMP1 ++ | ldl TMP1, -32(TMP1) ++ | cleartp LFUNC:TMP1 ++ | ldl TMP1, LFUNC:TMP1->pc ++ | ldl KBASE, PC2PROTO(k)(TMP1) // Need to prepare KBASE. ++ | br zero, <4 ++ | ++ |7: // Tailcall from a vararg function. ++ | andi TMP2, FRAME_TYPEP, CARG4 ++ | subl BASE, TMP2, TMP2 // Relocate BASE down. ++ | bne CARG4, <1 // Vararg frame below? ++ | bis TMP2, zero, BASE ++ | ldl TMP1, FRAME_PC(TMP2) ++ | andi TMP1, FRAME_TYPE, TMP0 ++ | br zero, <1 ++ break; ++ ++ case BC_ITERC: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 ((2+1)*8)) ++ | bis BASE, zero, TMP2 // Save old BASE for vmeta_call. ++ | addl BASE, RA, BASE ++ | ldl RB, -24(BASE) //A, A+1, A+2 = A-3, A-2, A-1. ++ | ldl CARG1, -16(BASE) ++ | ldl CARG2, -8(BASE) ++ | ldi NARGS8:RC, 16(zero) // Iterators get 2 arguments. ++ | stl RB, 0(BASE) // Copy callable. ++ | stl CARG1, 16(BASE) // Copy state. ++ | stl CARG2, 24(BASE) // Copy control var. ++ | ldi BASE, 16(BASE) ++ | checkfunc RB, ->vmeta_call ++ | ins_call ++ break; ++ ++ case BC_ITERN: ++ | // RA = base*8, (RB = (nresults+1)*8, RC = (nargs+1)*8 (2+1)*8) ++ | addl RA, BASE, RA ++ | ldl TAB:RB, -16(RA) ++ | ldw RC, -8(RA) // Get index from control var. ++ | cleartp TAB:RB ++ | ldi PC, 4(PC) ++ | ldw TMP0, TAB:RB->asize ++ | ldl TMP1, TAB:RB->array ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | slli TISNUM, 47, CARG3 ++ |1: // Traverse array part. ++ | cmpult RC, TMP0, TMP2 ++ | s8addwi RC, 0, TMP3 ++ | beq TMP2, >5 // Index points after array part? 
++ | addl TMP3, TMP1, TMP3 ++ | ldl CARG1, 0(TMP3) ++ | ldhu RD, -4+OFS_RD(PC) // ITERL RD ++ | bis RC, CARG3, TMP2 ++ | addwi RC, 1, RC ++ | cmpeq CARG1, TISNIL, AT ++ | bne AT, <1 // Skip holes in array part. ++ | stl TMP2, 0(RA) ++ | stl CARG1, 8(RA) ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | decode_BC4b RD ++ | addl RD, TMP3, RD ++ | stw RC, -8(RA) // Update control var. ++ | addl PC, RD, PC ++ |3: ++ | ins_next ++ | ++ |5: // Traverse hash part. ++ | ldw TMP1, TAB:RB->hmask ++ | subw RC, TMP0, RC ++ | ldl TMP2, TAB:RB->node ++ |6: ++ | cmpult TMP1, RC, CARG1 // End of iteration? Branch to ITERL+1. ++ | slli RC, 5, TMP3 ++ | addwi TMP3, 0, TMP3 ++ | bne CARG1, <3 ++ | s8addwi RC, 0, RB ++ | subw TMP3, RB, TMP3 ++ | addl TMP3, TMP2, NODE:TMP3 // node = tab->node + (idx*32-idx*8) ++ | ldl CARG1, 0(NODE:TMP3) ++ | ldhu RD, -4+OFS_RD(PC) // ITERL RD ++ | addwi RC, 1, RC ++ | cmpeq CARG1, TISNIL, AT ++ | bne AT, <6 // Skip holes in hash part. ++ | ldl CARG2, NODE:TMP3->key ++ | ldih TMP3, -0x2(zero) // -BCBIAS_J*4 ++ | stl CARG1, 8(RA) ++ | addw RC, TMP0, RC ++ | decode_BC4b RD ++ | addw RD, TMP3, RD ++ | stl CARG2, 0(RA) ++ | addl PC, RD, PC ++ | stw RC, -8(RA) // Update control var. 
++ | br zero, <3 ++ break; ++ ++ case BC_ISNEXT: ++ | // RA = base*8, RD = target (points to ITERN) ++ | addl RA, BASE, RA ++ | zapi RD, 0xf0, TMP0 ++ | srli TMP0, 1, TMP0 ++ | ldl CFUNC:CARG1, -24(RA) ++ | addl TMP0, PC, TMP0 ++ | ldl CARG2, -16(RA) ++ | ldl CARG3, -8(RA) ++ | ldih TMP2, -0x2(zero) // -BCBIAS_J*4 ++ | checkfunc CFUNC:CARG1, >5 ++ | gettp CARG2, CARG2 ++ | ldi CARG2, -LJ_TTAB(CARG2) ++ | ldbu TMP1, CFUNC:CARG1->ffid ++ | ldi CARG3, -LJ_TNIL(CARG3) ++ | bis CARG2, CARG3, TMP3 ++ | ldi TMP1, -FF_next_N(TMP1) ++ | bis TMP3, TMP1, TMP3 ++ | ldih TMP1, 0x1(zero) ++ | ldi TMP1, -0x2(TMP1) //LJ_KEYINDEX ++ | bne TMP3, >5 ++ | addl TMP0, TMP2, PC ++ | slli TMP1, 16, TMP1 ++ | ldi TMP1, 0x7fff(TMP1) ++ | slli TMP1, 32, TMP1 //make TMP1=0xfffe7fff00000000 ++ | stl TMP1, -8(RA) ++ |1: ++ | ins_next ++ |5: // Despecialize bytecode if any of the checks fail. ++ | ldi TMP3, BC_JMP(zero) ++ | ldi TMP1, BC_ITERC(zero) ++ | stb TMP3, -4+OFS_OP(PC) ++ | addl TMP0, TMP2, PC ++ | stb TMP1, OFS_OP(PC) ++ | br zero, <1 ++ break; ++ ++ case BC_VARG: ++ | // RA = base*8, RB = (nresults+1)*8, RC = numparams*8 ++ | ldl TMP0, FRAME_PC(BASE) ++ | decode_RDtoRC8 RC, RD ++ | decode_RB RB, INS ++ | addl RC, BASE, RC ++ | addl RA, BASE, RA ++ | ldi RC, FRAME_VARG(RC) ++ | addl RA, RB, TMP2 ++ | ldi TMP3, -16(BASE) // TMP3 = vtop ++ | subl RC, TMP0, RC // RC = vbase ++ | // Note: RC may now be even _above_ BASE if nargs was < numparams. ++ | subl TMP3, RC, TMP1 ++ | beq RB, >5 // Copy all varargs? ++ | ldi TMP2, -16(TMP2) ++ |1: // Copy vararg slots to destination slots. ++ | ldl CARG1, 0(RC) ++ | cmpult RC, TMP3, TMP0 ++ | ldi RC, 8(RC) ++ | selne TMP0, CARG1, TISNIL, CARG1 ++ | stl CARG1, 0(RA) ++ | cmpult RA, TMP2, TMP0 ++ | ldi RA, 8(RA) ++ | bne TMP0, <1 ++ |3: ++ | ins_next ++ | ++ |5: // Copy all varargs. ++ | ldl TMP0, L->maxstack ++ | ldi MULTRES, 8(zero) // MULTRES = (0+1)*8 ++ | cmplt zero, TMP1, AT ++ | beq AT, <3 // No vararg slots? 
++ | addl RA, TMP1, TMP2 ++ | cmpult TMP0, TMP2, TMP2 ++ | ldi MULTRES, 8(TMP1) ++ | bne TMP2, >7 ++ |6: ++ | ldl CRET1, 0(RC) ++ | ldi RC, 8(RC) ++ | stl CRET1, 0(RA) ++ | cmpult RC, TMP3, TMP0 ++ | ldi RA, 8(RA) ++ | bne TMP0, <6 // More vararg slots? ++ | br zero, <3 ++ | ++ |7: // Grow stack for varargs. ++ | load_got lj_state_growstack ++ | stl RA, L->top ++ | subl RA, BASE, RA ++ | stl BASE, L->base ++ | subl RC, BASE, BASE // Need delta, because BASE may change. ++ | stl PC, SAVE_PC(sp) ++ | zapi TMP1, 0xf0, CARG2 ++ | srli CARG2, 3, CARG2 ++ | bis L, zero, CARG1 ++ | call_intern lj_state_growstack // (lua_State *L, int n) ++ | bis BASE, zero, RC ++ | ldl BASE, L->base ++ | addl RA, BASE, RA ++ | addl RC, BASE, RC ++ | ldi TMP3, -16(BASE) ++ | br zero, <6 ++ break; ++ ++ /* -- Returns ----------------------------------------------------------- */ ++ ++ case BC_RETM: ++ | // RA = results*8, RD = extra_nresults*8 ++ | addw RD, MULTRES, RD ++ | br zero, ->BC_RET_Z1 ++ break; ++ ++ case BC_RET: ++ | // RA = results*8, RD = (nresults+1)*8 ++ |->BC_RET_Z1: ++ | ldl PC, FRAME_PC(BASE) ++ | addl RA, BASE, RA ++ | bis RD, zero, MULTRES ++ |1: ++ | andi PC, FRAME_TYPE, TMP0 ++ | xori PC, FRAME_VARG, TMP1 ++ | bne TMP0, ->BC_RETV_Z ++ | ++ |->BC_RET_Z: ++ | // BASE = base, RA = resultptr, RD = (nresults+1)*8, PC = return ++ | ldw INS, -4(PC) ++ | ldi TMP2, -16(BASE) ++ | ldi RC, -8(RD) ++ | decode_RA TMP0, INS ++ | decode_RB RB, INS ++ | addl TMP2, RB, TMP3 ++ | subl TMP2, TMP0, BASE ++ | beq RC, >3 ++ |2: ++ | ldl CRET1, 0(RA) ++ | ldi RA, 8(RA) ++ | ldi RC, -8(RC) ++ | stl CRET1, 0(TMP2) ++ | ldi TMP2, 8(TMP2) ++ | bne RC, <2 ++ |3: ++ | ldi TMP3, -8(TMP3) ++ |5: ++ | cmpult TMP2, TMP3, TMP0 ++ | ldl LFUNC:TMP1, FRAME_FUNC(BASE) ++ | bne TMP0, >6 ++ | cleartp LFUNC:TMP1 ++ | ldl TMP1, LFUNC:TMP1->pc ++ | ldl KBASE, PC2PROTO(k)(TMP1) ++ | ins_next ++ | ++ |6: // Fill up results with nil. 
++ | stl TISNIL, 0(TMP2) ++ | ldi TMP2, 8(TMP2) ++ | br zero, <5 ++ | ++ |->BC_RETV_Z: // Non-standard return case. ++ | andi TMP1, FRAME_TYPEP, TMP2 ++ | bne TMP2, ->vm_return ++ | // Return from vararg function: relocate BASE down. ++ | subl BASE, TMP1, BASE ++ | ldl PC, FRAME_PC(BASE) ++ | br zero, <1 ++ break; ++ ++ case BC_RET0: case BC_RET1: ++ | // RA = results*8, RD = (nresults+1)*8 ++ | ldl PC, FRAME_PC(BASE) ++ | addl RA, BASE, RA ++ | bis RD, zero, MULTRES ++ | andi PC, FRAME_TYPE, TMP0 ++ | xori PC, FRAME_VARG, TMP1 ++ | bne TMP0, ->BC_RETV_Z ++ | ldw INS, -4(PC) ++ | ldi TMP2, -16(BASE) ++ if (op == BC_RET1) { ++ | ldl CRET1, 0(RA) ++ } ++ | decode_RB RB, INS ++ | decode_RA RA, INS ++ | subl TMP2, RA, BASE ++ if (op == BC_RET1) { ++ | stl CRET1, 0(TMP2) ++ } ++ |5: ++ | cmpult RD, RB, TMP0 ++ | ldl TMP1, FRAME_FUNC(BASE) ++ | bne TMP0, >6 ++ | cleartp LFUNC:TMP1 ++ | ldl TMP1, LFUNC:TMP1->pc ++ | ins_next1 ++ | ldl KBASE, PC2PROTO(k)(TMP1) ++ | ins_next2 ++ | ++ |6: // Fill up results with nil. ++ | ldi TMP2, 8(TMP2) ++ | ldi RD, 8(RD) ++ if (op == BC_RET1) { ++ | stl TISNIL, 0(TMP2) ++ } else { ++ | stl TISNIL, -8(TMP2) ++ } ++ | br zero, <5 ++ break; ++ ++ /* -- Loops and branches ------------------------------------------------ */ ++ ++ case BC_FORL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IFORL follows. 
++ break; ++ ++ case BC_JFORI: ++ case BC_JFORL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_FORI: ++ case BC_IFORL: ++ | // RA = base*8, RD = target (after end of loop or start of loop) ++ vk = (op == BC_IFORL || op == BC_JFORL); ++ | addl RA, BASE, RA ++ | ldl CARG1, FORL_IDX*8(RA) // CARG1 = IDX ++ | ldl CARG2, FORL_STEP*8(RA) // CARG2 = STEP ++ | ldl CARG3, FORL_STOP*8(RA) // CARG3 = STOP ++ | gettp CARG4, CARG1 ++ | gettp CARG5, CARG2 ++ | gettp CRET2, CARG3 ++ if (op != BC_JFORL) { ++ | zapi RD, 0xf0, RD ++ | srli RD, 1, RD ++ | ldih TMP2, -0x2(zero) // -BCBIAS_J<<2 ++ | addl TMP2, RD, TMP2 ++ } ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CARG4, TISNUM, AT ++ | beq AT, >3 ++ | addwi CARG1, 0, CARG4 // start ++ | addwi CARG3, 0, CARG3 // stop ++ if (!vk) { // init ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | cmpeq CRET2, TISNUM, AT ++ | beq AT,->vmeta_for ++ | cmpeq CARG5, TISNUM, AT ++ | beq AT, ->vmeta_for ++ | .DEXTM TMP0, CARG2, 31, 1 // sign ++ | cmplt CARG3, CARG4, CARG2 ++ | cmplt CARG4, CARG3, TMP1 ++ | selne TMP0, TMP1, CARG2, CARG2 // CARG2=0: +,start <= stop or -,start >= stop ++ } else { ++ | addwi CARG2, 0, CARG5 // step ++ | addw CARG4, CARG5, CARG1 // start + step ++ | xor CARG1, CARG4, TMP3 // y^a ++ | xor CARG1, CARG5, TMP1 // y^b ++ | and TMP3, TMP1, TMP3 ++ | cmplt CARG1, CARG3, TMP1 // start+step < stop ? ++ | cmplt CARG3, CARG1, CARG3 // stop < start+step ? ++ | cmplt CARG5, zero, TMP0 // step < 0 ? ++ | cmplt TMP3, zero, TMP3 // ((y^a) & (y^b)) < 0: overflow. 
++ | selne TMP0, TMP1, CARG3, CARG3 ++ | bis CARG3, TMP3, CARG2 // CARG2=1: overflow; CARG2=0: continue ++ | zapi CARG1, 0xf0, CARG1 ++ | ldi TISNUM, LJ_TISNUM(zero) ++ | settp CARG1, TISNUM ++ | stl CARG1, FORL_IDX*8(RA) ++ } ++ |1: ++ if (op == BC_FORI) { ++ | seleq CARG2, zero, TMP2, TMP2 // CARG2!=0: jump out the loop; CARG2==0: next INS ++ | addl PC, TMP2, PC ++ } else if (op == BC_JFORI) { ++ | addl PC, TMP2, PC ++ | ldhu RD, -4+OFS_RD(PC) ++ } else if (op == BC_IFORL) { ++ | selne CARG2, zero, TMP2, TMP2 // CARG2!=0: next INS; CARG2==0: jump back ++ | addl PC, TMP2, PC ++ } ++ | ins_next1 ++ | stl CARG1, FORL_EXT*8(RA) ++ |2: ++ if (op == BC_JFORI) { ++ | decode_BC8b RD ++ | beq CARG2, =>BC_JLOOP // CARG2 == 0: excute the loop ++ } else if (op == BC_JFORL) { ++ | beq CARG2, =>BC_JLOOP ++ } ++ | ins_next2 ++ | //TODO FCC ++ |3: // FP loop. ++ | fldd FTMP0, FORL_IDX*8(RA) // start ++ | fldd FTMP1, FORL_STOP*8(RA) // stop ++ | ldl TMP0, FORL_STEP*8(RA) // step ++ | cmplt TMP0, zero, TMP0 // step < 0 ? ++ | ifmovd TMP0, FTMP2 ++ if (!vk) { ++ | // cmpulti CARG4, LJ_TISNUM, TMP3 // start is number ? ++ | // cmpulti CARG5, LJ_TISNUM, TMP0 // step is number ? ++ | // cmpulti CARG6, LJ_TISNUM, TMP1 // stop is number ? ++ | ldi TMP1, LJ_TISNUM(zero) ++ | cmpult CARG4, TMP1, TMP3 ++ | cmpult CARG5, TMP1, TMP0 ++ | cmpult CRET2, TMP1, TMP1 ++ | and TMP3, TMP1, TMP3 ++ | and TMP0, TMP3, TMP0 ++ | beq TMP0, ->vmeta_for // if start or step or stop isn't number ++ | fcmplt FTMP0, FTMP1, FTMP3 // start < stop ? ++ | fcmplt FTMP1, FTMP0, FTMP4 // stop < start ? ++ | fseleq FTMP2, FTMP4, FTMP3, FTMP2 //TODO CHECK ++ | fimovd FTMP2, CARG2 // CARG2=0:+,startstop ++ | br zero, <1 ++ } else { ++ | fldd FTMP3, FORL_STEP*8(RA) ++ | faddd FTMP0, FTMP3, FTMP0 // start + step ++ | fcmplt FTMP0, FTMP1, FTMP3 // start + step < stop ? 
++ | fcmplt FTMP1, FTMP0, FTMP4 ++ | fseleq FTMP2, FTMP4, FTMP3, FTMP2 ++ | fimovd FTMP2, CARG2 ++ if (op == BC_IFORL) { ++ | selne CARG2, zero, TMP2, TMP2 ++ | addl PC, TMP2, PC ++ } ++ | fstd FTMP0, FORL_IDX*8(RA) ++ | ins_next1 ++ | fstd FTMP0, FORL_EXT*8(RA) ++ | br zero, <2 ++ } ++ break; ++ ++ case BC_ITERL: ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_IITERL follows. ++ break; ++ ++ case BC_JITERL: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IITERL: ++ | // RA = base*8, RD = target ++ | addl RA, BASE, RA ++ | ldl TMP1, 0(RA) ++ | cmpeq TMP1, TISNIL, AT ++ | bne AT, >1 // Stop if iterator returned nil. ++ if (op == BC_JITERL) { ++ | stl TMP1,-8(RA) ++ | br zero, =>BC_JLOOP ++ } else { ++ | branch_RD // Otherwise save control var + branch. ++ | stl TMP1, -8(RA) ++ } ++ |1: ++ | ins_next ++ break; ++ ++ case BC_LOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | // Note: RA/RD is only used by trace recorder to determine scope/extent ++ | // This opcode does NOT jump, it's only purpose is to detect a hot loop. ++ |.if JIT ++ | hotloop ++ |.endif ++ | // Fall through. Assumes BC_ILOOP follows. ++ break; ++ ++ case BC_ILOOP: ++ | // RA = base*8, RD = target (loop extent) ++ | ins_next ++ break; ++ ++ case BC_JLOOP: ++ |.if JIT ++ | // RA = base*8 (ignored), RD = traceno*8 ++ | ldl TMP0, DISPATCH_J(trace)(DISPATCH) ++ | addl TMP0, RD, TMP0 ++ | // Traces on SW64 don't store the trace number, so use 0. 
++ | stl zero, DISPATCH_GL(vmstate)(DISPATCH) ++ | ldl TRACE:TMP1, 0(TMP0) ++ | stl BASE, DISPATCH_GL(jit_base)(DISPATCH) // store Current JIT code L->base ++ | ldl TMP1, TRACE:TMP1->mcode ++ | ldi JGL, GG_DISP2G+32768(DISPATCH) ++ | stl L, DISPATCH_GL(tmpbuf.L)(DISPATCH) ++ | jmp zero, 0(TMP1) ++ |.endif ++ break; ++ ++ case BC_JMP: ++ | // RA = base*8 (only used by trace recorder), RD = target ++ | branch_RD // PC + (jump - 0x8000)<<2 ++ | ins_next ++ break; ++ ++ /* -- Function headers -------------------------------------------------- */ ++ ++ case BC_FUNCF: ++ |.if JIT ++ | hotcall ++ |.endif ++ case BC_FUNCV: /* NYI: compiled vararg functions. */ ++ | // Fall through. Assumes BC_IFUNCF/BC_IFUNCV follow. ++ break; ++ ++ case BC_JFUNCF: ++#if !LJ_HASJIT ++ break; ++#endif ++ case BC_IFUNCF: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ldl TMP2, L->maxstack ++ | ldbu TMP1, -4+PC2PROTO(numparams)(PC) ++ | ldl KBASE, -4+PC2PROTO(k)(PC) ++ | cmpult TMP2, RA, TMP0 ++ | s8addwi TMP1, 0, TMP1 // numparams*8 ++ | bne TMP0, ->vm_growstack_l ++ |2: ++ | cmpult NARGS8:RC, TMP1, TMP0 // Check for missing parameters. ++ | bne TMP0, >3 ++ if (op == BC_JFUNCF) { ++ | decode_RD RD, INS ++ | br zero, =>BC_JLOOP ++ } else { ++ | ins_next ++ } ++ | ++ |3: // Clear missing parameters. ++ | addl BASE, NARGS8:RC, TMP0 ++ | stl TISNIL, 0(TMP0) ++ | addwi NARGS8:RC, 8, NARGS8:RC ++ | br zero, <2 ++ break; ++ ++ case BC_JFUNCV: ++#if !LJ_HASJIT ++ break; ++#endif ++ | NYI // NYI: compiled vararg functions ++ break; /* NYI: compiled vararg functions. */ ++ ++ case BC_IFUNCV: ++ | // BASE = new base, RA = BASE+framesize*8, RB = LFUNC, RC = nargs*8 ++ | ldi TMP0, LJ_TFUNC(zero) ++ | addwi TMP0, 0, TMP0 ++ | addl BASE, RC, TMP1 ++ | ldl TMP2, L->maxstack ++ | settp LFUNC:RB, TMP0 ++ | addl RA, RC, TMP0 ++ | stl LFUNC:RB, 0(TMP1) // Store (tagged) copy of LFUNC. 
++ | ldi TMP3, 16+FRAME_VARG(RC) ++ | cmpult TMP0, TMP2, TMP0 ++ | ldl KBASE, -4+PC2PROTO(k)(PC) ++ | stl TMP3, 8(TMP1) // Store delta + FRAME_VARG. ++ | beq TMP0, ->vm_growstack_l ++ | ldbu TMP2, -4+PC2PROTO(numparams)(PC) ++ | bis BASE, zero, RA ++ | bis TMP1, zero, RC ++ | ins_next1 ++ | ldi BASE, 16(TMP1) ++ | beq TMP2, >3 ++ |1: ++ | ldl TMP0, 0(RA) ++ | cmpult RA, RC, AT // Less args than parameters? ++ | bis TMP0, zero, CARG1 ++ | selne AT, TMP0, TISNIL, TMP0 // Clear missing parameters. ++ | seleq AT, CARG1, TISNIL, CARG1 // Clear old fixarg slot (help the GC). ++ | subwi TMP2, 1, TMP2 ++ | stl TMP0, 16(TMP1) ++ | ldi TMP1, 8(TMP1) ++ | stl CARG1, 0(RA) ++ | ldi RA, 8(RA) ++ | bne TMP2, <1 ++ |3: ++ | ins_next2 ++ break; ++ ++ case BC_FUNCC: ++ case BC_FUNCCW: ++ | // BASE = new base, RA = BASE+framesize*8, RB = CFUNC, RC = nargs*8 ++ if (op == BC_FUNCC) { ++ | ldl CFUNCADDR, CFUNC:RB->f ++ } else { ++ | ldl CFUNCADDR, DISPATCH_GL(wrapf)(DISPATCH) ++ } ++ | addl RA, NARGS8:RC, TMP1 ++ | ldl TMP2, L->maxstack ++ | addl BASE, NARGS8:RC, RC ++ | stl BASE, L->base // base of currently excuting function ++ | stl RC, L->top ++ | cmpult TMP2, TMP1, AT ++ | li_vmstate C // ldi TMP0, ~LJ_VMST_C(zero) ++ if (op == BC_FUNCCW) { ++ | ldl CARG2, CFUNC:RB->f ++ } ++ | bis L, zero, CARG1 ++ | bne AT, ->vm_growstack_c // Need to grow stack. ++ | st_vmstate // .STXW TMP0, DISPATCH, DISPATCH_GL(vmstate) ++ | call r26, 0(CFUNCADDR) // (lua_State *L [, lua_CFunction f]) ++ | // Returns nresults. ++ | ldl BASE, L->base ++ | ldl TMP1, L->top ++ | stl L, DISPATCH_GL(cur_L)(DISPATCH) ++ | s8addwi CRET1, 0, RD ++ | li_vmstate INTERP ++ | ldl PC, FRAME_PC(BASE) // Fetch PC of caller. 
++ | subl TMP1, RD, RA // RA = L->top - nresults*8 ++ | st_vmstate ++ | br zero, ->vm_returnc ++ break; ++ ++ /* ---------------------------------------------------------------------- */ ++ ++ default: ++ fprintf(stderr, "Error: undefined opcode BC_%s\n", bc_names[op]); ++ exit(2); ++ break; ++ } ++} ++ ++static int build_backend(BuildCtx *ctx) ++{ ++ int op; ++ ++ dasm_growpc(Dst, BC__MAX); ++ ++ build_subroutines(ctx); ++ ++ |.code_op ++ for (op = 0; op < BC__MAX; op++) ++ build_ins(ctx, (BCOp)op, op); ++ ++ return BC__MAX; ++} ++ ++/* Emit pseudo frame-info for all assembler functions. */ ++static void emit_asm_debug(BuildCtx *ctx) ++{ ++ int fcofs = (int)((uint8_t *)ctx->glob[GLOB_vm_ffi_call] - ctx->code); ++ int i; ++ switch (ctx->mode) { ++ case BUILD_elfasm: ++ fprintf(ctx->fp, "\t.section .debug_frame,\"\",@progbits\n"); ++ fprintf(ctx->fp, ++ ".Lframe0:\n" ++ "\t.4byte .LECIE0-.LSCIE0\n" ++ ".LSCIE0:\n" ++ "\t.4byte 0xffffffff\n" ++ "\t.byte 0x1\n" ++ "\t.string \"\"\n" ++ "\t.uleb128 0x1\n" ++ "\t.sleb128 -4\n" ++ "\t.byte 31\n" ++ "\t.byte 0xc\n\t.uleb128 29\n\t.uleb128 0\n" ++ "\t.align 2\n" ++ ".LECIE0:\n\n"); ++ fprintf(ctx->fp, ++ ".LSFDE0:\n" ++ "\t.4byte .LEFDE0-.LASFDE0\n" ++ ".LASFDE0:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte .Lbegin\n" ++ "\t.8byte %d\n" ++ "\t.byte 0xe\n\t.uleb128 %d\n" ++ "\t.byte 0x9f\n\t.sleb128 2*5\n" ++ "\t.byte 0x9e\n\t.sleb128 2*6\n", ++ fcofs, CFRAME_SIZE); ++ for (i = 23; i >= 16; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+i, 2*(30-i)); ++ for (i = 31; i >= 24; i--) ++ fprintf(ctx->fp, "\t.byte %d\n\t.uleb128 %d\n", 0x80+32+i, 2*(46-i)); ++ fprintf(ctx->fp, ++ "\t.align 2\n" ++ ".LEFDE0:\n\n"); ++#if LJ_HASFFI ++ fprintf(ctx->fp, ++ ".LSFDE1:\n" ++ "\t.4byte .LEFDE1-.LASFDE1\n" ++ ".LASFDE1:\n" ++ "\t.4byte .Lframe0\n" ++ "\t.8byte lj_vm_ffi_call\n" ++ "\t.4byte %d\n" ++ "\t.byte 0x9f\n\t.uleb128 2*1\n" ++ "\t.byte 0x90\n\t.uleb128 2*2\n" ++ "\t.byte 0xd\n\t.uleb128 0x10\n" ++ "\t.align 2\n" ++ 
".LEFDE1:\n\n", (int)ctx->codesz - fcofs); ++#endif ++#if !LJ_NO_UNWIND ++ /* NYI */ ++#endif ++ break; ++ default: ++ break; ++ } ++} diff --git a/luajit.spec b/luajit.spec index 1bea606..7f952f3 100644 --- a/luajit.spec +++ b/luajit.spec @@ -2,7 +2,7 @@ Name: luajit Version: 2.1.0 -Release: 9 +Release: 10 Summary: Just-In-Time Compiler for Lua License: MIT URL: http://luajit.org/ @@ -17,7 +17,8 @@ Source3: apply-patches Patch0001: luajit-2.1-d06beb0-update.patch Patch0002: 0002-luajit-add-secure-compile-option-fstack.patch Patch0003: add-riscv-support.patch -ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64 riscv64 loongarch64 +Patch0004: add-sw64-support.patch +ExclusiveArch: %{arm} %{ix86} x86_64 %{mips} aarch64 riscv64 loongarch64 sw_64 BuildRequires: gcc BuildRequires: make @@ -56,6 +57,11 @@ cp %{SOURCE3} . sh ./apply-patches %endif +# sw64 arch patch +%ifarch sw_64 +%patch -P0004 -p1 +%endif + sed -i -e '/install -m/s/-m/-p -m/' Makefile %build @@ -94,6 +100,10 @@ cp -a doc _tmp_html/html %{_mandir}/man1/%{name}.1* %changelog + +* Tue Mar 25 2025 swcompiler - 2.1.0-10 +- add sw64 support + * Tue Apr 23 2024 zhaoxiaolin - 2.1.0-9 - Add loongarch64 base support -- Gitee