root/lj_asm_mips.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. ra_hintalloc
  2. ra_alloc1z
  3. ra_alloc2
  4. asm_sparejump_setup
  5. asm_exitstub_setup
  6. asm_guard
  7. noconflict
  8. asm_fuseabase
  9. asm_fuseahuref
  10. asm_fusexref
  11. asm_gencall
  12. asm_setupresult
  13. asm_call
  14. asm_callx
  15. asm_callid
  16. asm_callround
  17. asm_retf
  18. asm_tointg
  19. asm_tobit
  20. asm_conv
  21. asm_conv64
  22. asm_strto
  23. asm_tvptr
  24. asm_tostr
  25. asm_aref
  26. asm_href
  27. asm_hrefk
  28. asm_newref
  29. asm_uref
  30. asm_fref
  31. asm_strref
  32. asm_fxloadins
  33. asm_fxstoreins
  34. asm_fload
  35. asm_fstore
  36. asm_xload
  37. asm_xstore
  38. asm_ahuvload
  39. asm_ahustore
  40. asm_sload
  41. asm_cnew
  42. asm_tbar
  43. asm_obar
  44. asm_fparith
  45. asm_fpunary
  46. asm_fpjoin_pow
  47. asm_add
  48. asm_sub
  49. asm_mul
  50. asm_neg
  51. asm_arithov
  52. asm_mulov
  53. asm_add64
  54. asm_sub64
  55. asm_neg64
  56. asm_bitnot
  57. asm_bitswap
  58. asm_bitop
  59. asm_bitshift
  60. asm_bitror
  61. asm_min_max
  62. asm_comp
  63. asm_compeq
  64. asm_comp64
  65. asm_comp64eq
  66. asm_hiop
  67. asm_stack_check
  68. asm_stack_restore
  69. asm_gc_check
  70. asm_loop_fixup
  71. asm_head_root_base
  72. asm_head_side_base
  73. asm_tail_fixup
  74. asm_tail_prep
  75. asm_ir
  76. asm_setup_call_slots
  77. asm_setup_target
  78. lj_asm_patchexit

   1 /*
   2 ** MIPS IR assembler (SSA IR -> machine code).
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5 
   6 /* -- Register allocator extensions --------------------------------------- */
   7 
/* Allocate a register with a hint.
** Returns the IR reference's register, allocating one from 'allow' if
** it has none yet. The hint is only propagated onto unhinted refs that
** don't cross a loop boundary.
*/
static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    /* Only seed a hint if none is set and the ref doesn't cross the loop. */
    if (!ra_hashint(r) && !iscrossref(as, ref))
      ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
    r = ra_allocref(as, ref, allow);
  }
  ra_noweak(as, r);  /* Protect the register from weak-unallocation. */
  return r;
}
  20 
/* Allocate a register or RID_ZERO.
** A constant-zero GPR operand can use the hardwired MIPS $zero register
** instead of materializing the constant.
*/
static Reg ra_alloc1z(ASMState *as, IRRef ref, RegSet allow)
{
  Reg r = IR(ref)->r;
  if (ra_noreg(r)) {
    /* Only GPR requests qualify for $zero; FPRs have no zero register. */
    if (!(allow & RSET_FPR) && irref_isk(ref) && IR(ref)->i == 0)
      return RID_ZERO;
    r = ra_allocref(as, ref, allow);
  } else {
    ra_noweak(as, r);  /* Keep an already-assigned register alive. */
  }
  return r;
}
  34 
/* Allocate two source registers for three-operand instructions.
** Returns both registers packed into one value: left in bits 0-7,
** right in bits 8-15. Allocation order is chosen to respect existing
** assignments and hints, excluding each side from the other's set.
*/
static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
{
  IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  Reg left = irl->r, right = irr->r;
  if (ra_hasreg(left)) {  /* Left already in a register? */
    ra_noweak(as, left);
    if (ra_noreg(right))
      right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
    else
      ra_noweak(as, right);
  } else if (ra_hasreg(right)) {  /* Only right has a register. */
    ra_noweak(as, right);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else if (ra_hashint(right)) {  /* Neither allocated: honor right's hint. */
    right = ra_alloc1z(as, ir->op2, allow);
    left = ra_alloc1z(as, ir->op1, rset_exclude(allow, right));
  } else {  /* No constraints: allocate left first. */
    left = ra_alloc1z(as, ir->op1, allow);
    right = ra_alloc1z(as, ir->op2, rset_exclude(allow, left));
  }
  return left | (right << 8);  /* Pack the register pair. */
}
  58 
  59 /* -- Guard handling ------------------------------------------------------ */
  60 
/* Need some spare long-range jump slots, for out-of-range branches. */
#define MIPS_SPAREJUMP          4

/* Setup spare long-range jump slots per mcarea.
** Reserves MIPS_SPAREJUMP jump+delay-slot pairs at the bottom of a fresh
** mcode area; lj_asm_patchexit can later fill them for branches whose
** target is out of the 16/26 bit range.
*/
static void asm_sparejump_setup(ASMState *as)
{
  MCode *mxp = as->mcbot;
  /* Only once per area: mcbot sits directly behind the MCLink header. */
  if (((uintptr_t)mxp & (LJ_PAGESIZE-1)) == sizeof(MCLink)) {
    lua_assert(MIPSI_NOP == 0);  /* memset(0) pre-fills the slots with NOPs. */
    memset(mxp, 0, MIPS_SPAREJUMP*2*sizeof(MCode));
    mxp += MIPS_SPAREJUMP*2;  /* Each slot is a jump plus its delay slot. */
    lua_assert(mxp < as->mctop);
    lj_mcode_sync(as->mcbot, mxp);  /* Flush the I-cache over the range. */
    lj_mcode_commitbot(as->J, mxp);
    as->mcbot = mxp;
    as->mclim = as->mcbot + MCLIM_REDZONE;
  }
}
  79 
/* Setup exit stub after the end of each trace.
** Instructions are written backwards from mctop. Execution order:
** sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno (in the delay slot).
*/
static void asm_exitstub_setup(ASMState *as)
{
  MCode *mxp = as->mctop;
  /* sw TMP, 0(sp); j ->vm_exit_handler; li TMP, traceno */
  *--mxp = MIPSI_LI|MIPSF_T(RID_TMP)|as->T->traceno;
  *--mxp = MIPSI_J|((((uintptr_t)(void *)lj_vm_exit_handler)>>2)&0x03ffffffu);
  /* J only reaches targets within the same 256 MB (28 bit) region. */
  lua_assert(((uintptr_t)mxp ^ (uintptr_t)(void *)lj_vm_exit_handler)>>28 == 0);
  *--mxp = MIPSI_SW|MIPSF_T(RID_TMP)|MIPSF_S(RID_SP)|0;
  as->mctop = mxp;
}

/* Keep this in-sync with exitstub_trace_addr(). */
#define asm_exitstub_addr(as)   ((as)->mctop)
  94 
/* Emit conditional branch to exit for guard.
** Emits (backwards) a conditional branch to the exit stub with the
** snapshot number loaded into RID_TMP in the branch delay slot.
*/
static void asm_guard(ASMState *as, MIPSIns mi, Reg rs, Reg rt)
{
  MCode *target = asm_exitstub_addr(as);
  MCode *p = as->mcp;
  if (LJ_UNLIKELY(p == as->invmcp)) {
    /* Guard coincides with the loop branch: invert the condition so the
    ** fallthrough becomes the loop backedge and the taken branch exits.
    */
    as->invmcp = NULL;
    as->loopinv = 1;
    as->mcp = p+1;  /* Drop the loop branch; the guard replaces it. */
    mi = mi ^ ((mi>>28) == 1 ? 0x04000000u : 0x00010000u);  /* Invert cond. */
    target = p;  /* Patch target later in asm_loop_fixup. */
  }
  emit_ti(as, MIPSI_LI, RID_TMP, as->snapno);  /* Lands in the delay slot. */
  emit_branch(as, mi, rs, rt, target);
}
 110 
 111 /* -- Operand fusion ------------------------------------------------------ */
 112 
 113 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
 114 #define CONFLICT_SEARCH_LIM     31
 115 
 116 /* Check if there's no conflicting instruction between curins and ref. */
 117 static int noconflict(ASMState *as, IRRef ref, IROp conflict)
 118 {
 119   IRIns *ir = as->ir;
 120   IRRef i = as->curins;
 121   if (i > ref + CONFLICT_SEARCH_LIM)
 122     return 0;  /* Give up, ref is too far away. */
 123   while (--i > ref)
 124     if (ir[i].o == conflict)
 125       return 0;  /* Conflict found. */
 126   return 1;  /* Ok, no conflict. */
 127 }
 128 
 129 /* Fuse the array base of colocated arrays. */
 130 static int32_t asm_fuseabase(ASMState *as, IRRef ref)
 131 {
 132   IRIns *ir = IR(ref);
 133   if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
 134       !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
 135     return (int32_t)sizeof(GCtab);
 136   return 0;
 137 }
 138 
/* Fuse array/hash/upvalue reference into register+offset operand.
** Stores the displacement in *ofsp and returns the base register.
** Falls back to a plain register with zero offset when fusion fails.
*/
static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow)
{
  IRIns *ir = IR(ref);
  if (ra_noreg(ir->r)) {
    if (ir->o == IR_AREF) {  /* Array ref with constant index? */
      if (mayfuse(as, ref)) {
        if (irref_isk(ir->op2)) {
          IRRef tab = IR(ir->op1)->op1;
          int32_t ofs = asm_fuseabase(as, tab);
          IRRef refa = ofs ? tab : ir->op1;  /* Colocated: address via table. */
          ofs += 8*IR(ir->op2)->i;  /* Array slots are 8 byte TValues. */
          if (checki16(ofs)) {  /* Must fit the 16 bit displacement. */
            *ofsp = ofs;
            return ra_alloc1(as, refa, allow);
          }
        }
      }
    } else if (ir->o == IR_HREFK) {  /* Hash ref with constant node slot. */
      if (mayfuse(as, ref)) {
        int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
        if (checki16(ofs)) {
          *ofsp = ofs;
          return ra_alloc1(as, ir->op1, allow);
        }
      }
    } else if (ir->o == IR_UREFC) {  /* Closed upvalue of a constant func. */
      if (irref_isk(ir->op1)) {
        GCfunc *fn = ir_kfunc(IR(ir->op1));
        int32_t ofs = i32ptr(&gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.tv);
        int32_t jgl = (intptr_t)J2G(as->J);
        if ((uint32_t)(ofs-jgl) < 65536) {  /* Near G? Address via RID_JGL. */
          *ofsp = ofs-jgl-32768;  /* RID_JGL holds G+32768, hence the bias. */
          return RID_JGL;
        } else {  /* Split into a constant base plus 16 bit displacement. */
          *ofsp = (int16_t)ofs;
          return ra_allock(as, ofs-(int16_t)ofs, allow);
        }
      }
    }
  }
  *ofsp = 0;  /* Fallback: plain base register, zero offset. */
  return ra_alloc1(as, ref, allow);
}
 183 
/* Fuse XLOAD/XSTORE reference into load/store operand.
** Tries to fold an ADD-with-constant or STRREF offset into the 16 bit
** displacement of the load/store; otherwise emits an explicit ADDU.
*/
static void asm_fusexref(ASMState *as, MIPSIns mi, Reg rt, IRRef ref,
                         RegSet allow, int32_t ofs)
{
  IRIns *ir = IR(ref);
  Reg base;
  if (ra_noreg(ir->r) && canfuse(as, ir)) {
    if (ir->o == IR_ADD) {  /* Fold base+constant into the displacement. */
      int32_t ofs2;
      if (irref_isk(ir->op2) && (ofs2 = ofs + IR(ir->op2)->i, checki16(ofs2))) {
        ref = ir->op1;
        ofs = ofs2;
      }
    } else if (ir->o == IR_STRREF) {  /* Fold the string data offset. */
      int32_t ofs2 = 65536;  /* Out-of-range sentinel (fails checki16). */
      lua_assert(ofs == 0);
      ofs = (int32_t)sizeof(GCstr);  /* String data follows the header. */
      if (irref_isk(ir->op2)) {
        ofs2 = ofs + IR(ir->op2)->i;
        ref = ir->op1;
      } else if (irref_isk(ir->op1)) {
        ofs2 = ofs + IR(ir->op1)->i;
        ref = ir->op2;
      }
      if (!checki16(ofs2)) {
        /* NYI: Fuse ADD with constant. */
        Reg right, left = ra_alloc2(as, ir, allow);
        right = (left >> 8); left &= 255;  /* Unpack the register pair. */
        emit_hsi(as, mi, rt, RID_TMP, ofs);
        emit_dst(as, MIPSI_ADDU, RID_TMP, left, right);
        return;
      }
      ofs = ofs2;
    }
  }
  base = ra_alloc1(as, ref, allow);
  emit_hsi(as, mi, rt, base, ofs);
}
 222 
 223 /* -- Calls --------------------------------------------------------------- */
 224 
/* Generate a call to a C function.
** Marshals args per the MIPS o32 calling convention: first FP args in
** FPR pairs (non-vararg only), then GPRs, then the stack. Code is
** emitted backwards, so the call itself is emitted first.
*/
static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
{
  uint32_t n, nargs = CCI_NARGS(ci);
  int32_t ofs = 16;  /* Stack args start past the 16 byte save area (o32). */
  Reg gpr, fpr = REGARG_FIRSTFPR;
  if ((void *)ci->func)
    emit_call(as, (void *)ci->func);  /* Emitted first, executes last. */
  /* Make argument GPRs cheap to evict so remat avoids clobbering them. */
  for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
    as->cost[gpr] = REGCOST(~0u, ASMREF_L);
  gpr = REGARG_FIRSTGPR;
  for (n = 0; n < nargs; n++) {  /* Setup args. */
    IRRef ref = args[n];
    if (ref) {
      IRIns *ir = IR(ref);
      if (irt_isfp(ir->t) && fpr <= REGARG_LASTFPR &&
          !(ci->flags & CCI_VARARG)) {  /* FP arg in an FP argument reg. */
        lua_assert(rset_test(as->freeset, fpr));  /* Already evicted. */
        ra_leftov(as, fpr, ref);
        fpr += 2;  /* FP argument registers come in even/odd pairs. */
        gpr += irt_isnum(ir->t) ? 2 : 1;  /* GPR slots consumed in parallel. */
      } else {
        fpr = REGARG_LASTFPR+1;  /* No FPR args after the first GPR arg. */
        if (irt_isnum(ir->t)) gpr = (gpr+1) & ~1;  /* Even-align doubles. */
        if (gpr <= REGARG_LASTGPR) {
          lua_assert(rset_test(as->freeset, gpr));  /* Already evicted. */
          if (irt_isfp(ir->t)) {  /* FP value passed in GPR(s) (varargs). */
            RegSet of = as->freeset;
            Reg r;
            /* Workaround to protect argument GPRs from being used for remat. */
            as->freeset &= ~RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1);
            r = ra_alloc1(as, ref, RSET_FPR);
            as->freeset |= (of & RSET_RANGE(REGARG_FIRSTGPR, REGARG_LASTGPR+1));
            if (irt_isnum(ir->t)) {  /* Move a double as two 32 bit halves. */
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?0:1), r+1);
              emit_tg(as, MIPSI_MFC1, gpr+(LJ_BE?1:0), r);
              lua_assert(rset_test(as->freeset, gpr+1));  /* Already evicted. */
              gpr += 2;
            } else if (irt_isfloat(ir->t)) {
              emit_tg(as, MIPSI_MFC1, gpr, r);
              gpr++;
            }
          } else {
            ra_leftov(as, gpr, ref);
            gpr++;
          }
        } else {  /* Pass the argument on the stack. */
          Reg r = ra_alloc1z(as, ref, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
          if (irt_isnum(ir->t)) ofs = (ofs + 4) & ~4;  /* 8-align doubles. */
          emit_spstore(as, ir, r, ofs);
          ofs += irt_isnum(ir->t) ? 8 : 4;
        }
      }
    } else {  /* Empty arg slot still consumes a GPR or stack slot. */
      fpr = REGARG_LASTFPR+1;
      if (gpr <= REGARG_LASTGPR)
        gpr++;
      else
        ofs += 4;
    }
    checkmclim(as);
  }
}
 288 
/* Setup result reg/sp for call. Evict scratch regs.
** Must run before asm_gencall (backwards emission): it frees all
** caller-saved registers and binds the result to the ABI return regs.
*/
static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  RegSet drop = RSET_SCRATCH;
  /* A paired HIOP consumes the high word of a two-register result. */
  int hiop = ((ir+1)->o == IR_HIOP && !irt_isnil((ir+1)->t));
  if ((ci->flags & CCI_NOFPRCLOBBER))
    drop &= ~RSET_FPR;  /* Callee doesn't clobber FPRs: keep them. */
  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  if (hiop && ra_hasreg((ir+1)->r))
    rset_clear(drop, (ir+1)->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);  /* Evictions must be performed first. */
  if (ra_used(ir)) {
    lua_assert(!irt_ispri(ir->t));
    if (irt_isfp(ir->t)) {
      if ((ci->flags & CCI_CASTU64)) {
        /* FP result returned in the GPR pair: move to FPR and/or spill. */
        int32_t ofs = sps_scale(ir->s);
        Reg dest = ir->r;
        if (ra_hasreg(dest)) {
          ra_free(as, dest);
          ra_modified(as, dest);
          emit_tg(as, MIPSI_MTC1, RID_RETHI, dest+1);
          emit_tg(as, MIPSI_MTC1, RID_RETLO, dest);
        }
        if (ofs) {
          emit_tsi(as, MIPSI_SW, RID_RETLO, RID_SP, ofs+(LJ_BE?4:0));
          emit_tsi(as, MIPSI_SW, RID_RETHI, RID_SP, ofs+(LJ_BE?0:4));
        }
      } else {
        ra_destreg(as, ir, RID_FPRET);
      }
    } else if (hiop) {
      ra_destpair(as, ir);  /* 64 bit integer result in a GPR pair. */
    } else {
      ra_destreg(as, ir, RID_RET);
    }
  }
}
 327 
/* Assemble a CALL* instruction via its IR call info.
** Backwards emission: the result setup is emitted after (i.e. executes
** after) the call generated by asm_gencall.
*/
static void asm_call(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX];
  const CCallInfo *ci = &lj_ir_callinfo[ir->op2];
  asm_collectargs(as, ir, ci, args);
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}
 336 
/* Assemble a CALLXS instruction (FFI call to a possibly computed address). */
static void asm_callx(ASMState *as, IRIns *ir)
{
  IRRef args[CCI_NARGS_MAX*2];
  CCallInfo ci;
  IRRef func;
  IRIns *irf;
  ci.flags = asm_callx_flags(as, ir);
  asm_collectargs(as, ir, &ci, args);
  asm_setupresult(as, ir, &ci);
  /* Skip past a trailing CARG that carries the function reference. */
  func = ir->op2; irf = IR(func);
  if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
  if (irref_isk(func)) {  /* Call to constant address. */
    ci.func = (ASMFunction)(void *)(irf->i);
  } else {  /* Need specific register for indirect calls. */
    Reg r = ra_alloc1(as, func, RID2RSET(RID_CFUNCADDR));
    MCode *p = as->mcp;
    /* Delay slot copies the target into RID_CFUNCADDR for the callee. */
    if (r == RID_CFUNCADDR)
      *--p = MIPSI_NOP;
    else
      *--p = MIPSI_MOVE | MIPSF_D(RID_CFUNCADDR) | MIPSF_S(r);
    *--p = MIPSI_JALR | MIPSF_S(r);
    as->mcp = p;
    ci.func = (ASMFunction)(void *)0;  /* Call already emitted: skip it. */
  }
  asm_gencall(as, &ci, args);
}
 363 
/* Assemble a call to a predefined two-argument IR call helper. */
static void asm_callid(ASMState *as, IRIns *ir, IRCallID id)
{
  const CCallInfo *ci = &lj_ir_callinfo[id];
  IRRef args[2];
  args[0] = ir->op1;
  args[1] = ir->op2;
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}
 373 
/* Assemble a call to a rounding helper with a special calling convention. */
static void asm_callround(ASMState *as, IRIns *ir, IRCallID id)
{
  /* The modified regs must match with the *.dasc implementation. */
  RegSet drop = RID2RSET(RID_R1)|RID2RSET(RID_R12)|RID2RSET(RID_FPRET)|
                RID2RSET(RID_F2)|RID2RSET(RID_F4)|RID2RSET(REGARG_FIRSTFPR);
  if (ra_hasreg(ir->r)) rset_clear(drop, ir->r);  /* Dest handled below. */
  ra_evictset(as, drop);
  ra_destreg(as, ir, RID_FPRET);  /* Result arrives in the FP return reg. */
  emit_call(as, (void *)lj_ir_callinfo[id].func);
  ra_leftov(as, REGARG_FIRSTFPR, ir->op1);  /* Single FP argument. */
}
 385 
 386 /* -- Returns ------------------------------------------------------------- */
 387 
/* Return to lower frame. Guard that it goes to the right spot. */
static void asm_retf(ASMState *as, IRIns *ir)
{
  Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
  void *pc = ir_kptr(IR(ir->op2));
  /* Slots to rewind: frame link plus the call's base offset from its PC. */
  int32_t delta = 1+bc_a(*((const BCIns *)pc - 1));
  as->topslot -= (BCReg)delta;
  if ((int32_t)as->topslot < 0) as->topslot = 0;
  irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
  emit_setgl(as, base, jit_base);
  emit_addptr(as, base, -8*delta);  /* Rewind base by delta 8 byte slots. */
  /* Guard that the frame's stored PC matches the expected constant. */
  asm_guard(as, MIPSI_BNE, RID_TMP,
            ra_allock(as, i32ptr(pc), rset_exclude(RSET_GPR, base)));
  emit_tsi(as, MIPSI_LW, RID_TMP, base, -8);  /* Load frame PC below base. */
}
 403 
 404 /* -- Type conversions ---------------------------------------------------- */
 405 
/* Convert a number to an integer with an exactness guard.
** Truncates to int, converts back to double and exits the trace if the
** round-trip doesn't compare equal (i.e. the number wasn't integral).
*/
static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
{
  Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
  Reg dest = ra_dest(as, ir, RSET_GPR);
  asm_guard(as, MIPSI_BC1F, 0, 0);  /* Exit if FP compare is false. */
  emit_fgh(as, MIPSI_C_EQ_D, 0, tmp, left);
  emit_fg(as, MIPSI_CVT_D_W, tmp, tmp);
  emit_tg(as, MIPSI_MFC1, dest, tmp);
  emit_fg(as, MIPSI_CVT_W_D, tmp, left);
}
 416 
/* Convert a number to its bit pattern for bit operations.
** Adds op2 (presumably the 2^52+2^51 bias constant — see TOBIT IR
** semantics) so the integer value lands in the low word of the double.
*/
static void asm_tobit(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_FPR;
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, allow);
  Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
  Reg tmp = ra_scratch(as, rset_clear(allow, right));
  emit_tg(as, MIPSI_MFC1, dest, tmp);  /* Low word holds the result. */
  emit_fgh(as, MIPSI_ADD_D, tmp, left, right);
}
 427 
/* Assemble a CONV instruction: numeric type conversions.
** 64 bit integer conversions are split off to asm_conv64 by SPLIT.
*/
static void asm_conv(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
  int stfp = (st == IRT_NUM || st == IRT_FLOAT);
  IRRef lref = ir->op1;
  lua_assert(irt_type(ir->t) != st);
  lua_assert(!(irt_isint64(ir->t) ||
               (st == IRT_I64 || st == IRT_U64))); /* Handled by SPLIT. */
  if (irt_isfp(ir->t)) {  /* Conversion to an FP type. */
    Reg dest = ra_dest(as, ir, RSET_FPR);
    if (stfp) {  /* FP to FP conversion. */
      emit_fg(as, st == IRT_NUM ? MIPSI_CVT_S_D : MIPSI_CVT_D_S,
              dest, ra_alloc1(as, lref, RSET_FPR));
    } else if (st == IRT_U32) {  /* U32 to FP conversion. */
      /* y = (x ^ 0x8000000) + 2147483648.0 */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, dest));
      if (irt_isfloat(ir->t))
        emit_fg(as, MIPSI_CVT_S_D, dest, dest);
      /* Must perform arithmetic with doubles to keep the precision. */
      emit_fgh(as, MIPSI_ADD_D, dest, dest, tmp);
      emit_fg(as, MIPSI_CVT_D_W, dest, dest);
      /* tmp = 2^31 as a double (bit pattern 0x41e00000 00000000). */
      emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                 (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
                 RSET_GPR);
      emit_tg(as, MIPSI_MTC1, RID_TMP, dest);
      emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, left);  /* Flip sign bit. */
      emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
    } else {  /* Integer to FP conversion. */
      Reg left = ra_alloc1(as, lref, RSET_GPR);
      emit_fg(as, irt_isfloat(ir->t) ? MIPSI_CVT_S_W : MIPSI_CVT_D_W,
              dest, dest);
      emit_tg(as, MIPSI_MTC1, left, dest);
    }
  } else if (stfp) {  /* FP to integer conversion. */
    if (irt_isguard(ir->t)) {
      /* Checked conversions are only supported from number to int. */
      lua_assert(irt_isint(ir->t) && st == IRT_NUM);
      asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
    } else {
      Reg dest = ra_dest(as, ir, RSET_GPR);
      Reg left = ra_alloc1(as, lref, RSET_FPR);
      Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
      if (irt_isu32(ir->t)) {
        /* y = (int)floor(x - 2147483648.0) ^ 0x80000000 */
        emit_dst(as, MIPSI_XOR, dest, dest, RID_TMP);
        emit_ti(as, MIPSI_LUI, RID_TMP, 0x8000);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_FLOOR_W_S : MIPSI_FLOOR_W_D,
                tmp, tmp);
        emit_fgh(as, st == IRT_FLOAT ? MIPSI_SUB_S : MIPSI_SUB_D,
                 tmp, left, tmp);
        /* Load the 2^31 bias: float (0x4f000000) or double constant. */
        if (st == IRT_FLOAT)
          emit_lsptr(as, MIPSI_LWC1, (tmp & 31),
                     (void *)lj_ir_k64_find(as->J, U64x(4f000000,4f000000)),
                     RSET_GPR);
        else
          emit_lsptr(as, MIPSI_LDC1, (tmp & 31),
                     (void *)lj_ir_k64_find(as->J, U64x(41e00000,00000000)),
                     RSET_GPR);
      } else {  /* Plain truncating conversion to int. */
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, st == IRT_FLOAT ? MIPSI_TRUNC_W_S : MIPSI_TRUNC_W_D,
                tmp, left);
      }
    }
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (st >= IRT_I8 && st <= IRT_U16) {  /* Extend to 32 bit integer. */
      Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
      lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
      if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend sub-word source. */
        if ((as->flags & JIT_F_MIPS32R2)) {
          emit_dst(as, st == IRT_I8 ? MIPSI_SEB : MIPSI_SEH, dest, 0, left);
        } else {  /* Pre-R2: shift left, then arithmetic shift right. */
          uint32_t shift = st == IRT_I8 ? 24 : 16;
          emit_dta(as, MIPSI_SRA, dest, dest, shift);
          emit_dta(as, MIPSI_SLL, dest, left, shift);
        }
      } else {  /* Zero-extend by masking. */
        emit_tsi(as, MIPSI_ANDI, dest, left,
                 (int32_t)(st == IRT_U8 ? 0xff : 0xffff));
      }
    } else {  /* 32/64 bit integer conversions. */
      /* Only need to handle 32/32 bit no-op (cast) on 32 bit archs. */
      ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
    }
  }
}
 517 
#if LJ_HASFFI
/* Assemble the 64 bit half of a split CONV via an FP64 helper call.
** Called for the HIOP of a SPLIT pair; (ir-1) is the low-word op.
*/
static void asm_conv64(ASMState *as, IRIns *ir)
{
  IRType st = (IRType)((ir-1)->op2 & IRCONV_SRCMASK);
  IRType dt = (((ir-1)->op2 & IRCONV_DSTMASK) >> IRCONV_DSH);
  IRCallID id;
  const CCallInfo *ci;
  IRRef args[2];
  /* The 64 bit argument's hi/lo refs in endian order. */
  args[LJ_BE?0:1] = ir->op1;
  args[LJ_BE?1:0] = (ir-1)->op1;
  if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit integer. */
    id = IRCALL_fp64_d2l + ((st == IRT_FLOAT) ? 2 : 0) + (dt - IRT_I64);
    ir--;  /* Result belongs to the preceding low-word instruction. */
  } else {  /* 64 bit integer to FP. */
    id = IRCALL_fp64_l2d + ((dt == IRT_FLOAT) ? 2 : 0) + (st - IRT_I64);
  }
  ci = &lj_ir_callinfo[id];
  asm_setupresult(as, ir, ci);
  asm_gencall(as, ci, args);
}
#endif
 539 
/* Assemble STRTO: scan a string to a number, guard on success.
** The scanned number is written to the instruction's spill slot; the
** guard exits the trace when lj_strscan_num returns 0.
*/
static void asm_strto(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
  IRRef args[2];
  RegSet drop = RSET_SCRATCH;
  if (ra_hasreg(ir->r)) rset_set(drop, ir->r);  /* Spill dest reg (if any). */
  ra_evictset(as, drop);
  asm_guard(as, MIPSI_BEQ, RID_RET, RID_ZERO);  /* Test return status. */
  args[0] = ir->op1;      /* GCstr *str */
  args[1] = ASMREF_TMP1;  /* TValue *n  */
  asm_gencall(as, ci, args);
  /* Store the result to the spill slot or temp slots. */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1),
           RID_SP, sps_scale(ir->s));
}
 555 
/* Get pointer to TValue.
** Materializes a TValue address for 'ref' into 'dest': a number
** constant's own storage, a spill slot, or the global g->tmptv.
*/
static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
{
  IRIns *ir = IR(ref);
  if (irt_isnum(ir->t)) {
    if (irref_isk(ref))  /* Use the number constant itself as a TValue. */
      ra_allockreg(as, i32ptr(ir_knum(ir)), dest);
    else  /* Otherwise force a spill and use the spill slot. */
      emit_tsi(as, MIPSI_ADDIU, dest, RID_SP, ra_spill(as, ir));
  } else {
    /* Otherwise use g->tmptv to hold the TValue. */
    RegSet allow = rset_exclude(RSET_GPR, dest);
    Reg type;
    /* RID_JGL holds G+32768, hence the -32768 bias on the offset. */
    emit_tsi(as, MIPSI_ADDIU, dest, RID_JGL, offsetof(global_State, tmptv)-32768);
    if (!irt_ispri(ir->t)) {  /* Non-primitive types store a payload. */
      Reg src = ra_alloc1(as, ref, allow);
      emit_setgl(as, src, tmptv.gcr);
    }
    type = ra_allock(as, irt_toitype(ir->t), allow);
    emit_setgl(as, type, tmptv.it);  /* Store the type tag. */
  }
}
 578 
/* Assemble TOSTR: convert a number or an integer to a string. */
static void asm_tostr(ASMState *as, IRIns *ir)
{
  IRRef args[2];
  args[0] = ASMREF_L;
  as->gcsteps++;  /* The helper may allocate a string: credit a GC step. */
  if (irt_isnum(IR(ir->op1)->t) || (ir+1)->o == IR_HIOP) {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromnum];
    args[1] = ASMREF_TMP1;  /* const lua_Number * */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
    /* Materialize a TValue pointer for the number argument. */
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op1);
  } else {
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_str_fromint];
    args[1] = ir->op1;  /* int32_t k */
    asm_setupresult(as, ir, ci);  /* GCstr * */
    asm_gencall(as, ci, args);
  }
}
 597 
 598 /* -- Memory references --------------------------------------------------- */
 599 
/* Assemble AREF: compute the address of an array slot. */
static void asm_aref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx, base;
  if (irref_isk(ir->op2)) {  /* Constant index: fold into a single ADDIU. */
    IRRef tab = IR(ir->op1)->op1;
    int32_t ofs = asm_fuseabase(as, tab);
    IRRef refa = ofs ? tab : ir->op1;  /* Colocated: address via the table. */
    ofs += 8*IR(ir->op2)->i;  /* Array slots are 8 byte TValues. */
    if (checki16(ofs)) {
      base = ra_alloc1(as, refa, RSET_GPR);
      emit_tsi(as, MIPSI_ADDIU, dest, base, ofs);
      return;
    }
  }
  /* Variable index: dest = base + (idx << 3). */
  base = ra_alloc1(as, ir->op1, RSET_GPR);
  idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
  emit_dst(as, MIPSI_ADDU, dest, RID_TMP, base);
  emit_dta(as, MIPSI_SLL, RID_TMP, idx, 3);
}
 620 
/* Inlined hash lookup. Specialized for key type and for const keys.
** The equivalent C code is:
**   Node *n = hashkey(t, key);
**   do {
**     if (lj_obj_equal(&n->key, key)) return &n->val;
**   } while ((n = nextnode(n)));
**   return niltv(L);
** Emitted backwards: hash computation first in execution order, then the
** chain-walk loop, then the not-found fallback loading niltv.
*/
static void asm_href(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  int destused = ra_used(ir);
  Reg dest = ra_dest(as, ir, allow);
  Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
  Reg key = RID_NONE, type = RID_NONE, tmpnum = RID_NONE, tmp1 = RID_TMP, tmp2;
  IRRef refkey = ir->op2;
  IRIns *irkey = IR(refkey);
  IRType1 kt = irkey->t;
  uint32_t khash;
  MCLabel l_end, l_loop, l_next;

  rset_clear(allow, tab);
  if (irt_isnum(kt)) {  /* Number keys are compared in FP registers. */
    key = ra_alloc1(as, refkey, RSET_FPR);
    tmpnum = ra_scratch(as, rset_exclude(RSET_FPR, key));
  } else if (!irt_ispri(kt)) {  /* GC keys: compare payload and type tag. */
    key = ra_alloc1(as, refkey, allow);
    rset_clear(allow, key);
    type = ra_allock(as, irt_toitype(irkey->t), allow);
    rset_clear(allow, type);
  }
  tmp2 = ra_scratch(as, allow);
  rset_clear(allow, tmp2);

  /* Key not found in chain: load niltv. */
  l_end = emit_label(as);
  if (destused)
    emit_loada(as, dest, niltvg(J2G(as->J)));
  else
    *--as->mcp = MIPSI_NOP;
  /* Follow hash chain until the end. */
  emit_move(as, dest, tmp1);
  l_loop = --as->mcp;  /* Placeholder slot, patched to a BNE below. */
  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, next));
  l_next = emit_label(as);

  /* Type and value comparison. */
  if (irt_isnum(kt)) {
    emit_branch(as, MIPSI_BC1T, 0, 0, l_end);
    emit_fgh(as, MIPSI_C_EQ_D, 0, tmpnum, key);
    emit_tg(as, MIPSI_MFC1, tmp1, key+1);
    emit_branch(as, MIPSI_BEQ, tmp1, RID_ZERO, l_next);
    emit_tsi(as, MIPSI_SLTIU, tmp1, tmp1, (int32_t)LJ_TISNUM);  /* Number? */
    emit_hsi(as, MIPSI_LDC1, tmpnum, dest, (int32_t)offsetof(Node, key.n));
  } else {
    if (irt_ispri(kt)) {  /* Primitive key: type tag comparison suffices. */
      emit_branch(as, MIPSI_BEQ, tmp1, type, l_end);
    } else {
      emit_branch(as, MIPSI_BEQ, tmp2, key, l_end);
      emit_tsi(as, MIPSI_LW, tmp2, dest, (int32_t)offsetof(Node, key.gcr));
      emit_branch(as, MIPSI_BNE, tmp1, type, l_next);
    }
  }
  emit_tsi(as, MIPSI_LW, tmp1, dest, (int32_t)offsetof(Node, key.it));
  /* Patch the placeholder: loop back while the next pointer is non-NULL. */
  *l_loop = MIPSI_BNE | MIPSF_S(tmp1) | ((as->mcp-l_loop-1) & 0xffffu);

  /* Load main position relative to tab->node into dest. */
  khash = irref_isk(refkey) ? ir_khash(irkey) : 1;
  if (khash == 0) {
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
  } else {
    Reg tmphash = tmp1;
    if (irref_isk(refkey))
      tmphash = ra_allock(as, khash, allow);  /* Precomputed constant hash. */
    emit_dst(as, MIPSI_ADDU, dest, dest, tmp1);
    lua_assert(sizeof(Node) == 24);
    /* (hash & hmask) * 24 computed as (x << 5) - (x << 3). */
    emit_dst(as, MIPSI_SUBU, tmp1, tmp2, tmp1);
    emit_dta(as, MIPSI_SLL, tmp1, tmp1, 3);
    emit_dta(as, MIPSI_SLL, tmp2, tmp1, 5);
    emit_dst(as, MIPSI_AND, tmp1, tmp2, tmphash);
    emit_tsi(as, MIPSI_LW, dest, tab, (int32_t)offsetof(GCtab, node));
    emit_tsi(as, MIPSI_LW, tmp2, tab, (int32_t)offsetof(GCtab, hmask));
    if (irref_isk(refkey)) {
      /* Nothing to do. */
    } else if (irt_isstr(kt)) {
      emit_tsi(as, MIPSI_LW, tmp1, key, (int32_t)offsetof(GCstr, hash));
    } else {  /* Must match with hash*() in lj_tab.c. */
      emit_dst(as, MIPSI_SUBU, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp2, tmp2, dest, (-HASH_ROT3)&31);
      emit_dst(as, MIPSI_XOR, tmp1, tmp1, tmp2);
      emit_rotr(as, tmp1, tmp1, dest, (-HASH_ROT2-HASH_ROT1)&31);
      emit_dst(as, MIPSI_SUBU, tmp2, tmp2, dest);
      if (irt_isnum(kt)) {  /* Hash the raw lo/hi words of the number. */
        emit_dst(as, MIPSI_XOR, tmp2, tmp2, tmp1);
        if ((as->flags & JIT_F_MIPS32R2)) {
          emit_dta(as, MIPSI_ROTR, dest, tmp1, (-HASH_ROT1)&31);
        } else {  /* Pre-R2: synthesize the rotate as SRL | SLL. */
          emit_dst(as, MIPSI_OR, dest, dest, tmp1);
          emit_dta(as, MIPSI_SLL, tmp1, tmp1, HASH_ROT1);
          emit_dta(as, MIPSI_SRL, dest, tmp1, (-HASH_ROT1)&31);
        }
        emit_dst(as, MIPSI_ADDU, tmp1, tmp1, tmp1);
        emit_tg(as, MIPSI_MFC1, tmp2, key);
        emit_tg(as, MIPSI_MFC1, tmp1, key+1);
      } else {  /* Hash the GC object pointer with a bias. */
        emit_dst(as, MIPSI_XOR, tmp2, key, tmp1);
        emit_rotr(as, dest, tmp1, tmp2, (-HASH_ROT1)&31);
        emit_dst(as, MIPSI_ADDU, tmp1, key, ra_allock(as, HASH_BIAS, allow));
      }
    }
  }
}
 733 
/* Assemble HREFK: constant-key hash lookup at a known node slot.
** Guards that the key at the precomputed slot matches (payload word,
** then type word); returns the node address in dest if used.
*/
static void asm_hrefk(ASMState *as, IRIns *ir)
{
  IRIns *kslot = IR(ir->op2);
  IRIns *irkey = IR(kslot->op1);
  int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
  int32_t kofs = ofs + (int32_t)offsetof(Node, key);
  /* A large offset needs dest as an address scratch even if unused. */
  Reg dest = (ra_used(ir)||ofs > 32736) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
  Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg key = RID_NONE, type = RID_TMP, idx = node;
  RegSet allow = rset_exclude(RSET_GPR, node);
  int32_t lo, hi;
  lua_assert(ofs % sizeof(Node) == 0);
  if (ofs > 32736) {  /* Offset too big: precompute node+ofs into dest. */
    idx = dest;
    rset_clear(allow, dest);
    kofs = (int32_t)offsetof(Node, key);
  } else if (ra_hasreg(dest)) {
    emit_tsi(as, MIPSI_ADDIU, dest, node, ofs);
  }
  if (!irt_ispri(irkey->t)) {  /* Non-primitive keys compare the payload. */
    key = ra_scratch(as, allow);
    rset_clear(allow, key);
  }
  if (irt_isnum(irkey->t)) {  /* Numbers compared as raw lo/hi words. */
    lo = (int32_t)ir_knum(irkey)->u32.lo;
    hi = (int32_t)ir_knum(irkey)->u32.hi;
  } else {
    lo = irkey->i;
    hi = irt_toitype(irkey->t);
    if (!ra_hasreg(key))  /* Primitive key: only the type tag is checked. */
      goto nolo;
  }
  asm_guard(as, MIPSI_BNE, key, lo ? ra_allock(as, lo, allow) : RID_ZERO);
nolo:
  asm_guard(as, MIPSI_BNE, type, hi ? ra_allock(as, hi, allow) : RID_ZERO);
  if (ra_hasreg(key)) emit_tsi(as, MIPSI_LW, key, idx, kofs+(LJ_BE?4:0));
  emit_tsi(as, MIPSI_LW, type, idx, kofs+(LJ_BE?0:4));
  if (ofs > 32736)
    emit_tsi(as, MIPSI_ADDU, dest, node, ra_allock(as, ofs, allow));
}
 774 
/* Assemble NEWREF: create a new hash key slot via lj_tab_newkey(). */
static void asm_newref(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {  /* Skip entirely if the store was sunk. */
    const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_tab_newkey];
    IRRef args[3];
    args[0] = ASMREF_L;     /* lua_State *L */
    args[1] = ir->op1;      /* GCtab *t     */
    args[2] = ASMREF_TMP1;  /* cTValue *key */
    asm_setupresult(as, ir, ci);  /* TValue * */
    asm_gencall(as, ci, args);
    /* Materialize a TValue pointer for the key argument. */
    asm_tvptr(as, ra_releasetmp(as, ASMREF_TMP1), ir->op2);
  }
}
 788 
/* UREFO/UREFC: compute the address of an upvalue's value. */
static void asm_uref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op1)) {
    /* Constant function: load the value pointer from its known address. */
    GCfunc *fn = ir_kfunc(IR(ir->op1));
    MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
    emit_lsptr(as, MIPSI_LW, dest, v, RSET_GPR);
  } else {
    Reg uv = ra_scratch(as, RSET_GPR);
    Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
    if (ir->o == IR_UREFC) {
      /* UREFC: guard that the upvalue is closed; address is &uv->tv. */
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_ADDIU, dest, uv, (int32_t)offsetof(GCupval, tv));
      emit_tsi(as, MIPSI_LBU, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
    } else {
      /* UREFO: load the indirect value pointer uv->v. */
      emit_tsi(as, MIPSI_LW, dest, uv, (int32_t)offsetof(GCupval, v));
    }
    /* Fetch the GCupval pointer from the function's upvalue array. */
    emit_tsi(as, MIPSI_LW, uv, func,
             (int32_t)offsetof(GCfuncL, uvptr) + 4*(int32_t)(ir->op2 >> 8));
  }
}
 810 
 811 static void asm_fref(ASMState *as, IRIns *ir)
 812 {
 813   UNUSED(as); UNUSED(ir);
 814   lua_assert(!ra_used(ir));
 815 }
 816 
/* STRREF: compute the address of string data (past the GCstr header). */
static void asm_strref(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  IRRef ref = ir->op2, refk = ir->op1;
  int32_t ofs = (int32_t)sizeof(GCstr);  /* Skip over the string header. */
  Reg r;
  if (irref_isk(ref)) {
    IRRef tmp = refk; refk = ref; ref = tmp;  /* Canonicalize: const in refk. */
  } else if (!irref_isk(refk)) {
    /* Both operands variable: dest = left + right, then add the offset. */
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    IRIns *irr = IR(ir->op2);
    if (ra_hasreg(irr->r)) {
      ra_noweak(as, irr->r);  /* Keep the already-assigned reg alive. */
      right = irr->r;
    } else if (mayfuse(as, irr->op2) &&
               irr->o == IR_ADD && irref_isk(irr->op2) &&
               checki16(ofs + IR(irr->op2)->i)) {
      /* Fuse a (x + k) operand by folding k into the header offset. */
      ofs += IR(irr->op2)->i;
      right = ra_alloc1(as, irr->op1, rset_exclude(RSET_GPR, left));
    } else {
      right = ra_allocref(as, ir->op2, rset_exclude(RSET_GPR, left));
    }
    /* Reverse emission: the ADDU runs first, then the ADDIU adds ofs. */
    emit_tsi(as, MIPSI_ADDIU, dest, dest, ofs);
    emit_dst(as, MIPSI_ADDU, dest, left, right);
    return;
  }
  /* One constant operand: fold it into the offset. */
  r = ra_alloc1(as, ref, RSET_GPR);
  ofs += IR(refk)->i;
  if (checki16(ofs))
    emit_tsi(as, MIPSI_ADDIU, dest, r, ofs);
  else
    emit_dst(as, MIPSI_ADDU, dest, r,
             ra_allock(as, ofs, rset_exclude(RSET_GPR, r)));
}
 851 
 852 /* -- Loads and stores ---------------------------------------------------- */
 853 
 854 static MIPSIns asm_fxloadins(IRIns *ir)
 855 {
 856   switch (irt_type(ir->t)) {
 857   case IRT_I8: return MIPSI_LB;
 858   case IRT_U8: return MIPSI_LBU;
 859   case IRT_I16: return MIPSI_LH;
 860   case IRT_U16: return MIPSI_LHU;
 861   case IRT_NUM: return MIPSI_LDC1;
 862   case IRT_FLOAT: return MIPSI_LWC1;
 863   default: return MIPSI_LW;
 864   }
 865 }
 866 
 867 static MIPSIns asm_fxstoreins(IRIns *ir)
 868 {
 869   switch (irt_type(ir->t)) {
 870   case IRT_I8: case IRT_U8: return MIPSI_SB;
 871   case IRT_I16: case IRT_U16: return MIPSI_SH;
 872   case IRT_NUM: return MIPSI_SDC1;
 873   case IRT_FLOAT: return MIPSI_SWC1;
 874   default: return MIPSI_SW;
 875   }
 876 }
 877 
/* FLOAD: load an object field at a fixed offset from the field table. */
static void asm_fload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg idx = ra_alloc1(as, ir->op1, RSET_GPR);
  MIPSIns mi = asm_fxloadins(ir);
  int32_t ofs;
  if (ir->op2 == IRFL_TAB_ARRAY) {
    ofs = asm_fuseabase(as, ir->op1);
    if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
      emit_tsi(as, MIPSI_ADDIU, dest, idx, ofs);
      return;
    }
  }
  ofs = field_ofs[ir->op2];  /* Offset table indexed by IRFL_* constant. */
  lua_assert(!irt_isfp(ir->t));  /* FP fields are not loaded via FLOAD. */
  emit_tsi(as, mi, dest, idx, ofs);
}
 895 
/* FSTORE: store to an object field; ir->op1 is the FREF being stored to. */
static void asm_fstore(ASMState *as, IRIns *ir)
{
  if (ir->r != RID_SINK) {  /* Skip sunk stores. */
    Reg src = ra_alloc1z(as, ir->op2, RSET_GPR);
    IRIns *irf = IR(ir->op1);  /* The FREF supplies object and field id. */
    Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
    int32_t ofs = field_ofs[irf->op2];
    MIPSIns mi = asm_fxstoreins(ir);
    lua_assert(!irt_isfp(ir->t));  /* FP fields are not stored via FSTORE. */
    emit_tsi(as, mi, src, idx, ofs);
  }
}
 908 
/* XLOAD: load from an arbitrary address, with operand fusion. */
static void asm_xload(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
  lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));  /* Unaligned not handled. */
  asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR, 0);
}
 915 
/* XSTORE: store to an arbitrary address plus ofs, with operand fusion.
** The extra ofs parameter lets asm_hiop() store both halves of a 64 bit
** value through the same address expression.
*/
static void asm_xstore(ASMState *as, IRIns *ir, int32_t ofs)
{
  if (ir->r != RID_SINK) {  /* Skip sunk stores. */
    Reg src = ra_alloc1z(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
    asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
                 rset_exclude(RSET_GPR, src), ofs);
  }
}
 924 
/* ALOAD/HLOAD/ULOAD/VLOAD: load a TValue and guard its type tag.
** NOTE: reverse emission — the tag load at the bottom runs first.
*/
static void asm_ahuvload(ASMState *as, IRIns *ir)
{
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_TMP, idx;
  RegSet allow = RSET_GPR;
  int32_t ofs = 0;
  if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  rset_clear(allow, idx);
  if (irt_isnum(t)) {
    /* Number: exit unless the tag hiword is below LJ_TISNUM. */
    asm_guard(as, MIPSI_BEQ, type, RID_ZERO);
    emit_tsi(as, MIPSI_SLTIU, type, type, (int32_t)LJ_TISNUM);
    if (ra_hasreg(dest))
      emit_hsi(as, MIPSI_LDC1, dest, idx, ofs);
  } else {
    /* Other types: exit unless the tag matches exactly. */
    asm_guard(as, MIPSI_BNE, type, ra_allock(as, irt_toitype(t), allow));
    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, idx, ofs+(LJ_BE?4:0));
  }
  emit_tsi(as, MIPSI_LW, type, idx, ofs+(LJ_BE?0:4));  /* Load the tag word. */
}
 949 
/* ASTORE/HSTORE/USTORE: store a TValue (value word + type tag word). */
static void asm_ahustore(ASMState *as, IRIns *ir)
{
  RegSet allow = RSET_GPR;
  Reg idx, src = RID_NONE, type = RID_NONE;
  int32_t ofs = 0;
  if (ir->r == RID_SINK)  /* Skip sunk stores. */
    return;
  if (irt_isnum(ir->t)) {
    src = ra_alloc1(as, ir->op2, RSET_FPR);
  } else {
    if (!irt_ispri(ir->t)) {  /* Primitives need only the tag word. */
      src = ra_alloc1(as, ir->op2, allow);
      rset_clear(allow, src);
    }
    type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
    rset_clear(allow, type);
  }
  idx = asm_fuseahuref(as, ir->op1, &ofs, allow);
  if (irt_isnum(ir->t)) {
    emit_hsi(as, MIPSI_SDC1, src, idx, ofs);  /* One 64 bit FP store. */
  } else {
    if (ra_hasreg(src))
      emit_tsi(as, MIPSI_SW, src, idx, ofs+(LJ_BE?4:0));
    emit_tsi(as, MIPSI_SW, type, idx, ofs+(LJ_BE?0:4));
  }
}
 976 
/* SLOAD: load a Lua stack slot, with optional conversion and type check.
** NOTE: reverse emission — the slot load runs before the guards above it.
*/
static void asm_sload(ASMState *as, IRIns *ir)
{
  /* Slots are 8 bytes; IRSLOAD_FRAME selects the frame link half. */
  int32_t ofs = 8*((int32_t)ir->op1-1) + ((ir->op2 & IRSLOAD_FRAME) ? 4 : 0);
  IRType1 t = ir->t;
  Reg dest = RID_NONE, type = RID_NONE, base;
  RegSet allow = RSET_GPR;
  lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
  lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
  lua_assert(!irt_isint(t) || (ir->op2 & (IRSLOAD_CONVERT|IRSLOAD_FRAME)));
  if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
    /* Guarded num->int conversion: handled by asm_tointg(). */
    dest = ra_scratch(as, RSET_FPR);
    asm_tointg(as, ir, dest);
    t.irt = IRT_NUM;  /* Continue with a regular number type check. */
  } else if (ra_used(ir)) {
    lua_assert(irt_isnum(t) || irt_isint(t) || irt_isaddr(t));
    dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : RSET_GPR);
    rset_clear(allow, dest);
    base = ra_alloc1(as, REF_BASE, allow);
    rset_clear(allow, base);
    if ((ir->op2 & IRSLOAD_CONVERT)) {
      if (irt_isint(t)) {
        /* num -> int: truncate via an FP scratch reg, then move to GPR. */
        Reg tmp = ra_scratch(as, RSET_FPR);
        emit_tg(as, MIPSI_MFC1, dest, tmp);
        emit_fg(as, MIPSI_CVT_W_D, tmp, tmp);
        dest = tmp;  /* The slot load below targets the FP scratch. */
        t.irt = IRT_NUM;  /* Check for original type. */
      } else {
        /* int -> num: move to FP reg, then convert to double. */
        Reg tmp = ra_scratch(as, RSET_GPR);
        emit_fg(as, MIPSI_CVT_D_W, dest, dest);
        emit_tg(as, MIPSI_MTC1, tmp, dest);
        dest = tmp;  /* The slot load below targets the GPR scratch. */
        t.irt = IRT_INT;  /* Check for original type. */
      }
    }
    goto dotypecheck;
  }
  base = ra_alloc1(as, REF_BASE, allow);
  rset_clear(allow, base);
dotypecheck:
  if (irt_isnum(t)) {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      /* Exit unless the tag hiword is below LJ_TISNUM (i.e. a number). */
      asm_guard(as, MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)LJ_TISNUM);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_hsi(as, MIPSI_LDC1, dest, base, ofs);
  } else {
    if ((ir->op2 & IRSLOAD_TYPECHECK)) {
      /* Exit unless the tag matches the expected type exactly. */
      Reg ktype = ra_allock(as, irt_toitype(t), allow);
      asm_guard(as, MIPSI_BNE, RID_TMP, ktype);
      type = RID_TMP;
    }
    if (ra_hasreg(dest)) emit_tsi(as, MIPSI_LW, dest, base, ofs ^ (LJ_BE?4:0));
  }
  if (ra_hasreg(type)) emit_tsi(as, MIPSI_LW, type, base, ofs ^ (LJ_BE?0:4));
}
1033 
1034 /* -- Allocations --------------------------------------------------------- */
1035 
#if LJ_HASFFI
/* CNEW/CNEWI: allocate a cdata object via lj_mem_newgco() and, for CNEWI,
** initialize its immutable payload. NOTE: reverse emission — the call is
** emitted last, so it runs first; the stores above it run after the call.
*/
static void asm_cnew(ASMState *as, IRIns *ir)
{
  CTState *cts = ctype_ctsG(J2G(as->J));
  CTypeID ctypeid = (CTypeID)IR(ir->op1)->i;
  CTSize sz = (ir->o == IR_CNEWI || ir->op2 == REF_NIL) ?
              lj_ctype_size(cts, ctypeid) : (CTSize)IR(ir->op2)->i;
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
  IRRef args[2];
  RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
  RegSet drop = RSET_SCRATCH;
  lua_assert(sz != CTSIZE_INVALID);

  args[0] = ASMREF_L;     /* lua_State *L */
  args[1] = ASMREF_TMP1;  /* MSize size   */
  as->gcsteps++;  /* Allocation may trigger a GC step. */

  if (ra_hasreg(ir->r))
    rset_clear(drop, ir->r);  /* Dest reg handled below. */
  ra_evictset(as, drop);
  if (ra_used(ir))
    ra_destreg(as, ir, RID_RET);  /* GCcdata * */

  /* Initialize immutable cdata object. */
  if (ir->o == IR_CNEWI) {
    int32_t ofs = sizeof(GCcdata);
    lua_assert(sz == 4 || sz == 8);
    if (sz == 8) {
      ofs += 4;
      lua_assert((ir+1)->o == IR_HIOP);  /* Hiword operand follows. */
      if (LJ_LE) ir++;
    }
    /* Store each 32 bit word, walking ir to the lo/hi operand as needed. */
    for (;;) {
      Reg r = ra_alloc1z(as, ir->op2, allow);
      emit_tsi(as, MIPSI_SW, r, RID_RET, ofs);
      rset_clear(allow, r);
      if (ofs == sizeof(GCcdata)) break;
      ofs -= 4; if (LJ_BE) ir++; else ir--;
    }
  }
  /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
  emit_tsi(as, MIPSI_SB, RID_RET+1, RID_RET, offsetof(GCcdata, gct));
  emit_tsi(as, MIPSI_SH, RID_TMP, RID_RET, offsetof(GCcdata, ctypeid));
  emit_ti(as, MIPSI_LI, RID_RET+1, ~LJ_TCDATA);
  emit_ti(as, MIPSI_LI, RID_TMP, ctypeid); /* Lower 16 bit used. Sign-ext ok. */
  asm_gencall(as, ci, args);
  /* Total allocation size (header + payload) passed in TMP1. */
  ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
               ra_releasetmp(as, ASMREF_TMP1));
}
#else
#define asm_cnew(as, ir)        ((void)0)
#endif
1088 
1089 /* -- Write barriers ------------------------------------------------------ */
1090 
/* TBAR: table write barrier. If the table is black, clear its black bit
** and link it into g->gc.grayagain. NOTE: reverse emission — runtime order
** is bottom-to-top (load marked, test, conditional skip, then the stores).
*/
static void asm_tbar(ASMState *as, IRIns *ir)
{
  Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
  Reg mark = ra_scratch(as, rset_exclude(RSET_GPR, tab));
  Reg link = RID_TMP;
  MCLabel l_end = emit_label(as);  /* Branch target past the barrier. */
  emit_tsi(as, MIPSI_SW, link, tab, (int32_t)offsetof(GCtab, gclist));
  emit_tsi(as, MIPSI_SB, mark, tab, (int32_t)offsetof(GCtab, marked));
  emit_setgl(as, tab, gc.grayagain);
  emit_getgl(as, link, gc.grayagain);
  emit_dst(as, MIPSI_XOR, mark, mark, RID_TMP);  /* Clear black bit. */
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);  /* Not black? Skip. */
  emit_tsi(as, MIPSI_ANDI, RID_TMP, mark, LJ_GC_BLACK);
  emit_tsi(as, MIPSI_LBU, mark, tab, (int32_t)offsetof(GCtab, marked));
}
1106 
/* OBAR: object write barrier for upvalue stores. Calls lj_gc_barrieruv()
** only if the upvalue is black and the stored value is white.
** NOTE: reverse emission — the LBU loads at the bottom run first.
*/
static void asm_obar(ASMState *as, IRIns *ir)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
  IRRef args[2];
  MCLabel l_end;
  Reg obj, val, tmp;
  /* No need for other object barriers (yet). */
  lua_assert(IR(ir->op1)->o == IR_UREFC);
  ra_evictset(as, RSET_SCRATCH);  /* The barrier call clobbers scratch regs. */
  l_end = emit_label(as);  /* Skip target when no barrier is needed. */
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ir->op1;      /* TValue *tv      */
  asm_gencall(as, ci, args);
  /* g = J->glref, computed as RID_JGL - 32768. */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  obj = IR(ir->op1)->r;
  tmp = ra_scratch(as, rset_exclude(RSET_GPR, obj));
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);  /* Obj not black? */
  emit_tsi(as, MIPSI_ANDI, tmp, tmp, LJ_GC_BLACK);
  emit_branch(as, MIPSI_BEQ, RID_TMP, RID_ZERO, l_end);  /* Val not white? */
  emit_tsi(as, MIPSI_ANDI, RID_TMP, RID_TMP, LJ_GC_WHITES);
  val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
  /* obj points at uv->tv (UREFC); step back to the GCupval marked byte. */
  emit_tsi(as, MIPSI_LBU, tmp, obj,
           (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
  emit_tsi(as, MIPSI_LBU, RID_TMP, val, (int32_t)offsetof(GChead, marked));
}
1132 
1133 /* -- Arithmetic and logic operations ------------------------------------- */
1134 
/* Binary FP arithmetic: dest = left <mi> right. */
static void asm_fparith(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg right, left = ra_alloc2(as, ir, RSET_FPR);
  right = (left >> 8); left &= 255;  /* ra_alloc2 packs both regs into one. */
  emit_fgh(as, mi, dest, left, right);
}
1142 
/* Unary FP operation: dest = <mi> left. */
static void asm_fpunary(ASMState *as, IRIns *ir, MIPSIns mi)
{
  Reg dest = ra_dest(as, ir, RSET_FPR);
  Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
  emit_fg(as, mi, dest, left);
}
1149 
/* Try to fuse the EXP2(MUL(LOG2(x), y)) IR chain into one pow(x, y) call.
** Both intermediate results must be otherwise unused and adjacent to ir.
** Returns 1 if the pattern matched and the call was emitted, 0 otherwise.
*/
static int asm_fpjoin_pow(ASMState *as, IRIns *ir)
{
  IRIns *irp = IR(ir->op1);
  if (irp == ir-1 && irp->o == IR_MUL && !ra_used(irp)) {
    IRIns *irpp = IR(irp->op1);
    if (irpp == ir-2 && irpp->o == IR_FPMATH &&
        irpp->op2 == IRFPM_LOG2 && !ra_used(irpp)) {
      const CCallInfo *ci = &lj_ir_callinfo[IRCALL_pow];
      IRRef args[2];
      args[0] = irpp->op1;  /* Base x (operand of the LOG2). */
      args[1] = irp->op2;   /* Exponent y (other MUL operand). */
      asm_setupresult(as, ir, ci);
      asm_gencall(as, ci, args);
      return 1;
    }
  }
  return 0;
}
1168 
/* ADD: FP add, or integer add with a fused 16 bit immediate. */
static void asm_add(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_ADD_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    if (irref_isk(ir->op2)) {
      int32_t k = IR(ir->op2)->i;
      if (checki16(k)) {  /* Constant fits the ADDIU immediate field. */
        emit_tsi(as, MIPSI_ADDIU, dest, left, k);
        return;
      }
    }
    right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
    emit_dst(as, MIPSI_ADDU, dest, left, right);
  }
}
1187 
/* SUB: FP or integer subtract. */
static void asm_sub(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_SUB_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;  /* Unpack the two allocated regs. */
    emit_dst(as, MIPSI_SUBU, dest, left, right);
  }
}
1199 
/* MUL: FP or integer multiply. */
static void asm_mul(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fparith(as, ir, MIPSI_MUL_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;  /* Unpack the two allocated regs. */
    emit_dst(as, MIPSI_MUL, dest, left, right);
  }
}
1211 
/* NEG: FP negate, or integer negate as 0 - x. */
static void asm_neg(ASMState *as, IRIns *ir)
{
  if (irt_isnum(ir->t)) {
    asm_fpunary(as, ir, MIPSI_NEG_D);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  }
}
1222 
/* ADDOV/SUBOV: checked integer add/subtract with a signed-overflow guard.
** NOTE: reverse emission — the arithmetic runs first, then the overflow
** test, then the guard branch.
*/
static void asm_arithov(ASMState *as, IRIns *ir)
{
  Reg right, left, tmp, dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int k = IR(ir->op2)->i;
    if (ir->o == IR_SUBOV) k = -k;  /* Treat SUBOV k as ADDOV -k. */
    if (checki16(k)) {  /* (dest < left) == (k >= 0 ? 1 : 0) */
      left = ra_alloc1(as, ir->op1, RSET_GPR);
      asm_guard(as, k >= 0 ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_dst(as, MIPSI_SLT, RID_TMP, dest, dest == left ? RID_TMP : left);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      /* Preserve the original left value in TMP if dest aliases it. */
      if (dest == left) emit_move(as, RID_TMP, left);
      return;
    }
  }
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  tmp = ra_scratch(as, rset_exclude(rset_exclude(rset_exclude(RSET_GPR, left),
                                                 right), dest));
  asm_guard(as, MIPSI_BLTZ, RID_TMP, 0);  /* Exit if the sign test is set. */
  emit_dst(as, MIPSI_AND, RID_TMP, RID_TMP, tmp);
  if (ir->o == IR_ADDOV) {  /* ((dest^left) & (dest^right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, dest, dest == right ? RID_TMP : right);
  } else {  /* ((dest^left) & (dest^~right)) < 0 */
    emit_dst(as, MIPSI_XOR, RID_TMP, RID_TMP, dest);
    emit_dst(as, MIPSI_NOR, RID_TMP, dest == right ? RID_TMP : right, RID_ZERO);
  }
  emit_dst(as, MIPSI_XOR, tmp, dest, dest == left ? RID_TMP : left);
  emit_dst(as, ir->o == IR_ADDOV ? MIPSI_ADDU : MIPSI_SUBU, dest, left, right);
  /* Save the clobbered operand in TMP before the arithmetic overwrites it. */
  if (dest == left || dest == right)
    emit_move(as, RID_TMP, dest == left ? left : right);
}
1255 
/* MULOV: multiply with overflow check. Only emitted in dual-number mode,
** which is not implemented for this target.
*/
static void asm_mulov(ASMState *as, IRIns *ir)
{
#if LJ_DUALNUM
#error "NYI: MULOV"
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused in single-number mode. */
#endif
}
1264 
#if LJ_HASFFI
/* 64 bit add split across two IR ops: ir is the HIOP, ir-1 the loword op.
** The loword part leaves the carry in RID_TMP for the hiword part.
** NOTE: reverse emission — the loword add (emitted below) runs first.
*/
static void asm_add64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      /* Hiword constant 0: just add the carry. */
      emit_dst(as, MIPSI_ADDU, dest, left, RID_TMP);
      goto loarith;
    } else if (checki16(k)) {
      emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      goto loarith;
    }
  }
  emit_dst(as, MIPSI_ADDU, dest, dest, RID_TMP);  /* Add the carry. */
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, MIPSI_ADDU, dest, left, right);
loarith:
  /* Loword part: compute the sum and the carry (unsigned compare). */
  ir--;
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (k == 0) {
      if (dest != left)
        emit_move(as, dest, left);
      return;
    } else if (checki16(k)) {
      if (dest == left) {
        /* Need the pre-add value for the carry test: go via a scratch. */
        Reg tmp = ra_scratch(as, rset_exclude(RSET_GPR, left));
        emit_move(as, dest, tmp);
        dest = tmp;
      }
      emit_dst(as, MIPSI_SLTU, RID_TMP, dest, left);  /* Carry out. */
      emit_tsi(as, MIPSI_ADDIU, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  if (dest == left && dest == right) {
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, dest, dest == left ? right : left);
  emit_dst(as, MIPSI_ADDU, dest, left, right);
}
1314 
/* 64 bit subtract split across two IR ops: ir is the HIOP, ir-1 the loword.
** The loword part leaves the borrow in RID_TMP for the hiword part.
*/
static void asm_sub64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);  /* Subtract the borrow. */
  emit_dst(as, MIPSI_SUBU, dest, left, right);
  ir--;  /* Loword part. */
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  if (dest == left) {
    /* The borrow test needs the original left: go via a scratch reg. */
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
    emit_move(as, dest, tmp);
    dest = tmp;
  }
  emit_dst(as, MIPSI_SLTU, RID_TMP, left, dest);  /* Borrow out. */
  emit_dst(as, MIPSI_SUBU, dest, left, right);
}
1334 
/* 64 bit negate split across two IR ops: 0 - x with borrow in RID_TMP. */
static void asm_neg64(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  emit_dst(as, MIPSI_SUBU, dest, dest, RID_TMP);  /* Subtract the borrow. */
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
  ir--;  /* Loword part. */
  dest = ra_dest(as, ir, RSET_GPR);
  left = ra_alloc1(as, ir->op1, RSET_GPR);
  /* Borrow is set iff the loword result is non-zero. */
  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_ZERO, dest);
  emit_dst(as, MIPSI_SUBU, dest, RID_ZERO, left);
}
#endif
1348 
/* BNOT: bitwise not via NOR. A BNOT(BOR(a,b)) chain fuses into NOR a,b. */
static void asm_bitnot(ASMState *as, IRIns *ir)
{
  Reg left, right, dest = ra_dest(as, ir, RSET_GPR);
  IRIns *irl = IR(ir->op1);
  if (mayfuse(as, ir->op1) && irl->o == IR_BOR) {
    /* Fuse: ~(a | b) == a NOR b. */
    left = ra_alloc2(as, irl, RSET_GPR);
    right = (left >> 8); left &= 255;
  } else {
    /* Plain not: x NOR 0 == ~x. */
    left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
    right = RID_ZERO;
  }
  emit_dst(as, MIPSI_NOR, dest, left, right);
}
1362 
/* BSWAP: reverse the byte order of a 32 bit value. */
static void asm_bitswap(ASMState *as, IRIns *ir)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
  if ((as->flags & JIT_F_MIPS32R2)) {
    /* MIPS32R2: swap bytes within halfwords, then rotate by 16. */
    emit_dta(as, MIPSI_ROTR, dest, RID_TMP, 16);
    emit_dst(as, MIPSI_WSBH, RID_TMP, 0, left);
  } else {
    /* Pre-R2 fallback: assemble the four bytes with shifts and masks.
    ** NOTE: reverse emission — runtime order is bottom-to-top.
    */
    Reg tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), dest));
    emit_dst(as, MIPSI_OR, dest, dest, tmp);
    emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
    emit_tsi(as, MIPSI_ANDI, dest, dest, 0xff00);
    emit_dta(as, MIPSI_SLL, RID_TMP, RID_TMP, 8);
    emit_dta(as, MIPSI_SRL, dest, left, 8);
    emit_tsi(as, MIPSI_ANDI, RID_TMP, left, 0xff00);
    emit_dst(as, MIPSI_OR, tmp, tmp, RID_TMP);
    emit_dta(as, MIPSI_SRL, tmp, left, 24);
    emit_dta(as, MIPSI_SLL, RID_TMP, left, 24);
  }
}
1383 
/* BAND/BOR/BXOR: register form mi, or immediate form mik when the
** constant operand fits into an unsigned 16 bit immediate.
*/
static void asm_bitop(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  Reg right, left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
  if (irref_isk(ir->op2)) {
    int32_t k = IR(ir->op2)->i;
    if (checku16(k)) {  /* ANDI/ORI/XORI take a zero-extended immediate. */
      emit_tsi(as, mik, dest, left, k);
      return;
    }
  }
  right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
  emit_dst(as, mi, dest, left, right);
}
1398 
/* BSHL/BSHR/BSAR: variable shift mi, or constant shift mik (amount & 31). */
static void asm_bitshift(ASMState *as, IRIns *ir, MIPSIns mi, MIPSIns mik)
{
  Reg dest = ra_dest(as, ir, RSET_GPR);
  if (irref_isk(ir->op2)) {  /* Constant shifts. */
    uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
    emit_dta(as, mik, dest, ra_hintalloc(as, ir->op1, dest, RSET_GPR), shift);
  } else {
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    emit_dst(as, mi, dest, right, left);  /* Shift amount is in rs. */
  }
}
1411 
/* BROR: rotate right. Uses ROTR/ROTRV on MIPS32R2, otherwise synthesizes
** the rotate from SRLV/SLLV/OR.
*/
static void asm_bitror(ASMState *as, IRIns *ir)
{
  if ((as->flags & JIT_F_MIPS32R2)) {
    asm_bitshift(as, ir, MIPSI_ROTRV, MIPSI_ROTR);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    if (irref_isk(ir->op2)) {  /* Constant shifts. */
      uint32_t shift = (uint32_t)(IR(ir->op2)->i & 31);
      Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
      emit_rotr(as, dest, left, RID_TMP, shift);
    } else {
      /* dest = (left >> right) | (left << (32 - right)).
      ** NOTE: reverse emission — runtime order is bottom-to-top.
      */
      Reg right, left = ra_alloc2(as, ir, RSET_GPR);
      right = (left >> 8); left &= 255;
      emit_dst(as, MIPSI_OR, dest, dest, RID_TMP);
      emit_dst(as, MIPSI_SRLV, dest, right, left);
      emit_dst(as, MIPSI_SLLV, RID_TMP, RID_TMP, left);
      emit_dst(as, MIPSI_SUBU, RID_TMP, ra_allock(as, 32, RSET_GPR), right);
    }
  }
}
1432 
/* MIN/MAX: select via conditional moves after an ordered-less-than compare.
** ismax selects the comparison direction; FP uses the C1 condition flag,
** integer uses an SLT result in RID_TMP.
*/
static void asm_min_max(ASMState *as, IRIns *ir, int ismax)
{
  if (irt_isnum(ir->t)) {
    Reg dest = ra_dest(as, ir, RSET_FPR);
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    if (dest == left) {
      emit_fg(as, MIPSI_MOVT_D, dest, right);  /* Overwrite if flag set. */
    } else {
      emit_fg(as, MIPSI_MOVF_D, dest, left);   /* Overwrite if flag clear. */
      if (dest != right) emit_fg(as, MIPSI_MOV_D, dest, right);
    }
    emit_fgh(as, MIPSI_C_OLT_D, 0, ismax ? left : right, ismax ? right : left);
  } else {
    Reg dest = ra_dest(as, ir, RSET_GPR);
    Reg right, left = ra_alloc2(as, ir, RSET_GPR);
    right = (left >> 8); left &= 255;
    if (dest == left) {
      emit_dst(as, MIPSI_MOVN, dest, right, RID_TMP);  /* Move if TMP != 0. */
    } else {
      emit_dst(as, MIPSI_MOVZ, dest, left, RID_TMP);   /* Move if TMP == 0. */
      if (dest != right) emit_move(as, dest, right);
    }
    emit_dst(as, MIPSI_SLT, RID_TMP,
             ismax ? left : right, ismax ? right : left);
  }
}
1460 
1461 /* -- Comparisons --------------------------------------------------------- */
1462 
/* Ordered comparisons LT/GE/LE/GT and their unsigned variants, as guards.
** The op bits are decoded below: bit 2 selects unsigned (SLTU/SLTIU),
** bit 1 selects swapped operands, bit 0 picks the branch sense.
*/
static void asm_comp(ASMState *as, IRIns *ir)
{
  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
  IROp op = ir->o;
  if (irt_isnum(ir->t)) {
    Reg right, left = ra_alloc2(as, ir, RSET_FPR);
    right = (left >> 8); left &= 255;
    asm_guard(as, (op&1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
    emit_fgh(as, MIPSI_C_OLT_D + ((op&3) ^ ((op>>2)&1)), 0, left, right);
  } else {
    Reg right, left = ra_alloc1(as, ir->op1, RSET_GPR);
    if (op == IR_ABC) op = IR_UGT;  /* Array bounds check is unsigned >. */
    if ((op&4) == 0 && irref_isk(ir->op2) && IR(ir->op2)->i == 0) {
      /* Signed compare against 0: use the compare-with-zero branches. */
      MIPSIns mi = (op&2) ? ((op&1) ? MIPSI_BLEZ : MIPSI_BGTZ) :
                            ((op&1) ? MIPSI_BLTZ : MIPSI_BGEZ);
      asm_guard(as, mi, left, 0);
    } else {
      if (irref_isk(ir->op2)) {
        int32_t k = IR(ir->op2)->i;
        if ((op&2)) k++;  /* Turn LE/GT into LT/GE against k+1. */
        if (checki16(k)) {
          asm_guard(as, (op&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
          emit_tsi(as, (op&4) ? MIPSI_SLTIU : MIPSI_SLTI,
                   RID_TMP, left, k);
          return;
        }
      }
      right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
      asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
      emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT,
               RID_TMP, (op&2) ? right : left, (op&2) ? left : right);
    }
  }
}
1497 
/* EQ/NE comparison as a guard; bit 0 of the opcode picks the sense. */
static void asm_compeq(ASMState *as, IRIns *ir)
{
  Reg right, left = ra_alloc2(as, ir, irt_isnum(ir->t) ? RSET_FPR : RSET_GPR);
  right = (left >> 8); left &= 255;
  if (irt_isnum(ir->t)) {
    /* FP: compare into the C1 flag, branch on it. */
    asm_guard(as, (ir->o & 1) ? MIPSI_BC1T : MIPSI_BC1F, 0, 0);
    emit_fgh(as, MIPSI_C_EQ_D, 0, left, right);
  } else {
    /* Integer: direct compare-and-branch guard. */
    asm_guard(as, (ir->o & 1) ? MIPSI_BEQ : MIPSI_BNE, left, right);
  }
}
1509 
#if LJ_HASFFI
/* 64 bit integer comparisons. */
/* ir is the HIOP (hiword operands), ir-1 holds the loword operands and the
** actual comparison opcode. Equal hiwords fall through to an unsigned
** loword compare. NOTE: reverse emission — runtime order is bottom-to-top.
*/
static void asm_comp64(ASMState *as, IRIns *ir)
{
  /* ORDER IR: LT GE LE GT  ULT UGE ULE UGT. */
  IROp op = (ir-1)->o;
  MCLabel l_end;
  Reg rightlo, leftlo, righthi, lefthi = ra_alloc2(as, ir, RSET_GPR);
  righthi = (lefthi >> 8); lefthi &= 255;
  leftlo = ra_alloc2(as, ir-1,
                     rset_exclude(rset_exclude(RSET_GPR, lefthi), righthi));
  rightlo = (leftlo >> 8); leftlo &= 255;
  asm_guard(as, ((op^(op>>1))&1) ? MIPSI_BNE : MIPSI_BEQ, RID_TMP, RID_ZERO);
  l_end = emit_label(as);
  if (lefthi != righthi)
    emit_dst(as, (op&4) ? MIPSI_SLTU : MIPSI_SLT, RID_TMP,
             (op&2) ? righthi : lefthi, (op&2) ? lefthi : righthi);
  emit_dst(as, MIPSI_SLTU, RID_TMP,
           (op&2) ? rightlo : leftlo, (op&2) ? leftlo : rightlo);
  if (lefthi != righthi)
    emit_branch(as, MIPSI_BEQ, lefthi, righthi, l_end);
}
1532 
/* 64 bit EQ/NE: OR together the XORs of both halves and test for zero. */
static void asm_comp64eq(ASMState *as, IRIns *ir)
{
  Reg tmp, right, left = ra_alloc2(as, ir, RSET_GPR);
  right = (left >> 8); left &= 255;
  asm_guard(as, ((ir-1)->o & 1) ? MIPSI_BEQ : MIPSI_BNE, RID_TMP, RID_ZERO);
  tmp = ra_scratch(as, rset_exclude(rset_exclude(RSET_GPR, left), right));
  emit_dst(as, MIPSI_OR, RID_TMP, RID_TMP, tmp);
  emit_dst(as, MIPSI_XOR, tmp, left, right);  /* Hiword difference. */
  left = ra_alloc2(as, ir-1, RSET_GPR);
  right = (left >> 8); left &= 255;
  emit_dst(as, MIPSI_XOR, RID_TMP, left, right);  /* Loword difference. */
}
#endif
1546 
1547 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1548 
/* Hiword op of a split 64 bit op. Previous op must be the loword op. */
static void asm_hiop(ASMState *as, IRIns *ir)
{
#if LJ_HASFFI
  /* HIOP is marked as a store because it needs its own DCE logic. */
  int uselo = ra_used(ir-1), usehi = ra_used(ir);  /* Loword/hiword used? */
  if (LJ_UNLIKELY(!(as->flags & JIT_F_OPT_DCE))) uselo = usehi = 1;
  if ((ir-1)->o == IR_CONV) {  /* Conversions to/from 64 bit. */
    as->curins--;  /* Always skip the CONV. */
    if (usehi || uselo)
      asm_conv64(as, ir);
    return;
  } else if ((ir-1)->o < IR_EQ) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
    asm_comp64(as, ir);
    return;
  } else if ((ir-1)->o <= IR_NE) {  /* 64 bit integer comparisons. ORDER IR. */
    as->curins--;  /* Always skip the loword comparison. */
    asm_comp64eq(as, ir);
    return;
  } else if ((ir-1)->o == IR_XSTORE) {
    as->curins--;  /* Handle both stores here. */
    if ((ir-1)->r != RID_SINK) {
      /* Store hi and lo words at the endian-appropriate offsets. */
      asm_xstore(as, ir, LJ_LE ? 4 : 0);
      asm_xstore(as, ir-1, LJ_LE ? 0 : 4);
    }
    return;
  }
  if (!usehi) return;  /* Skip unused hiword op for all remaining ops. */
  switch ((ir-1)->o) {
  case IR_ADD: as->curins--; asm_add64(as, ir); break;
  case IR_SUB: as->curins--; asm_sub64(as, ir); break;
  case IR_NEG: as->curins--; asm_neg64(as, ir); break;
  case IR_CALLN:
  case IR_CALLXS:
    if (!uselo)
      ra_allocref(as, ir->op1, RID2RSET(RID_RETLO));  /* Mark lo op as used. */
    break;
  case IR_CNEWI:
    /* Nothing to do here. Handled by lo op itself. */
    break;
  default: lua_assert(0); break;
  }
#else
  UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused without FFI. */
#endif
}
1596 
1597 /* -- Stack handling ------------------------------------------------------ */
1598 
/* Check Lua stack size for overflow. Use exit handler as fallback. */
/* Guards (L->maxstack - base) >= 8*topslot; exits with snapshot exitno.
** NOTE: reverse emission — the optional spill at the bottom runs first.
*/
static void asm_stack_check(ASMState *as, BCReg topslot,
                            IRIns *irp, RegSet allow, ExitNo exitno)
{
  /* Try to get an unused temp. register, otherwise spill/restore RID_RET*. */
  Reg tmp, pbase = irp ? (ra_hasreg(irp->r) ? irp->r : RID_TMP) : RID_BASE;
  ExitNo oldsnap = as->snapno;
  rset_clear(allow, pbase);
  tmp = allow ? rset_pickbot(allow) :
                (pbase == RID_RETHI ? RID_RETLO : RID_RETHI);
  /* Temporarily redirect the guard to the requested exit number. */
  as->snapno = exitno;
  asm_guard(as, MIPSI_BNE, RID_TMP, RID_ZERO);
  as->snapno = oldsnap;
  if (allow == RSET_EMPTY)  /* Restore temp. register. */
    emit_tsi(as, MIPSI_LW, tmp, RID_SP, 0);
  else
    ra_modified(as, tmp);
  emit_tsi(as, MIPSI_SLTIU, RID_TMP, RID_TMP, (int32_t)(8*topslot));
  emit_dst(as, MIPSI_SUBU, RID_TMP, tmp, pbase);  /* Available stack space. */
  emit_tsi(as, MIPSI_LW, tmp, tmp, offsetof(lua_State, maxstack));
  if (pbase == RID_TMP)
    emit_getgl(as, RID_TMP, jit_base);
  emit_getgl(as, tmp, jit_L);
  if (allow == RSET_EMPTY)  /* Spill temp. register. */
    emit_tsi(as, MIPSI_SW, tmp, RID_SP, 0);
}
1625 
1626 /* Restore Lua stack from on-trace state. */
static void asm_stack_restore(ASMState *as, SnapShot *snap)
{
  SnapEntry *map = &as->T->snapmap[snap->mapofs];
  /* flinks walks backwards over the frame links stored after the entries. */
  SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1];
  MSize n, nent = snap->nent;
  /* Store the value of all modified slots to the Lua stack. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    int32_t ofs = 8*((int32_t)s-1);  /* Byte offset: 8 bytes per stack slot. */
    IRRef ref = snap_ref(sn);
    IRIns *ir = IR(ref);
    if ((sn & SNAP_NORESTORE))
      continue;  /* Slot needs no store. */
    if (irt_isnum(ir->t)) {
      /* Numbers are stored as one 64 bit FP store. */
      Reg src = ra_alloc1(as, ref, RSET_FPR);
      emit_hsi(as, MIPSI_SDC1, src, RID_BASE, ofs);
    } else {
      /* Other values: store 32 bit value word and type word separately. */
      Reg type;
      RegSet allow = rset_exclude(RSET_GPR, RID_BASE);
      lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
      if (!irt_ispri(ir->t)) {
        Reg src = ra_alloc1(as, ref, allow);
        rset_clear(allow, src);
        /* Value word position depends on endianness. */
        emit_tsi(as, MIPSI_SW, src, RID_BASE, ofs+(LJ_BE?4:0));
      }
      if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        if (s == 0) continue;  /* Do not overwrite link to previous frame. */
        type = ra_allock(as, (int32_t)(*flinks--), allow);
      } else {
        type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
      }
      emit_tsi(as, MIPSI_SW, type, RID_BASE, ofs+(LJ_BE?0:4));
    }
    checkmclim(as);  /* Keep enough machine code space in reserve. */
  }
  lua_assert(map + nent == flinks);  /* All frame links must be consumed. */
}
1665 
1666 /* -- GC handling --------------------------------------------------------- */
1667 
1668 /* Check GC threshold and do one or more GC steps. */
static void asm_gc_check(ASMState *as)
{
  const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
  IRRef args[2];
  MCLabel l_end;
  Reg tmp;
  ra_evictset(as, RSET_SCRATCH);  /* The C call clobbers all scratch regs. */
  l_end = emit_label(as);
  /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
  /* Assumes asm_snap_prep() already done. */
  asm_guard(as, MIPSI_BNE, RID_RET, RID_ZERO);  /* Exit if call returned != 0. */
  args[0] = ASMREF_TMP1;  /* global_State *g */
  args[1] = ASMREF_TMP2;  /* MSize steps     */
  asm_gencall(as, ci, args);
  /* Compute &g from RID_JGL (assumed to hold G plus a 32K bias). */
  emit_tsi(as, MIPSI_ADDIU, ra_releasetmp(as, ASMREF_TMP1), RID_JGL, -32768);
  tmp = ra_releasetmp(as, ASMREF_TMP2);
  emit_loadi(as, tmp, as->gcsteps);
  /* Jump around GC step if GC total < GC threshold. */
  emit_branch(as, MIPSI_BNE, RID_TMP, RID_ZERO, l_end);
  emit_dst(as, MIPSI_SLTU, RID_TMP, RID_TMP, tmp);  /* TMP = total < threshold */
  emit_getgl(as, tmp, gc.threshold);
  emit_getgl(as, RID_TMP, gc.total);
  as->gcsteps = 0;
  checkmclim(as);
}
1694 
1695 /* -- Loop handling ------------------------------------------------------- */
1696 
1697 /* Fixup the loop branch. */
static void asm_loop_fixup(ASMState *as)
{
  MCode *p = as->mctop;    /* End of trace: the loop branch lives here. */
  MCode *target = as->mcp; /* Loop start (code was emitted bottom-up). */
  p[-1] = MIPSI_NOP;       /* Branch delay slot. */
  if (as->loopinv) {  /* Inverted loop branch? */
    /* asm_guard already inverted the cond branch. Only patch the target. */
    /* 16 bit word offset, relative to the instruction after the branch. */
    p[-3] |= ((target-p+2) & 0x0000ffffu);
  } else {
    /* Unconditional loop: patch in an absolute region jump (J). */
    p[-2] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
  }
}
1710 
1711 /* -- Head of trace ------------------------------------------------------- */
1712 
1713 /* Coalesce BASE register for a root trace. */
1714 static void asm_head_root_base(ASMState *as)
1715 {
1716   IRIns *ir = IR(REF_BASE);
1717   Reg r = ir->r;
1718   if (as->loopinv) as->mctop--;
1719   if (ra_hasreg(r)) {
1720     ra_free(as, r);
1721     if (rset_test(as->modset, r) || irt_ismarked(ir->t))
1722       ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
1723     if (r != RID_BASE)
1724       emit_move(as, r, RID_BASE);
1725   }
1726 }
1727 
/* Coalesce BASE register for a side trace.
** Returns the allow set with any register coalesced with the parent removed.
*/
static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
{
  IRIns *ir = IR(REF_BASE);
  Reg r = ir->r;
  if (as->loopinv) as->mctop--;  /* Drop NOP left for inverted loop branch. */
  if (ra_hasreg(r)) {
    ra_free(as, r);
    if (rset_test(as->modset, r) || irt_ismarked(ir->t))
      ir->r = RID_INIT;  /* No inheritance for modified BASE register. */
    if (irp->r == r) {
      rset_clear(allow, r);  /* Mark same BASE register as coalesced. */
    } else if (ra_hasreg(irp->r) && rset_test(as->freeset, irp->r)) {
      rset_clear(allow, irp->r);
      emit_move(as, r, irp->r);  /* Move from coalesced parent reg. */
    } else {
      emit_getgl(as, r, jit_base);  /* Otherwise reload BASE. */
    }
  }
  return allow;
}
1749 
1750 /* -- Tail of trace ------------------------------------------------------- */
1751 
1752 /* Fixup the tail code. */
1753 static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1754 {
1755   MCode *target = lnk ? traceref(as->J,lnk)->mcode : (MCode *)lj_vm_exit_interp;
1756   int32_t spadj = as->T->spadjust;
1757   MCode *p = as->mctop-1;
1758   *p = spadj ? (MIPSI_ADDIU|MIPSF_T(RID_SP)|MIPSF_S(RID_SP)|spadj) : MIPSI_NOP;
1759   p[-1] = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);
1760 }
1761 
1762 /* Prepare tail of code. */
1763 static void asm_tail_prep(ASMState *as)
1764 {
1765   as->mcp = as->mctop-2;  /* Leave room for branch plus nop or stack adj. */
1766   as->invmcp = as->loopref ? as->mcp : NULL;
1767 }
1768 
1769 /* -- Instruction dispatch ------------------------------------------------ */
1770 
/* Assemble a single instruction. Dispatches on the IR opcode. */
static void asm_ir(ASMState *as, IRIns *ir)
{
  switch ((IROp)ir->o) {
  /* Miscellaneous ops. */
  case IR_LOOP: asm_loop(as); break;
  case IR_NOP: case IR_XBAR: lua_assert(!ra_used(ir)); break;
  case IR_USE:
    ra_alloc1(as, ir->op1, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR); break;
  case IR_PHI: asm_phi(as, ir); break;
  case IR_HIOP: asm_hiop(as, ir); break;
  case IR_GCSTEP: asm_gcstep(as, ir); break;

  /* Guarded assertions. */
  case IR_EQ: case IR_NE: asm_compeq(as, ir); break;
  case IR_LT: case IR_GE: case IR_LE: case IR_GT:
  case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT:
  case IR_ABC:
    asm_comp(as, ir);
    break;

  case IR_RETF: asm_retf(as, ir); break;

  /* Bit ops. */
  case IR_BNOT: asm_bitnot(as, ir); break;
  case IR_BSWAP: asm_bitswap(as, ir); break;

  /* Each bitop has a register/register and a register/immediate encoding. */
  case IR_BAND: asm_bitop(as, ir, MIPSI_AND, MIPSI_ANDI); break;
  case IR_BOR:  asm_bitop(as, ir, MIPSI_OR, MIPSI_ORI); break;
  case IR_BXOR: asm_bitop(as, ir, MIPSI_XOR, MIPSI_XORI); break;

  /* Variable-shift vs. constant-shift encodings. */
  case IR_BSHL: asm_bitshift(as, ir, MIPSI_SLLV, MIPSI_SLL); break;
  case IR_BSHR: asm_bitshift(as, ir, MIPSI_SRLV, MIPSI_SRL); break;
  case IR_BSAR: asm_bitshift(as, ir, MIPSI_SRAV, MIPSI_SRA); break;
  case IR_BROL: lua_assert(0); break;  /* Rotate-left not implemented here. */
  case IR_BROR: asm_bitror(as, ir); break;

  /* Arithmetic ops. */
  case IR_ADD: asm_add(as, ir); break;
  case IR_SUB: asm_sub(as, ir); break;
  case IR_MUL: asm_mul(as, ir); break;
  case IR_DIV: asm_fparith(as, ir, MIPSI_DIV_D); break;
  case IR_MOD: asm_callid(as, ir, IRCALL_lj_vm_modi); break;
  case IR_POW: asm_callid(as, ir, IRCALL_lj_vm_powi); break;
  case IR_NEG: asm_neg(as, ir); break;

  case IR_ABS: asm_fpunary(as, ir, MIPSI_ABS_D); break;
  case IR_ATAN2: asm_callid(as, ir, IRCALL_atan2); break;
  case IR_LDEXP: asm_callid(as, ir, IRCALL_ldexp); break;
  case IR_MIN: asm_min_max(as, ir, 0); break;
  case IR_MAX: asm_min_max(as, ir, 1); break;
  case IR_FPMATH:
    /* Try to fuse pow() first, then pick rounding call, sqrt or libm call. */
    if (ir->op2 == IRFPM_EXP2 && asm_fpjoin_pow(as, ir))
      break;
    if (ir->op2 <= IRFPM_TRUNC)
      asm_callround(as, ir, IRCALL_lj_vm_floor + ir->op2);
    else if (ir->op2 == IRFPM_SQRT)
      asm_fpunary(as, ir, MIPSI_SQRT_D);
    else
      asm_callid(as, ir, IRCALL_lj_vm_floor + ir->op2);
    break;

  /* Overflow-checking arithmetic ops. */
  case IR_ADDOV: asm_arithov(as, ir); break;
  case IR_SUBOV: asm_arithov(as, ir); break;
  case IR_MULOV: asm_mulov(as, ir); break;

  /* Memory references. */
  case IR_AREF: asm_aref(as, ir); break;
  case IR_HREF: asm_href(as, ir); break;
  case IR_HREFK: asm_hrefk(as, ir); break;
  case IR_NEWREF: asm_newref(as, ir); break;
  case IR_UREFO: case IR_UREFC: asm_uref(as, ir); break;
  case IR_FREF: asm_fref(as, ir); break;
  case IR_STRREF: asm_strref(as, ir); break;

  /* Loads and stores. */
  case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
    asm_ahuvload(as, ir);
    break;
  case IR_FLOAD: asm_fload(as, ir); break;
  case IR_XLOAD: asm_xload(as, ir); break;
  case IR_SLOAD: asm_sload(as, ir); break;

  case IR_ASTORE: case IR_HSTORE: case IR_USTORE: asm_ahustore(as, ir); break;
  case IR_FSTORE: asm_fstore(as, ir); break;
  case IR_XSTORE: asm_xstore(as, ir, 0); break;

  /* Allocations. */
  case IR_SNEW: case IR_XSNEW: asm_snew(as, ir); break;
  case IR_TNEW: asm_tnew(as, ir); break;
  case IR_TDUP: asm_tdup(as, ir); break;
  case IR_CNEW: case IR_CNEWI: asm_cnew(as, ir); break;

  /* Write barriers. */
  case IR_TBAR: asm_tbar(as, ir); break;
  case IR_OBAR: asm_obar(as, ir); break;

  /* Type conversions. */
  case IR_CONV: asm_conv(as, ir); break;
  case IR_TOBIT: asm_tobit(as, ir); break;
  case IR_TOSTR: asm_tostr(as, ir); break;
  case IR_STRTO: asm_strto(as, ir); break;

  /* Calls. */
  case IR_CALLN: case IR_CALLL: case IR_CALLS: asm_call(as, ir); break;
  case IR_CALLXS: asm_callx(as, ir); break;
  case IR_CARG: break;

  default:
    /* Opcode not implemented for this backend: abort the trace (NYI). */
    setintV(&as->J->errinfo, ir->o);
    lj_trace_err_info(as->J, LJ_TRERR_NYIIR);
    break;
  }
}
1886 
1887 /* -- Trace setup --------------------------------------------------------- */
1888 
/* Ensure there are enough stack slots for call arguments.
** Simulates the argument register/slot assignment to compute the number
** of outgoing stack slots needed, and grows as->evenspill accordingly.
** Returns a register hint for the call result.
*/
static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
{
  IRRef args[CCI_NARGS_MAX*2];
  uint32_t i, nargs = (int)CCI_NARGS(ci);
  /* Four stack slots are always reserved for the first args (o32-style ABI
  ** convention — confirm against target ABI).
  */
  int nslots = 4, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
  asm_collectargs(as, ir, ci, args);
  for (i = 0; i < nargs; i++) {
    if (args[i] && irt_isfp(IR(args[i])->t) &&
        nfpr > 0 && !(ci->flags & CCI_VARARG)) {
      /* FP arg in an FP register also consumes the shadowing GPR(s). */
      nfpr--;
      ngpr -= irt_isnum(IR(args[i])->t) ? 2 : 1;
    } else if (args[i] && irt_isnum(IR(args[i])->t)) {
      /* Double passed in GPRs or on the stack: needs an aligned pair. */
      nfpr = 0;
      ngpr = ngpr & ~1;  /* Round down to an even GPR count. */
      if (ngpr > 0) ngpr -= 2; else nslots = (nslots+3) & ~1;
    } else {
      /* 32 bit value: one GPR, or one stack slot once GPRs run out. */
      nfpr = 0;
      if (ngpr > 0) ngpr--; else nslots++;
    }
  }
  if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
    as->evenspill = nslots;
  return irt_isfp(ir->t) ? REGSP_HINT(RID_FPRET) : REGSP_HINT(RID_RET);
}
1914 
/* Target-specific setup before assembling a trace. */
static void asm_setup_target(ASMState *as)
{
  asm_sparejump_setup(as);  /* Spare jump slots, used by lj_asm_patchexit. */
  asm_exitstub_setup(as);
}
1920 
1921 /* -- Trace patching ------------------------------------------------------ */
1922 
/* Patch exit jumps of existing machine code to a new target. */
void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
{
  MCode *p = T->mcode;
  MCode *pe = (MCode *)((char *)p + T->szmcode);
  MCode *px = exitstub_trace_addr(T, exitno);
  MCode *cstart = NULL, *cstop = NULL;  /* Patched range for i-cache sync. */
  MCode *mcarea = lj_mcode_patch(J, p, 0);  /* Unprotect the mcode area. */
  MCode exitload = MIPSI_LI | MIPSF_T(RID_TMP) | exitno;  /* li TMP, exitno */
  MCode tjump = MIPSI_J|(((uintptr_t)target>>2)&0x03ffffffu);  /* j target */
  for (p++; p < pe; p++) {
    if (*p == exitload) {  /* Look for load of exit number. */
      /* Look for exitstub branch. Yes, this covers all used branch variants. */
      if (((p[-1] ^ (px-p)) & 0xffffu) == 0 &&
          ((p[-1] & 0xf0000000u) == MIPSI_BEQ ||
           (p[-1] & 0xfc1e0000u) == MIPSI_BLTZ ||
           (p[-1] & 0xffe00000u) == MIPSI_BC1F)) {
        ptrdiff_t delta = target - p;  /* Word offset relative to delay slot. */
        if (((delta + 0x8000) >> 16) == 0) {  /* Patch in-range branch. */
        patchbranch:
          p[-1] = (p[-1] & 0xffff0000u) | (delta & 0xffffu);
          *p = MIPSI_NOP;  /* Replace the load of the exit number. */
          cstop = p;
          if (!cstart) cstart = p-1;
        } else {  /* Branch out of range. Use spare jump slot in mcarea. */
          int i;
          /* Scan the spare jump slots (2 words each) after the area header. */
          for (i = (int)(sizeof(MCLink)/sizeof(MCode));
               i < (int)(sizeof(MCLink)/sizeof(MCode)+MIPS_SPAREJUMP*2);
               i += 2) {
            if (mcarea[i] == tjump) {  /* Reuse an existing jump to target. */
              delta = mcarea+i - p;
              goto patchbranch;
            } else if (mcarea[i] == MIPSI_NOP) {  /* Claim a free slot. */
              mcarea[i] = tjump;
              cstart = mcarea+i;
              delta = mcarea+i - p;
              goto patchbranch;
            }
          }
          /* Ignore jump slot overflow. Child trace is simply not attached. */
        }
      } else if (p+1 == pe) {
        /* Patch NOP after code for inverted loop branch. Use of J is ok. */
        lua_assert(p[1] == MIPSI_NOP);
        p[1] = tjump;
        *p = MIPSI_NOP;  /* Replace the load of the exit number. */
        cstop = p+2;
        if (!cstart) cstart = p+1;
      }
    }
  }
  if (cstart) lj_mcode_sync(cstart, cstop);  /* Flush i-cache for patches. */
  lj_mcode_patch(J, mcarea, 1);  /* Re-protect the mcode area. */
}
1977 

/* [<][>][^][v][top][bottom][index][help] */