root/lj_asm_arm64.h

DEFINITIONS

This source file includes the following definitions:
  1. ra_hintalloc
  2. ra_alloc2
  3. asm_exitstub_setup
  4. asm_exitstub_addr
  5. asm_guardcc
  6. asm_guardtnb
  7. asm_guardcnb
  8. asm_isk32
  9. noconflict
  10. asm_fuseabase
  11. asm_fuseahuref
  12. asm_fuseopm
  13. asm_fusexref
  14. asm_fusemadd
  15. asm_fuseandshift
  16. asm_fuseorshift
  17. asm_gencall
  18. asm_setupresult
  19. asm_callx
  20. asm_retf
  21. asm_tointg
  22. asm_tobit
  23. asm_conv
  24. asm_strto
  25. asm_tvstore64
  26. asm_tvptr
  27. asm_aref
  28. asm_href
  29. asm_hrefk
  30. asm_uref
  31. asm_fref
  32. asm_strref
  33. asm_fxloadins
  34. asm_fxstoreins
  35. asm_fload
  36. asm_fstore
  37. asm_xload
  38. asm_xstore
  39. asm_ahuvload
  40. asm_ahustore
  41. asm_sload
  42. asm_cnew
  43. asm_tbar
  44. asm_obar
  45. asm_fparith
  46. asm_fpunary
  47. asm_fpmath
  48. asm_swapops
  49. asm_intop
  50. asm_intop_s
  51. asm_intneg
  52. asm_intmul
  53. asm_add
  54. asm_sub
  55. asm_mul
  56. asm_div
  57. asm_pow
  58. asm_mod
  59. asm_neg
  60. asm_band
  61. asm_borbxor
  62. asm_bor
  63. asm_bnot
  64. asm_bswap
  65. asm_bitshift
  66. asm_intmin_max
  67. asm_fpmin_max
  68. asm_min_max
  69. asm_fpcomp
  70. asm_intcomp
  71. asm_comp
  72. asm_hiop
  73. asm_prof
  74. asm_stack_check
  75. asm_stack_restore
  76. asm_gc_check
  77. asm_loop_fixup
  78. asm_head_lreg
  79. asm_head_root_base
  80. asm_head_side_base
  81. asm_tail_fixup
  82. asm_tail_prep
  83. asm_setup_call_slots
  84. asm_setup_target
  85. asm_mcode_fixup
  86. lj_asm_patchexit

   1 /*
   2 ** ARM64 IR assembler (SSA IR -> machine code).
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 **
   5 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
   6 ** Sponsored by Cisco Systems, Inc.
   7 */
   8 
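     /* Note: trace machine code is emitted backwards, i.e. as->mcp is decremented. */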
   9 /* -- Register allocator extensions --------------------------------------- */
  10 
  11 /* Allocate a register with a hint. */
  12 static Reg ra_hintalloc(ASMState *as, IRRef ref, Reg hint, RegSet allow)
  13 {
  14   Reg r = IR(ref)->r;
  15   if (ra_noreg(r)) {
  16     if (!ra_hashint(r) && !iscrossref(as, ref))
  17       ra_sethint(IR(ref)->r, hint);  /* Propagate register hint. */
  18     r = ra_allocref(as, ref, allow);
  19   }
  20   ra_noweak(as, r);
  21   return r;
  22 }
  23 
  24 /* Allocate two source registers for three-operand instructions. */
  25 static Reg ra_alloc2(ASMState *as, IRIns *ir, RegSet allow)
  26 {
  27   IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
  28   Reg left = irl->r, right = irr->r;
  29   if (ra_hasreg(left)) {
  30     ra_noweak(as, left);
  31     if (ra_noreg(right))
  32       right = ra_allocref(as, ir->op2, rset_exclude(allow, left));
  33     else
  34       ra_noweak(as, right);
  35   } else if (ra_hasreg(right)) {
  36     ra_noweak(as, right);
  37     left = ra_allocref(as, ir->op1, rset_exclude(allow, right));
  38   } else if (ra_hashint(right)) {
  39     right = ra_allocref(as, ir->op2, allow);
  40     left = ra_alloc1(as, ir->op1, rset_exclude(allow, right));
  41   } else {
  42     left = ra_allocref(as, ir->op1, allow);
  43     right = ra_alloc1(as, ir->op2, rset_exclude(allow, left));
  44   }
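       /* Pack both registers into one Reg: left in bits 0-7, right in bits 8-15. */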
  45   return left | (right << 8);
  46 }
  47 
  48 /* -- Guard handling ------------------------------------------------------ */
  49 
  50 /* Setup all needed exit stubs. */
  51 static void asm_exitstub_setup(ASMState *as, ExitNo nexits)
  52 {
  53   ExitNo i;
  54   MCode *mxp = as->mctop;
  55   if (mxp - (nexits + 3 + MCLIM_REDZONE) < as->mclim)
  56     asm_mclimit(as);
  57   /* 1: str lr,[sp]; bl ->vm_exit_handler; movz w0,traceno; bl <1; bl <1; ... */
  58   for (i = nexits-1; (int32_t)i >= 0; i--)
  59     *--mxp = A64I_LE(A64I_BL|((-3-i)&0x03ffffffu));
  60   *--mxp = A64I_LE(A64I_MOVZw|A64F_U16(as->T->traceno));
  61   mxp--;
  62   *mxp = A64I_LE(A64I_BL|(((MCode *)(void *)lj_vm_exit_handler-mxp)&0x03ffffffu));
  63   *--mxp = A64I_LE(A64I_STRx|A64F_D(RID_LR)|A64F_N(RID_SP));
  64   as->mctop = mxp;
  65 }
  66 
  67 static MCode *asm_exitstub_addr(ASMState *as, ExitNo exitno)
  68 {
  69   /* Keep this in-sync with exitstub_trace_addr(). */
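       /* Stub area layout: str lr, bl ->vm_exit_handler, movz, then one bl per exit. */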
  70   return as->mctop + exitno + 3;
  71 }
  72 
  73 /* Emit conditional branch to exit for guard. */
  74 static void asm_guardcc(ASMState *as, A64CC cc)
  75 {
  76   MCode *target = asm_exitstub_addr(as, as->snapno);
  77   MCode *p = as->mcp;
  78   if (LJ_UNLIKELY(p == as->invmcp)) {
  79     as->loopinv = 1;
  80     *p = A64I_B | ((target-p) & 0x03ffffffu);
  81     emit_cond_branch(as, cc^1, p-1);
  82     return;
  83   }
  84   emit_cond_branch(as, cc, target);
  85 }
  86 
  87 /* Emit test and branch instruction to exit for guard. */
  88 static void asm_guardtnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit)
  89 {
  90   MCode *target = asm_exitstub_addr(as, as->snapno);
  91   MCode *p = as->mcp;
  92   if (LJ_UNLIKELY(p == as->invmcp)) {
  93     as->loopinv = 1;
  94     *p = A64I_B | ((target-p) & 0x03ffffffu);
  95     emit_tnb(as, ai^0x01000000u, r, bit, p-1);
  96     return;
  97   }
  98   emit_tnb(as, ai, r, bit, target);
  99 }
 100 
 101 /* Emit compare and branch instruction to exit for guard. */
 102 static void asm_guardcnb(ASMState *as, A64Ins ai, Reg r)
 103 {
 104   MCode *target = asm_exitstub_addr(as, as->snapno);
 105   MCode *p = as->mcp;
 106   if (LJ_UNLIKELY(p == as->invmcp)) {
 107     as->loopinv = 1;
 108     *p = A64I_B | ((target-p) & 0x03ffffffu);
 109     emit_cnb(as, ai^0x01000000u, r, p-1);
 110     return;
 111   }
 112   emit_cnb(as, ai, r, target);
 113 }
 114 
 115 /* -- Operand fusion ------------------------------------------------------ */
 116 
 117 /* Limit linear search to this distance. Avoids O(n^2) behavior. */
 118 #define CONFLICT_SEARCH_LIM     31
 119 
 120 static int asm_isk32(ASMState *as, IRRef ref, int32_t *k)
 121 {
 122   if (irref_isk(ref)) {
 123     IRIns *ir = IR(ref);
 124     if (ir->o == IR_KNULL || !irt_is64(ir->t)) {
 125       *k = ir->i;
 126       return 1;
 127     } else if (checki32((int64_t)ir_k64(ir)->u64)) {
 128       *k = (int32_t)ir_k64(ir)->u64;
 129       return 1;
 130     }
 131   }
 132   return 0;
 133 }
 134 
 135 /* Check if there's no conflicting instruction between curins and ref. */
 136 static int noconflict(ASMState *as, IRRef ref, IROp conflict)
 137 {
 138   IRIns *ir = as->ir;
 139   IRRef i = as->curins;
 140   if (i > ref + CONFLICT_SEARCH_LIM)
 141     return 0;  /* Give up, ref is too far away. */
 142   while (--i > ref)
 143     if (ir[i].o == conflict)
 144       return 0;  /* Conflict found. */
 145   return 1;  /* Ok, no conflict. */
 146 }
 147 
 148 /* Fuse the array base of colocated arrays. */
 149 static int32_t asm_fuseabase(ASMState *as, IRRef ref)
 150 {
 151   IRIns *ir = IR(ref);
 152   if (ir->o == IR_TNEW && ir->op1 <= LJ_MAX_COLOSIZE &&
 153       !neverfuse(as) && noconflict(as, ref, IR_NEWREF))
 154     return (int32_t)sizeof(GCtab);
 155   return 0;
 156 }
 157 
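     /* Flag for *ofsp: the fused offset is a register (FUSE_REG|Rm), not an immediate. */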
 158 #define FUSE_REG        0x40000000
 159 
 160 /* Fuse array/hash/upvalue reference into register+offset operand. */
 161 static Reg asm_fuseahuref(ASMState *as, IRRef ref, int32_t *ofsp, RegSet allow,
 162                           A64Ins ins)
 163 {
 164   IRIns *ir = IR(ref);
 165   if (ra_noreg(ir->r)) {
 166     if (ir->o == IR_AREF) {
 167       if (mayfuse(as, ref)) {
 168         if (irref_isk(ir->op2)) {
 169           IRRef tab = IR(ir->op1)->op1;
 170           int32_t ofs = asm_fuseabase(as, tab);
 171           IRRef refa = ofs ? tab : ir->op1;
 172           ofs += 8*IR(ir->op2)->i;
 173           if (emit_checkofs(ins, ofs)) {
 174             *ofsp = ofs;
 175             return ra_alloc1(as, refa, allow);
 176           }
 177         } else {
 178           Reg base = ra_alloc1(as, ir->op1, allow);
 179           *ofsp = FUSE_REG|ra_alloc1(as, ir->op2, rset_exclude(allow, base));
 180           return base;
 181         }
 182       }
 183     } else if (ir->o == IR_HREFK) {
 184       if (mayfuse(as, ref)) {
 185         int32_t ofs = (int32_t)(IR(ir->op2)->op2 * sizeof(Node));
 186         if (emit_checkofs(ins, ofs)) {
 187           *ofsp = ofs;
 188           return ra_alloc1(as, ir->op1, allow);
 189         }
 190       }
 191     } else if (ir->o == IR_UREFC) {
 192       if (irref_isk(ir->op1)) {
 193         GCfunc *fn = ir_kfunc(IR(ir->op1));
 194         GCupval *uv = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv;
 195         int64_t ofs = glofs(as, &uv->tv);
 196         if (emit_checkofs(ins, ofs)) {
 197           *ofsp = (int32_t)ofs;
 198           return RID_GL;
 199         }
 200       }
 201     }
 202   }
 203   *ofsp = 0;
 204   return ra_alloc1(as, ref, allow);
 205 }
 206 
 207 /* Fuse m operand into arithmetic/logic instructions. */
 208 static uint32_t asm_fuseopm(ASMState *as, A64Ins ai, IRRef ref, RegSet allow)
 209 {
 210   IRIns *ir = IR(ref);
 211   if (ra_hasreg(ir->r)) {
 212     ra_noweak(as, ir->r);
 213     return A64F_M(ir->r);
 214   } else if (irref_isk(ref)) {
 215     uint32_t m;
 216     int64_t k = get_k64val(ir);
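         /* Logical ops (AND/OR/EOR) take a bitmask immediate, arithmetic ops a 12 bit immediate. */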
 217     if ((ai & 0x1f000000) == 0x0a000000)
 218       m = emit_isk13(k, irt_is64(ir->t));
 219     else
 220       m = emit_isk12(k);
 221     if (m)
 222       return m;
 223   } else if (mayfuse(as, ref)) {
 224     if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR && irref_isk(ir->op2)) ||
 225         (ir->o == IR_ADD && ir->op1 == ir->op2)) {
 226       A64Shift sh = ir->o == IR_BSHR ? A64SH_LSR :
 227                     ir->o == IR_BSAR ? A64SH_ASR : A64SH_LSL;
 228       int shift = ir->o == IR_ADD ? 1 :
 229                     (IR(ir->op2)->i & (irt_is64(ir->t) ? 63 : 31));
 230       IRIns *irl = IR(ir->op1);
 231       if (sh == A64SH_LSL &&
 232           irl->o == IR_CONV &&
 233           irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
 234           shift <= 4 &&
 235           canfuse(as, irl)) {
 236         Reg m = ra_alloc1(as, irl->op1, allow);
 237         return A64F_M(m) | A64F_EXSH(A64EX_SXTW, shift);
 238       } else {
 239         Reg m = ra_alloc1(as, ir->op1, allow);
 240         return A64F_M(m) | A64F_SH(sh, shift);
 241       }
 242     } else if (ir->o == IR_CONV &&
 243                ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)) {
 244       Reg m = ra_alloc1(as, ir->op1, allow);
 245       return A64F_M(m) | A64F_EX(A64EX_SXTW);
 246     }
 247   }
 248   return A64F_M(ra_allocref(as, ref, allow));
 249 }
 250 
 251 /* Fuse XLOAD/XSTORE reference into load/store operand. */
 252 static void asm_fusexref(ASMState *as, A64Ins ai, Reg rd, IRRef ref,
 253                          RegSet allow)
 254 {
 255   IRIns *ir = IR(ref);
 256   Reg base;
 257   int32_t ofs = 0;
 258   if (ra_noreg(ir->r) && canfuse(as, ir)) {
 259     if (ir->o == IR_ADD) {
 260       if (asm_isk32(as, ir->op2, &ofs) && emit_checkofs(ai, ofs)) {
 261         ref = ir->op1;
 262       } else {
 263         Reg rn, rm;
 264         IRRef lref = ir->op1, rref = ir->op2;
 265         IRIns *irl = IR(lref);
 266         if (mayfuse(as, irl->op1)) {
 267           unsigned int shift = 4;
 268           if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
 269             shift = (IR(irl->op2)->i & 63);
 270           } else if (irl->o == IR_ADD && irl->op1 == irl->op2) {
 271             shift = 1;
 272           }
 273           if ((ai >> 30) == shift) {
 274             lref = irl->op1;
 275             irl = IR(lref);
 276             ai |= A64I_LS_SH;
 277           }
 278         }
 279         if (irl->o == IR_CONV &&
 280             irl->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT) &&
 281             canfuse(as, irl)) {
 282           lref = irl->op1;
 283           ai |= A64I_LS_SXTWx;
 284         } else {
 285           ai |= A64I_LS_LSLx;
 286         }
 287         rm = ra_alloc1(as, lref, allow);
 288         rn = ra_alloc1(as, rref, rset_exclude(allow, rm));
 289         emit_dnm(as, (ai^A64I_LS_R), (rd & 31), rn, rm);
 290         return;
 291       }
 292     } else if (ir->o == IR_STRREF) {
 293       if (asm_isk32(as, ir->op2, &ofs)) {
 294         ref = ir->op1;
 295       } else if (asm_isk32(as, ir->op1, &ofs)) {
 296         ref = ir->op2;
 297       } else {
 298         Reg rn = ra_alloc1(as, ir->op1, allow);
 299         IRIns *irr = IR(ir->op2);
 300         uint32_t m;
 301         if (irr+1 == ir && !ra_used(irr) &&
 302             irr->o == IR_ADD && irref_isk(irr->op2)) {
 303           ofs = sizeof(GCstr) + IR(irr->op2)->i;
 304           if (emit_checkofs(ai, ofs)) {
 305             Reg rm = ra_alloc1(as, irr->op1, rset_exclude(allow, rn));
 306             m = A64F_M(rm) | A64F_EX(A64EX_SXTW);
 307             goto skipopm;
 308           }
 309         }
 310         m = asm_fuseopm(as, 0, ir->op2, rset_exclude(allow, rn));
 311         ofs = sizeof(GCstr);
 312       skipopm:
 313         emit_lso(as, ai, rd, rd, ofs);
 314         emit_dn(as, A64I_ADDx^m, rd, rn);
 315         return;
 316       }
 317       ofs += sizeof(GCstr);
 318       if (!emit_checkofs(ai, ofs)) {
 319         Reg rn = ra_alloc1(as, ref, allow);
 320         Reg rm = ra_allock(as, ofs, rset_exclude(allow, rn));
 321         emit_dnm(as, (ai^A64I_LS_R)|A64I_LS_UXTWx, rd, rn, rm);
 322         return;
 323       }
 324     }
 325   }
 326   base = ra_alloc1(as, ref, allow);
 327   emit_lso(as, ai, (rd & 31), base, ofs);
 328 }
 329 
 330 /* Fuse FP multiply-add/sub. */
 331 static int asm_fusemadd(ASMState *as, IRIns *ir, A64Ins ai, A64Ins air)
 332 {
 333   IRRef lref = ir->op1, rref = ir->op2;
 334   IRIns *irm;
 335   if (lref != rref &&
 336       ((mayfuse(as, lref) && (irm = IR(lref), irm->o == IR_MUL) &&
 337        ra_noreg(irm->r)) ||
 338        (mayfuse(as, rref) && (irm = IR(rref), irm->o == IR_MUL) &&
 339        (rref = lref, ai = air, ra_noreg(irm->r))))) {
 340     Reg dest = ra_dest(as, ir, RSET_FPR);
 341     Reg add = ra_hintalloc(as, rref, dest, RSET_FPR);
 342     Reg left = ra_alloc2(as, irm,
 343                          rset_exclude(rset_exclude(RSET_FPR, dest), add));
 344     Reg right = (left >> 8); left &= 255;
 345     emit_dnma(as, ai, (dest & 31), (left & 31), (right & 31), (add & 31));
 346     return 1;
 347   }
 348   return 0;
 349 }
 350 
 351 /* Fuse BAND + BSHL/BSHR into UBFM. */
 352 static int asm_fuseandshift(ASMState *as, IRIns *ir)
 353 {
 354   IRIns *irl = IR(ir->op1);
 355   lua_assert(ir->o == IR_BAND);
 356   if (canfuse(as, irl) && irref_isk(ir->op2)) {
 357     uint64_t mask = get_k64val(IR(ir->op2));
 358     if (irref_isk(irl->op2) && (irl->o == IR_BSHR || irl->o == IR_BSHL)) {
 359       int32_t shmask = irt_is64(irl->t) ? 63 : 31;
 360       int32_t shift = (IR(irl->op2)->i & shmask);
 361       int32_t imms = shift;
 362       if (irl->o == IR_BSHL) {
 363         mask >>= shift;
 364         shift = (shmask-shift+1) & shmask;
 365         imms = 0;
 366       }
 367       if (mask && !((mask+1) & mask)) {  /* Contiguous 1-bits at the bottom. */
 368         Reg dest = ra_dest(as, ir, RSET_GPR);
 369         Reg left = ra_alloc1(as, irl->op1, RSET_GPR);
 370         A64Ins ai = shmask == 63 ? A64I_UBFMx : A64I_UBFMw;
 371         imms += 63 - emit_clz64(mask);
 372         if (imms > shmask) imms = shmask;
 373         emit_dn(as, ai | A64F_IMMS(imms) | A64F_IMMR(shift), dest, left);
 374         return 1;
 375       }
 376     }
 377   }
 378   return 0;
 379 }
 380 
 381 /* Fuse BOR(BSHL, BSHR) into EXTR/ROR. */
 382 static int asm_fuseorshift(ASMState *as, IRIns *ir)
 383 {
 384   IRIns *irl = IR(ir->op1), *irr = IR(ir->op2);
 385   lua_assert(ir->o == IR_BOR);
 386   if (canfuse(as, irl) && canfuse(as, irr) &&
 387       ((irl->o == IR_BSHR && irr->o == IR_BSHL) ||
 388        (irl->o == IR_BSHL && irr->o == IR_BSHR))) {
 389     if (irref_isk(irl->op2) && irref_isk(irr->op2)) {
 390       IRRef lref = irl->op1, rref = irr->op1;
 391       uint32_t lshift = IR(irl->op2)->i, rshift = IR(irr->op2)->i;
 392       if (irl->o == IR_BSHR) {  /* BSHR needs to be the right operand. */
 393         uint32_t tmp2;
 394         IRRef tmp1 = lref; lref = rref; rref = tmp1;
 395         tmp2 = lshift; lshift = rshift; rshift = tmp2;
 396       }
 397       if (rshift + lshift == (irt_is64(ir->t) ? 64 : 32)) {
 398         A64Ins ai = irt_is64(ir->t) ? A64I_EXTRx : A64I_EXTRw;
 399         Reg dest = ra_dest(as, ir, RSET_GPR);
 400         Reg left = ra_alloc1(as, lref, RSET_GPR);
 401         Reg right = ra_alloc1(as, rref, rset_exclude(RSET_GPR, left));
 402         emit_dnm(as, ai | A64F_IMMS(rshift), dest, left, right);
 403         return 1;
 404       }
 405     }
 406   }
 407   return 0;
 408 }
 409 
 410 /* -- Calls --------------------------------------------------------------- */
 411 
 412 /* Generate a call to a C function. */
 413 static void asm_gencall(ASMState *as, const CCallInfo *ci, IRRef *args)
 414 {
 415   uint32_t n, nargs = CCI_XNARGS(ci);
 416   int32_t ofs = 0;
 417   Reg gpr, fpr = REGARG_FIRSTFPR;
 418   if ((void *)ci->func)
 419     emit_call(as, (void *)ci->func);
 420   for (gpr = REGARG_FIRSTGPR; gpr <= REGARG_LASTGPR; gpr++)
 421     as->cost[gpr] = REGCOST(~0u, ASMREF_L);
 422   gpr = REGARG_FIRSTGPR;
 423   for (n = 0; n < nargs; n++) { /* Setup args. */
 424     IRRef ref = args[n];
 425     IRIns *ir = IR(ref);
 426     if (ref) {
 427       if (irt_isfp(ir->t)) {
 428         if (fpr <= REGARG_LASTFPR) {
 429           lua_assert(rset_test(as->freeset, fpr)); /* Must have been evicted. */
 430           ra_leftov(as, fpr, ref);
 431           fpr++;
 432         } else {
 433           Reg r = ra_alloc1(as, ref, RSET_FPR);
 434           emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_isnum(ir->t)) ? 4 : 0));
 435           ofs += 8;
 436         }
 437       } else {
 438         if (gpr <= REGARG_LASTGPR) {
 439           lua_assert(rset_test(as->freeset, gpr)); /* Must have been evicted. */
 440           ra_leftov(as, gpr, ref);
 441           gpr++;
 442         } else {
 443           Reg r = ra_alloc1(as, ref, RSET_GPR);
 444           emit_spstore(as, ir, r, ofs + ((LJ_BE && !irt_is64(ir->t)) ? 4 : 0));
 445           ofs += 8;
 446         }
 447       }
 448     }
 449   }
 450 }
 451 
 452 /* Setup result reg/sp for call. Evict scratch regs. */
 453 static void asm_setupresult(ASMState *as, IRIns *ir, const CCallInfo *ci)
 454 {
 455   RegSet drop = RSET_SCRATCH;
 456   if (ra_hasreg(ir->r))
 457     rset_clear(drop, ir->r); /* Dest reg handled below. */
 458   ra_evictset(as, drop); /* Evictions must be performed first. */
 459   if (ra_used(ir)) {
 460     lua_assert(!irt_ispri(ir->t));
 461     if (irt_isfp(ir->t)) {
 462       if (ci->flags & CCI_CASTU64) {
 463         Reg dest = ra_dest(as, ir, RSET_FPR) & 31;
 464         emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D_R : A64I_FMOV_S_R,
 465                 dest, RID_RET);
 466       } else {
 467         ra_destreg(as, ir, RID_FPRET);
 468       }
 469     } else {
 470       ra_destreg(as, ir, RID_RET);
 471     }
 472   }
 473   UNUSED(ci);
 474 }
 475 
 476 static void asm_callx(ASMState *as, IRIns *ir)
 477 {
 478   IRRef args[CCI_NARGS_MAX*2];
 479   CCallInfo ci;
 480   IRRef func;
 481   IRIns *irf;
 482   ci.flags = asm_callx_flags(as, ir);
 483   asm_collectargs(as, ir, &ci, args);
 484   asm_setupresult(as, ir, &ci);
 485   func = ir->op2; irf = IR(func);
 486   if (irf->o == IR_CARG) { func = irf->op1; irf = IR(func); }
 487   if (irref_isk(func)) {  /* Call to constant address. */
 488     ci.func = (ASMFunction)(ir_k64(irf)->u64);
 489   } else {  /* Need a non-argument register for indirect calls. */
 490     Reg freg = ra_alloc1(as, func, RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
 491     emit_n(as, A64I_BLR, freg);
 492     ci.func = (ASMFunction)(void *)0;
 493   }
 494   asm_gencall(as, &ci, args);
 495 }
 496 
 497 /* -- Returns ------------------------------------------------------------- */
 498 
 499 /* Return to lower frame. Guard that it goes to the right spot. */
 500 static void asm_retf(ASMState *as, IRIns *ir)
 501 {
 502   Reg base = ra_alloc1(as, REF_BASE, RSET_GPR);
 503   void *pc = ir_kptr(IR(ir->op2));
 504   int32_t delta = 1+LJ_FR2+bc_a(*((const BCIns *)pc - 1));
 505   as->topslot -= (BCReg)delta;
 506   if ((int32_t)as->topslot < 0) as->topslot = 0;
 507   irt_setmark(IR(REF_BASE)->t);  /* Children must not coalesce with BASE reg. */
 508   /* Need to force a spill on REF_BASE now to update the stack slot. */
 509   emit_lso(as, A64I_STRx, base, RID_SP, ra_spill(as, IR(REF_BASE)));
 510   emit_setgl(as, base, jit_base);
 511   emit_addptr(as, base, -8*delta);
 512   asm_guardcc(as, CC_NE);
 513   emit_nm(as, A64I_CMPx, RID_TMP,
 514           ra_allock(as, i64ptr(pc), rset_exclude(RSET_GPR, base)));
 515   emit_lso(as, A64I_LDRx, RID_TMP, base, -8);
 516 }
 517 
 518 /* -- Type conversions ---------------------------------------------------- */
 519 
 520 static void asm_tointg(ASMState *as, IRIns *ir, Reg left)
 521 {
 522   Reg tmp = ra_scratch(as, rset_exclude(RSET_FPR, left));
 523   Reg dest = ra_dest(as, ir, RSET_GPR);
 524   asm_guardcc(as, CC_NE);
 525   emit_nm(as, A64I_FCMPd, (tmp & 31), (left & 31));
 526   emit_dn(as, A64I_FCVT_F64_S32, (tmp & 31), dest);
 527   emit_dn(as, A64I_FCVT_S32_F64, dest, (left & 31));
 528 }
 529 
 530 static void asm_tobit(ASMState *as, IRIns *ir)
 531 {
 532   RegSet allow = RSET_FPR;
 533   Reg left = ra_alloc1(as, ir->op1, allow);
 534   Reg right = ra_alloc1(as, ir->op2, rset_clear(allow, left));
 535   Reg tmp = ra_scratch(as, rset_clear(allow, right));
 536   Reg dest = ra_dest(as, ir, RSET_GPR);
 537   emit_dn(as, A64I_FMOV_R_S, dest, (tmp & 31));
 538   emit_dnm(as, A64I_FADDd, (tmp & 31), (left & 31), (right & 31));
 539 }
 540 
 541 static void asm_conv(ASMState *as, IRIns *ir)
 542 {
 543   IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
 544   int st64 = (st == IRT_I64 || st == IRT_U64 || st == IRT_P64);
 545   int stfp = (st == IRT_NUM || st == IRT_FLOAT);
 546   IRRef lref = ir->op1;
 547   lua_assert(irt_type(ir->t) != st);
 548   if (irt_isfp(ir->t)) {
 549     Reg dest = ra_dest(as, ir, RSET_FPR);
 550     if (stfp) {  /* FP to FP conversion. */
 551       emit_dn(as, st == IRT_NUM ? A64I_FCVT_F32_F64 : A64I_FCVT_F64_F32,
 552               (dest & 31), (ra_alloc1(as, lref, RSET_FPR) & 31));
 553     } else {  /* Integer to FP conversion. */
 554       Reg left = ra_alloc1(as, lref, RSET_GPR);
 555       A64Ins ai = irt_isfloat(ir->t) ?
 556         (((IRT_IS64 >> st) & 1) ?
 557          (st == IRT_I64 ? A64I_FCVT_F32_S64 : A64I_FCVT_F32_U64) :
 558          (st == IRT_INT ? A64I_FCVT_F32_S32 : A64I_FCVT_F32_U32)) :
 559         (((IRT_IS64 >> st) & 1) ?
 560          (st == IRT_I64 ? A64I_FCVT_F64_S64 : A64I_FCVT_F64_U64) :
 561          (st == IRT_INT ? A64I_FCVT_F64_S32 : A64I_FCVT_F64_U32));
 562       emit_dn(as, ai, (dest & 31), left);
 563     }
 564   } else if (stfp) {  /* FP to integer conversion. */
 565     if (irt_isguard(ir->t)) {
 566       /* Checked conversions are only supported from number to int. */
 567       lua_assert(irt_isint(ir->t) && st == IRT_NUM);
 568       asm_tointg(as, ir, ra_alloc1(as, lref, RSET_FPR));
 569     } else {
 570       Reg left = ra_alloc1(as, lref, RSET_FPR);
 571       Reg dest = ra_dest(as, ir, RSET_GPR);
 572       A64Ins ai = irt_is64(ir->t) ?
 573         (st == IRT_NUM ?
 574          (irt_isi64(ir->t) ? A64I_FCVT_S64_F64 : A64I_FCVT_U64_F64) :
 575          (irt_isi64(ir->t) ? A64I_FCVT_S64_F32 : A64I_FCVT_U64_F32)) :
 576         (st == IRT_NUM ?
 577          (irt_isint(ir->t) ? A64I_FCVT_S32_F64 : A64I_FCVT_U32_F64) :
 578          (irt_isint(ir->t) ? A64I_FCVT_S32_F32 : A64I_FCVT_U32_F32));
 579       emit_dn(as, ai, dest, (left & 31));
 580     }
 581   } else if (st >= IRT_I8 && st <= IRT_U16) { /* Extend to 32 bit integer. */
 582     Reg dest = ra_dest(as, ir, RSET_GPR);
 583     Reg left = ra_alloc1(as, lref, RSET_GPR);
 584     A64Ins ai = st == IRT_I8 ? A64I_SXTBw :
 585                 st == IRT_U8 ? A64I_UXTBw :
 586                 st == IRT_I16 ? A64I_SXTHw : A64I_UXTHw;
 587     lua_assert(irt_isint(ir->t) || irt_isu32(ir->t));
 588     emit_dn(as, ai, dest, left);
 589   } else {
 590     Reg dest = ra_dest(as, ir, RSET_GPR);
 591     if (irt_is64(ir->t)) {
 592       if (st64 || !(ir->op2 & IRCONV_SEXT)) {
 593         /* 64/64 bit no-op (cast) or 32 to 64 bit zero extension. */
 594         ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
 595       } else {  /* 32 to 64 bit sign extension. */
 596         Reg left = ra_alloc1(as, lref, RSET_GPR);
 597         emit_dn(as, A64I_SXTW, dest, left);
 598       }
 599     } else {
 600       if (st64) {
 601         /* This is either a 32 bit reg/reg mov which zeroes the hiword
 602         ** or a load of the loword from a 64 bit address.
 603         */
 604         Reg left = ra_alloc1(as, lref, RSET_GPR);
 605         emit_dm(as, A64I_MOVw, dest, left);
 606       } else {  /* 32/32 bit no-op (cast). */
 607         ra_leftov(as, dest, lref);  /* Do nothing, but may need to move regs. */
 608       }
 609     }
 610   }
 611 }
 612 
 613 static void asm_strto(ASMState *as, IRIns *ir)
 614 {
 615   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_strscan_num];
 616   IRRef args[2];
 617   Reg dest = 0, tmp;
 618   int destused = ra_used(ir);
 619   int32_t ofs = 0;
 620   ra_evictset(as, RSET_SCRATCH);
 621   if (destused) {
 622     if (ra_hasspill(ir->s)) {
 623       ofs = sps_scale(ir->s);
 624       destused = 0;
 625       if (ra_hasreg(ir->r)) {
 626         ra_free(as, ir->r);
 627         ra_modified(as, ir->r);
 628         emit_spload(as, ir, ir->r, ofs);
 629       }
 630     } else {
 631       dest = ra_dest(as, ir, RSET_FPR);
 632     }
 633   }
 634   if (destused)
 635     emit_lso(as, A64I_LDRd, (dest & 31), RID_SP, 0);
 636   asm_guardcnb(as, A64I_CBZ, RID_RET);
 637   args[0] = ir->op1; /* GCstr *str */
 638   args[1] = ASMREF_TMP1; /* TValue *n  */
 639   asm_gencall(as, ci, args);
 640   tmp = ra_releasetmp(as, ASMREF_TMP1);
 641   emit_opk(as, A64I_ADDx, tmp, RID_SP, ofs, RSET_GPR);
 642 }
 643 
 644 /* -- Memory references --------------------------------------------------- */
 645 
 646 /* Store tagged value for ref at base+ofs. */
 647 static void asm_tvstore64(ASMState *as, Reg base, int32_t ofs, IRRef ref)
 648 {
 649   RegSet allow = rset_exclude(RSET_GPR, base);
 650   IRIns *ir = IR(ref);
 651   lua_assert(irt_ispri(ir->t) || irt_isaddr(ir->t) || irt_isinteger(ir->t));
 652   if (irref_isk(ref)) {
 653     TValue k;
 654     lj_ir_kvalue(as->J->L, &k, ir);
 655     emit_lso(as, A64I_STRx, ra_allock(as, k.u64, allow), base, ofs);
 656   } else {
 657     Reg src = ra_alloc1(as, ref, allow);
 658     rset_clear(allow, src);
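         /* Merge the value and the type tag (itype << 47) in RID_TMP, then store it. */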
 659     if (irt_isinteger(ir->t)) {
 660       Reg type = ra_allock(as, (int64_t)irt_toitype(ir->t) << 47, allow);
 661       emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
 662       emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), RID_TMP, type, src);
 663     } else {
 664       Reg type = ra_allock(as, (int32_t)irt_toitype(ir->t), allow);
 665       emit_lso(as, A64I_STRx, RID_TMP, base, ofs);
 666       emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), RID_TMP, src, type);
 667     }
 668   }
 669 }
 670 
 671 /* Get pointer to TValue. */
 672 static void asm_tvptr(ASMState *as, Reg dest, IRRef ref)
 673 {
 674   IRIns *ir = IR(ref);
 675   if (irt_isnum(ir->t)) {
 676     if (irref_isk(ref)) {
 677       /* Use the number constant itself as a TValue. */
 678       ra_allockreg(as, i64ptr(ir_knum(ir)), dest);
 679     } else {
 680       /* Otherwise force a spill and use the spill slot. */
 681       emit_opk(as, A64I_ADDx, dest, RID_SP, ra_spill(as, ir), RSET_GPR);
 682     }
 683   } else {
 684     /* Otherwise use g->tmptv to hold the TValue. */
 685     asm_tvstore64(as, dest, 0, ref);
 686     ra_allockreg(as, i64ptr(&J2G(as->J)->tmptv), dest);
 687   }
 688 }
 689 
 690 static void asm_aref(ASMState *as, IRIns *ir)
 691 {
 692   Reg dest = ra_dest(as, ir, RSET_GPR);
 693   Reg idx, base;
 694   if (irref_isk(ir->op2)) {
 695     IRRef tab = IR(ir->op1)->op1;
 696     int32_t ofs = asm_fuseabase(as, tab);
 697     IRRef refa = ofs ? tab : ir->op1;
 698     uint32_t k = emit_isk12(ofs + 8*IR(ir->op2)->i);
 699     if (k) {
 700       base = ra_alloc1(as, refa, RSET_GPR);
 701       emit_dn(as, A64I_ADDx^k, dest, base);
 702       return;
 703     }
 704   }
 705   base = ra_alloc1(as, ir->op1, RSET_GPR);
 706   idx = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, base));
 707   emit_dnm(as, A64I_ADDx | A64F_EXSH(A64EX_UXTW, 3), dest, base, idx);
 708 }
 709 
 710 /* Inlined hash lookup. Specialized for key type and for const keys.
 711 ** The equivalent C code is:
 712 **   Node *n = hashkey(t, key);
 713 **   do {
 714 **     if (lj_obj_equal(&n->key, key)) return &n->val;
 715 **   } while ((n = nextnode(n)));
 716 **   return niltv(L);
 717 */
 718 static void asm_href(ASMState *as, IRIns *ir, IROp merge)
 719 {
 720   RegSet allow = RSET_GPR;
 721   int destused = ra_used(ir);
 722   Reg dest = ra_dest(as, ir, allow);
 723   Reg tab = ra_alloc1(as, ir->op1, rset_clear(allow, dest));
 724   Reg key = 0, tmp = RID_TMP;
 725   IRRef refkey = ir->op2;
 726   IRIns *irkey = IR(refkey);
 727   int isk = irref_isk(ir->op2);
 728   IRType1 kt = irkey->t;
 729   uint32_t k = 0;
 730   uint32_t khash;
 731   MCLabel l_end, l_loop, l_next;
 732   rset_clear(allow, tab);
 733 
 734   if (!isk) {
 735     key = ra_alloc1(as, ir->op2, irt_isnum(kt) ? RSET_FPR : allow);
 736     rset_clear(allow, key);
 737     if (!irt_isstr(kt)) {
 738       tmp = ra_scratch(as, allow);
 739       rset_clear(allow, tmp);
 740     }
 741   } else if (irt_isnum(kt)) {
 742     int64_t val = (int64_t)ir_knum(irkey)->u64;
 743     if (!(k = emit_isk12(val))) {
 744       key = ra_allock(as, val, allow);
 745       rset_clear(allow, key);
 746     }
 747   } else if (!irt_ispri(kt)) {
 748     if (!(k = emit_isk12(irkey->i))) {
 749       key = ra_alloc1(as, refkey, allow);
 750       rset_clear(allow, key);
 751     }
 752   }
 753 
 754   /* Key not found in chain: jump to exit (if merged) or load niltv. */
 755   l_end = emit_label(as);
 756   as->invmcp = NULL;
 757   if (merge == IR_NE)
 758     asm_guardcc(as, CC_AL);
 759   else if (destused)
 760     emit_loada(as, dest, niltvg(J2G(as->J)));
 761 
 762   /* Follow hash chain until the end. */
 763   l_loop = --as->mcp;
 764   emit_n(as, A64I_CMPx^A64I_K12^0, dest);
 765   emit_lso(as, A64I_LDRx, dest, dest, offsetof(Node, next));
 766   l_next = emit_label(as);
 767 
 768   /* Type and value comparison. */
 769   if (merge == IR_EQ)
 770     asm_guardcc(as, CC_EQ);
 771   else
 772     emit_cond_branch(as, CC_EQ, l_end);
 773 
 774   if (irt_isnum(kt)) {
 775     if (isk) {
 776       /* Assumes -0.0 is already canonicalized to +0.0. */
 777       if (k)
 778         emit_n(as, A64I_CMPx^k, tmp);
 779       else
 780         emit_nm(as, A64I_CMPx, key, tmp);
 781       emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
 782     } else {
 783       Reg tisnum = ra_allock(as, LJ_TISNUM << 15, allow);
 784       Reg ftmp = ra_scratch(as, rset_exclude(RSET_FPR, key));
 785       rset_clear(allow, tisnum);
 786       emit_nm(as, A64I_FCMPd, key, ftmp);
 787       emit_dn(as, A64I_FMOV_D_R, (ftmp & 31), (tmp & 31));
 788       emit_cond_branch(as, CC_LO, l_next);
 789       emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32), tisnum, tmp);
 790       emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.n));
 791     }
 792   } else if (irt_isaddr(kt)) {
 793     Reg scr;
 794     if (isk) {
 795       int64_t kk = ((int64_t)irt_toitype(irkey->t) << 47) | irkey[1].tv.u64;
 796       scr = ra_allock(as, kk, allow);
 797       emit_nm(as, A64I_CMPx, scr, tmp);
 798       emit_lso(as, A64I_LDRx, tmp, dest, offsetof(Node, key.u64));
 799     } else {
 800       scr = ra_scratch(as, allow);
 801       emit_nm(as, A64I_CMPx, tmp, scr);
 802       emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key.u64));
 803     }
 804     rset_clear(allow, scr);
 805   } else {
 806     Reg type, scr;
 807     lua_assert(irt_ispri(kt) && !irt_isnil(kt));
 808     type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t) << 47), allow);
 809     scr = ra_scratch(as, rset_clear(allow, type));
 810     rset_clear(allow, scr);
 811     emit_nm(as, A64I_CMPw, scr, type);
 812     emit_lso(as, A64I_LDRx, scr, dest, offsetof(Node, key));
 813   }
 814 
 815   *l_loop = A64I_BCC | A64F_S19(as->mcp - l_loop) | CC_NE;
 816   if (!isk && irt_isaddr(kt)) {
 817     Reg type = ra_allock(as, (int32_t)irt_toitype(kt), allow);
 818     emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, key, type);
 819     rset_clear(allow, type);
 820   }
 821   /* Load main position relative to tab->node into dest. */
 822   khash = isk ? ir_khash(irkey) : 1;
 823   if (khash == 0) {
 824     emit_lso(as, A64I_LDRx, dest, tab, offsetof(GCtab, node));
 825   } else {
 826     emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 3), dest, tmp, dest);
 827     emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 1), dest, dest, dest);
 828     emit_lso(as, A64I_LDRx, tmp, tab, offsetof(GCtab, node));
 829     if (isk) {
 830       Reg tmphash = ra_allock(as, khash, allow);
 831       emit_dnm(as, A64I_ANDw, dest, dest, tmphash);
 832       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
 833     } else if (irt_isstr(kt)) {
 834       /* Fetch of str->hash is cheaper than ra_allock. */
 835       emit_dnm(as, A64I_ANDw, dest, dest, tmp);
 836       emit_lso(as, A64I_LDRw, tmp, key, offsetof(GCstr, hash));
 837       emit_lso(as, A64I_LDRw, dest, tab, offsetof(GCtab, hmask));
 838     } else {  /* Must match with hash*() in lj_tab.c. */
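           /* EXTRw with equal source registers is a 32 bit rotate (ROR). */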
 839       emit_dnm(as, A64I_ANDw, dest, dest, tmp);
 840       emit_lso(as, A64I_LDRw, tmp, tab, offsetof(GCtab, hmask));
 841       emit_dnm(as, A64I_SUBw, dest, dest, tmp);
 842       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT3)), tmp, tmp, tmp);
 843       emit_dnm(as, A64I_EORw, dest, dest, tmp);
 844       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT2)), dest, dest, dest);
 845       emit_dnm(as, A64I_SUBw, tmp, tmp, dest);
 846       emit_dnm(as, A64I_EXTRw | (A64F_IMMS(32-HASH_ROT1)), dest, dest, dest);
 847       emit_dnm(as, A64I_EORw, tmp, tmp, dest);
 848       if (irt_isnum(kt)) {
 849         emit_dnm(as, A64I_ADDw, dest, dest, dest);
 850         emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
 851         emit_dm(as, A64I_MOVw, tmp, dest);
 852         emit_dn(as, A64I_FMOV_R_D, dest, (key & 31));
 853       } else {
 854         checkmclim(as);
 855         emit_dm(as, A64I_MOVw, tmp, key);
 856         emit_dnm(as, A64I_EORw, dest, dest,
 857                  ra_allock(as, irt_toitype(kt) << 15, allow));
 858         emit_dn(as, A64I_LSRx | A64F_IMMR(32)|A64F_IMMS(32), dest, dest);
 859         emit_dm(as, A64I_MOVx, dest, key);
 860       }
 861     }
 862   }
 863 }
 864 
 865 static void asm_hrefk(ASMState *as, IRIns *ir)
 866 {
 867   IRIns *kslot = IR(ir->op2);
 868   IRIns *irkey = IR(kslot->op1);
 869   int32_t ofs = (int32_t)(kslot->op2 * sizeof(Node));
 870   int32_t kofs = ofs + (int32_t)offsetof(Node, key);
 871   int bigofs = !emit_checkofs(A64I_LDRx, ofs);
 872   Reg dest = (ra_used(ir) || bigofs) ? ra_dest(as, ir, RSET_GPR) : RID_NONE;
 873   Reg node = ra_alloc1(as, ir->op1, RSET_GPR);
 874   Reg key, idx = node;
 875   RegSet allow = rset_exclude(RSET_GPR, node);
 876   uint64_t k;
 877   lua_assert(ofs % sizeof(Node) == 0);
 878   if (bigofs) {
 879     idx = dest;
 880     rset_clear(allow, dest);
 881     kofs = (int32_t)offsetof(Node, key);
 882   } else if (ra_hasreg(dest)) {
 883     emit_opk(as, A64I_ADDx, dest, node, ofs, allow);
 884   }
 885   asm_guardcc(as, CC_NE);
 886   if (irt_ispri(irkey->t)) {
 887     k = ~((int64_t)~irt_toitype(irkey->t) << 47);
 888   } else if (irt_isnum(irkey->t)) {
 889     k = ir_knum(irkey)->u64;
 890   } else {
 891     k = ((uint64_t)irt_toitype(irkey->t) << 47) | (uint64_t)ir_kgc(irkey);
 892   }
 893   key = ra_scratch(as, allow);
 894   emit_nm(as, A64I_CMPx, key, ra_allock(as, k, rset_exclude(allow, key)));
 895   emit_lso(as, A64I_LDRx, key, idx, kofs);
 896   if (bigofs)
 897     emit_opk(as, A64I_ADDx, dest, node, ofs, RSET_GPR);
 898 }
 899 
 900 static void asm_uref(ASMState *as, IRIns *ir)
 901 {
 902   Reg dest = ra_dest(as, ir, RSET_GPR);
 903   if (irref_isk(ir->op1)) {
 904     GCfunc *fn = ir_kfunc(IR(ir->op1));
 905     MRef *v = &gcref(fn->l.uvptr[(ir->op2 >> 8)])->uv.v;
 906     emit_lsptr(as, A64I_LDRx, dest, v);
 907   } else {
 908     Reg uv = ra_scratch(as, RSET_GPR);
 909     Reg func = ra_alloc1(as, ir->op1, RSET_GPR);
 910     if (ir->o == IR_UREFC) {
 911       asm_guardcc(as, CC_NE);
 912       emit_n(as, (A64I_CMPx^A64I_K12) | A64F_U12(1), RID_TMP);
 913       emit_opk(as, A64I_ADDx, dest, uv,
 914                (int32_t)offsetof(GCupval, tv), RSET_GPR);
 915       emit_lso(as, A64I_LDRB, RID_TMP, uv, (int32_t)offsetof(GCupval, closed));
 916     } else {
 917       emit_lso(as, A64I_LDRx, dest, uv, (int32_t)offsetof(GCupval, v));
 918     }
 919     emit_lso(as, A64I_LDRx, uv, func,
 920              (int32_t)offsetof(GCfuncL, uvptr) + 8*(int32_t)(ir->op2 >> 8));
 921   }
 922 }
 923 
 924 static void asm_fref(ASMState *as, IRIns *ir)
 925 {
 926   UNUSED(as); UNUSED(ir);
 927   lua_assert(!ra_used(ir));
 928 }
 929 
 930 static void asm_strref(ASMState *as, IRIns *ir)
 931 {
 932   RegSet allow = RSET_GPR;
 933   Reg dest = ra_dest(as, ir, allow);
 934   Reg base = ra_alloc1(as, ir->op1, allow);
 935   IRIns *irr = IR(ir->op2);
 936   int32_t ofs = sizeof(GCstr);
 937   uint32_t m;
 938   rset_clear(allow, base);
 939   if (irref_isk(ir->op2) && (m = emit_isk12(ofs + irr->i))) {
 940     emit_dn(as, A64I_ADDx^m, dest, base);
 941   } else {
 942     emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, dest);
 943     emit_dnm(as, A64I_ADDx, dest, base, ra_alloc1(as, ir->op2, allow));
 944   }
 945 }
 946 
 947 /* -- Loads and stores ---------------------------------------------------- */
 948 
 949 static A64Ins asm_fxloadins(IRIns *ir)
 950 {
 951   switch (irt_type(ir->t)) {
 952   case IRT_I8: return A64I_LDRB ^ A64I_LS_S;
 953   case IRT_U8: return A64I_LDRB;
 954   case IRT_I16: return A64I_LDRH ^ A64I_LS_S;
 955   case IRT_U16: return A64I_LDRH;
 956   case IRT_NUM: return A64I_LDRd;
 957   case IRT_FLOAT: return A64I_LDRs;
 958   default: return irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw;
 959   }
 960 }
 961 
 962 static A64Ins asm_fxstoreins(IRIns *ir)
 963 {
 964   switch (irt_type(ir->t)) {
 965   case IRT_I8: case IRT_U8: return A64I_STRB;
 966   case IRT_I16: case IRT_U16: return A64I_STRH;
 967   case IRT_NUM: return A64I_STRd;
 968   case IRT_FLOAT: return A64I_STRs;
 969   default: return irt_is64(ir->t) ? A64I_STRx : A64I_STRw;
 970   }
 971 }
 972 
 973 static void asm_fload(ASMState *as, IRIns *ir)
 974 {
 975   Reg dest = ra_dest(as, ir, RSET_GPR);
 976   Reg idx;
 977   A64Ins ai = asm_fxloadins(ir);
 978   int32_t ofs;
 979   if (ir->op1 == REF_NIL) {
 980     idx = RID_GL;
 981     ofs = (ir->op2 << 2) - GG_OFS(g);
 982   } else {
 983     idx = ra_alloc1(as, ir->op1, RSET_GPR);
 984     if (ir->op2 == IRFL_TAB_ARRAY) {
 985       ofs = asm_fuseabase(as, ir->op1);
 986       if (ofs) {  /* Turn the t->array load into an add for colocated arrays. */
 987         emit_dn(as, (A64I_ADDx^A64I_K12) | A64F_U12(ofs), dest, idx);
 988         return;
 989       }
 990     }
 991     ofs = field_ofs[ir->op2];
 992   }
 993   emit_lso(as, ai, (dest & 31), idx, ofs);
 994 }
 995 
 996 static void asm_fstore(ASMState *as, IRIns *ir)
 997 {
 998   if (ir->r != RID_SINK) {
 999     Reg src = ra_alloc1(as, ir->op2, RSET_GPR);
1000     IRIns *irf = IR(ir->op1);
1001     Reg idx = ra_alloc1(as, irf->op1, rset_exclude(RSET_GPR, src));
1002     int32_t ofs = field_ofs[irf->op2];
1003     emit_lso(as, asm_fxstoreins(ir), (src & 31), idx, ofs);
1004   }
1005 }
1006 
1007 static void asm_xload(ASMState *as, IRIns *ir)
1008 {
1009   Reg dest = ra_dest(as, ir, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1010   lua_assert(!(ir->op2 & IRXLOAD_UNALIGNED));
1011   asm_fusexref(as, asm_fxloadins(ir), dest, ir->op1, RSET_GPR);
1012 }
1013 
1014 static void asm_xstore(ASMState *as, IRIns *ir)
1015 {
1016   if (ir->r != RID_SINK) {
1017     Reg src = ra_alloc1(as, ir->op2, irt_isfp(ir->t) ? RSET_FPR : RSET_GPR);
1018     asm_fusexref(as, asm_fxstoreins(ir), src, ir->op1,
1019                  rset_exclude(RSET_GPR, src));
1020   }
1021 }
1022 
1023 static void asm_ahuvload(ASMState *as, IRIns *ir)
1024 {
1025   Reg idx, tmp, type;
1026   int32_t ofs = 0;
1027   RegSet gpr = RSET_GPR, allow = irt_isnum(ir->t) ? RSET_FPR : RSET_GPR;
1028   lua_assert(irt_isnum(ir->t) || irt_ispri(ir->t) || irt_isaddr(ir->t) ||
1029              irt_isint(ir->t));
1030   if (ra_used(ir)) {
1031     Reg dest = ra_dest(as, ir, allow);
1032     tmp = irt_isnum(ir->t) ? ra_scratch(as, rset_clear(gpr, dest)) : dest;
1033     if (irt_isaddr(ir->t)) {
1034       emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1035     } else if (irt_isnum(ir->t)) {
1036       emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1037     } else if (irt_isint(ir->t)) {
1038       emit_dm(as, A64I_MOVw, dest, dest);
1039     }
1040   } else {
1041     tmp = ra_scratch(as, gpr);
1042   }
1043   type = ra_scratch(as, rset_clear(gpr, tmp));
1044   idx = asm_fuseahuref(as, ir->op1, &ofs, rset_clear(gpr, type), A64I_LDRx);
1045   /* Always do the type check, even if the load result is unused. */
1046   asm_guardcc(as, irt_isnum(ir->t) ? CC_LS : CC_NE);
1047   if (irt_type(ir->t) >= IRT_NUM) {
1048     lua_assert(irt_isinteger(ir->t) || irt_isnum(ir->t));
1049     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1050             ra_allock(as, LJ_TISNUM << 15, rset_exclude(gpr, idx)), tmp);
1051   } else if (irt_isaddr(ir->t)) {
1052     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(ir->t)), type);
1053     emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1054   } else if (irt_isnil(ir->t)) {
1055     emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1056   } else {
1057     emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1058             ra_allock(as, (irt_toitype(ir->t) << 15) | 0x7fff, allow), tmp);
1059   }
1060   if (ofs & FUSE_REG)
1061     emit_dnm(as, (A64I_LDRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1062   else
1063     emit_lso(as, A64I_LDRx, tmp, idx, ofs);
1064 }
1065 
1066 static void asm_ahustore(ASMState *as, IRIns *ir)
1067 {
1068   if (ir->r != RID_SINK) {
1069     RegSet allow = RSET_GPR;
1070     Reg idx, src = RID_NONE, tmp = RID_TMP, type = RID_NONE;
1071     int32_t ofs = 0;
1072     if (irt_isnum(ir->t)) {
1073       src = ra_alloc1(as, ir->op2, RSET_FPR);
1074       idx = asm_fuseahuref(as, ir->op1, &ofs, allow, A64I_STRd);
1075       if (ofs & FUSE_REG)
 1076         emit_dnm(as, (A64I_STRd^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, (src & 31), idx, (ofs & 31));
1077       else
1078         emit_lso(as, A64I_STRd, (src & 31), idx, ofs);
1079     } else {
1080       if (!irt_ispri(ir->t)) {
1081         src = ra_alloc1(as, ir->op2, allow);
1082         rset_clear(allow, src);
1083         if (irt_isinteger(ir->t))
1084           type = ra_allock(as, (uint64_t)(int32_t)LJ_TISNUM << 47, allow);
1085         else
1086           type = ra_allock(as, irt_toitype(ir->t), allow);
1087       } else {
1088         tmp = type = ra_allock(as, ~((int64_t)~irt_toitype(ir->t)<<47), allow);
1089       }
1090       idx = asm_fuseahuref(as, ir->op1, &ofs, rset_exclude(allow, type),
1091                            A64I_STRx);
1092       if (ofs & FUSE_REG)
1093         emit_dnm(as, (A64I_STRx^A64I_LS_R)|A64I_LS_UXTWx|A64I_LS_SH, tmp, idx, (ofs & 31));
1094       else
1095         emit_lso(as, A64I_STRx, tmp, idx, ofs);
1096       if (ra_hasreg(src)) {
1097         if (irt_isinteger(ir->t)) {
1098           emit_dnm(as, A64I_ADDx | A64F_EX(A64EX_UXTW), tmp, type, src);
1099         } else {
1100           emit_dnm(as, A64I_ADDx | A64F_SH(A64SH_LSL, 47), tmp, src, type);
1101         }
1102       }
1103     }
1104   }
1105 }
1106 
1107 static void asm_sload(ASMState *as, IRIns *ir)
1108 {
1109   int32_t ofs = 8*((int32_t)ir->op1-2);
1110   IRType1 t = ir->t;
1111   Reg dest = RID_NONE, base;
1112   RegSet allow = RSET_GPR;
1113   lua_assert(!(ir->op2 & IRSLOAD_PARENT));  /* Handled by asm_head_side(). */
1114   lua_assert(irt_isguard(t) || !(ir->op2 & IRSLOAD_TYPECHECK));
1115   if ((ir->op2 & IRSLOAD_CONVERT) && irt_isguard(t) && irt_isint(t)) {
1116     dest = ra_scratch(as, RSET_FPR);
1117     asm_tointg(as, ir, dest);
1118     t.irt = IRT_NUM;  /* Continue with a regular number type check. */
1119   } else if (ra_used(ir)) {
1120     Reg tmp = RID_NONE;
1121     if ((ir->op2 & IRSLOAD_CONVERT))
1122       tmp = ra_scratch(as, irt_isint(t) ? RSET_FPR : RSET_GPR);
1123     lua_assert((irt_isnum(t)) || irt_isint(t) || irt_isaddr(t));
1124     dest = ra_dest(as, ir, irt_isnum(t) ? RSET_FPR : allow);
1125     base = ra_alloc1(as, REF_BASE, rset_clear(allow, dest));
1126     if (irt_isaddr(t)) {
1127       emit_dn(as, A64I_ANDx^emit_isk13(LJ_GCVMASK, 1), dest, dest);
1128     } else if ((ir->op2 & IRSLOAD_CONVERT)) {
1129       if (irt_isint(t)) {
1130         emit_dn(as, A64I_FCVT_S32_F64, dest, (tmp & 31));
1131         /* If value is already loaded for type check, move it to FPR. */
1132         if ((ir->op2 & IRSLOAD_TYPECHECK))
1133           emit_dn(as, A64I_FMOV_D_R, (tmp & 31), dest);
1134         else
1135           dest = tmp;
1136         t.irt = IRT_NUM;  /* Check for original type. */
1137       } else {
1138         emit_dn(as, A64I_FCVT_F64_S32, (dest & 31), tmp);
1139         dest = tmp;
1140         t.irt = IRT_INT;  /* Check for original type. */
1141       }
1142     } else if (irt_isint(t) && (ir->op2 & IRSLOAD_TYPECHECK)) {
1143       emit_dm(as, A64I_MOVw, dest, dest);
1144     }
1145     goto dotypecheck;
1146   }
1147   base = ra_alloc1(as, REF_BASE, allow);
1148 dotypecheck:
1149   rset_clear(allow, base);
1150   if ((ir->op2 & IRSLOAD_TYPECHECK)) {
1151     Reg tmp;
1152     if (ra_hasreg(dest) && rset_test(RSET_GPR, dest)) {
1153       tmp = dest;
1154     } else {
1155       tmp = ra_scratch(as, allow);
1156       rset_clear(allow, tmp);
1157     }
1158     if (irt_isnum(t) && !(ir->op2 & IRSLOAD_CONVERT))
1159       emit_dn(as, A64I_FMOV_D_R, (dest & 31), tmp);
1160     /* Need type check, even if the load result is unused. */
1161     asm_guardcc(as, irt_isnum(t) ? CC_LS : CC_NE);
1162     if (irt_type(t) >= IRT_NUM) {
1163       lua_assert(irt_isinteger(t) || irt_isnum(t));
1164       emit_nm(as, A64I_CMPx | A64F_SH(A64SH_LSR, 32),
1165               ra_allock(as, LJ_TISNUM << 15, allow), tmp);
1166     } else if (irt_isnil(t)) {
1167       emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(1), tmp);
1168     } else if (irt_ispri(t)) {
1169       emit_nm(as, A64I_CMPx,
 1170               ra_allock(as, ~((int64_t)~irt_toitype(t) << 47), allow), tmp);
1171     } else {
1172       Reg type = ra_scratch(as, allow);
1173       emit_n(as, (A64I_CMNx^A64I_K12) | A64F_U12(-irt_toitype(t)), type);
1174       emit_dn(as, A64I_ASRx | A64F_IMMR(47), type, tmp);
1175     }
1176     emit_lso(as, A64I_LDRx, tmp, base, ofs);
1177     return;
1178   }
1179   if (ra_hasreg(dest)) {
1180     emit_lso(as, irt_isnum(t) ? A64I_LDRd :
1181              (irt_isint(t) ? A64I_LDRw : A64I_LDRx), (dest & 31), base,
1182              ofs ^ ((LJ_BE && irt_isint(t) ? 4 : 0)));
1183   }
1184 }
1185 
1186 /* -- Allocations --------------------------------------------------------- */
1187 
1188 #if LJ_HASFFI
1189 static void asm_cnew(ASMState *as, IRIns *ir)
1190 {
1191   CTState *cts = ctype_ctsG(J2G(as->J));
1192   CTypeID id = (CTypeID)IR(ir->op1)->i;
1193   CTSize sz;
1194   CTInfo info = lj_ctype_info(cts, id, &sz);
1195   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_mem_newgco];
1196   IRRef args[4];
1197   RegSet allow = (RSET_GPR & ~RSET_SCRATCH);
1198   lua_assert(sz != CTSIZE_INVALID || (ir->o == IR_CNEW && ir->op2 != REF_NIL));
1199 
1200   as->gcsteps++;
1201   asm_setupresult(as, ir, ci);  /* GCcdata * */
1202   /* Initialize immutable cdata object. */
1203   if (ir->o == IR_CNEWI) {
1204     int32_t ofs = sizeof(GCcdata);
1205     Reg r = ra_alloc1(as, ir->op2, allow);
1206     lua_assert(sz == 4 || sz == 8);
1207     emit_lso(as, sz == 8 ? A64I_STRx : A64I_STRw, r, RID_RET, ofs);
1208   } else if (ir->op2 != REF_NIL) {  /* Create VLA/VLS/aligned cdata. */
1209     ci = &lj_ir_callinfo[IRCALL_lj_cdata_newv];
1210     args[0] = ASMREF_L;     /* lua_State *L */
1211     args[1] = ir->op1;      /* CTypeID id   */
1212     args[2] = ir->op2;      /* CTSize sz    */
1213     args[3] = ASMREF_TMP1;  /* CTSize align */
1214     asm_gencall(as, ci, args);
1215     emit_loadi(as, ra_releasetmp(as, ASMREF_TMP1), (int32_t)ctype_align(info));
1216     return;
1217   }
1218 
1219   /* Initialize gct and ctypeid. lj_mem_newgco() already sets marked. */
1220   {
1221     Reg r = (id < 65536) ? RID_X1 : ra_allock(as, id, allow);
1222     emit_lso(as, A64I_STRB, RID_TMP, RID_RET, offsetof(GCcdata, gct));
1223     emit_lso(as, A64I_STRH, r, RID_RET, offsetof(GCcdata, ctypeid));
1224     emit_d(as, A64I_MOVZw | A64F_U16(~LJ_TCDATA), RID_TMP);
1225     if (id < 65536) emit_d(as, A64I_MOVZw | A64F_U16(id), RID_X1);
1226   }
1227   args[0] = ASMREF_L;     /* lua_State *L */
1228   args[1] = ASMREF_TMP1;  /* MSize size   */
1229   asm_gencall(as, ci, args);
1230   ra_allockreg(as, (int32_t)(sz+sizeof(GCcdata)),
1231                ra_releasetmp(as, ASMREF_TMP1));
1232 }
1233 #else
1234 #define asm_cnew(as, ir)        ((void)0)
1235 #endif
1236 
1237 /* -- Write barriers ------------------------------------------------------ */
1238 
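     /* Table write barrier: a black table is turned gray again and linked into gc.grayagain. */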
1239 static void asm_tbar(ASMState *as, IRIns *ir)
1240 {
1241   Reg tab = ra_alloc1(as, ir->op1, RSET_GPR);
1242   Reg link = ra_scratch(as, rset_exclude(RSET_GPR, tab));
1243   Reg gr = ra_allock(as, i64ptr(J2G(as->J)),
1244                      rset_exclude(rset_exclude(RSET_GPR, tab), link));
1245   Reg mark = RID_TMP;
1246   MCLabel l_end = emit_label(as);
1247   emit_lso(as, A64I_STRx, link, tab, (int32_t)offsetof(GCtab, gclist));
1248   emit_lso(as, A64I_STRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1249   emit_lso(as, A64I_STRx, tab, gr,
1250            (int32_t)offsetof(global_State, gc.grayagain));
1251   emit_dn(as, A64I_ANDw^emit_isk13(~LJ_GC_BLACK, 0), mark, mark);
1252   emit_lso(as, A64I_LDRx, link, gr,
1253            (int32_t)offsetof(global_State, gc.grayagain));
1254   emit_cond_branch(as, CC_EQ, l_end);
1255   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), mark);
1256   emit_lso(as, A64I_LDRB, mark, tab, (int32_t)offsetof(GCtab, marked));
1257 }
1258 
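     /* Upvalue write barrier: call lj_gc_barrieruv() if the upvalue is black and the value is white. */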
1259 static void asm_obar(ASMState *as, IRIns *ir)
1260 {
1261   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_barrieruv];
1262   IRRef args[2];
1263   MCLabel l_end;
1264   RegSet allow = RSET_GPR;
1265   Reg obj, val, tmp;
1266   /* No need for other object barriers (yet). */
1267   lua_assert(IR(ir->op1)->o == IR_UREFC);
1268   ra_evictset(as, RSET_SCRATCH);
1269   l_end = emit_label(as);
1270   args[0] = ASMREF_TMP1;  /* global_State *g */
1271   args[1] = ir->op1;      /* TValue *tv      */
1272   asm_gencall(as, ci, args);
 1273   ra_allockreg(as, i64ptr(J2G(as->J)), ra_releasetmp(as, ASMREF_TMP1));
1274   obj = IR(ir->op1)->r;
1275   tmp = ra_scratch(as, rset_exclude(allow, obj));
1276   emit_cond_branch(as, CC_EQ, l_end);
1277   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_BLACK, 0), tmp);
1278   emit_cond_branch(as, CC_EQ, l_end);
1279   emit_n(as, A64I_TSTw^emit_isk13(LJ_GC_WHITES, 0), RID_TMP);
1280   val = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, obj));
1281   emit_lso(as, A64I_LDRB, tmp, obj,
1282      (int32_t)offsetof(GCupval, marked)-(int32_t)offsetof(GCupval, tv));
1283   emit_lso(as, A64I_LDRB, RID_TMP, val, (int32_t)offsetof(GChead, marked));
1284 }
1285 
1286 /* -- Arithmetic and logic operations ------------------------------------- */
1287 
1288 static void asm_fparith(ASMState *as, IRIns *ir, A64Ins ai)
1289 {
1290   Reg dest = ra_dest(as, ir, RSET_FPR);
1291   Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1292   right = (left >> 8); left &= 255;
1293   emit_dnm(as, ai, (dest & 31), (left & 31), (right & 31));
1294 }
1295 
1296 static void asm_fpunary(ASMState *as, IRIns *ir, A64Ins ai)
1297 {
1298   Reg dest = ra_dest(as, ir, RSET_FPR);
1299   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_FPR);
1300   emit_dn(as, ai, (dest & 31), (left & 31));
1301 }
1302 
1303 static void asm_fpmath(ASMState *as, IRIns *ir)
1304 {
1305   IRFPMathOp fpm = (IRFPMathOp)ir->op2;
1306   if (fpm == IRFPM_SQRT) {
1307     asm_fpunary(as, ir, A64I_FSQRTd);
1308   } else if (fpm <= IRFPM_TRUNC) {
1309     asm_fpunary(as, ir, fpm == IRFPM_FLOOR ? A64I_FRINTMd :
1310                         fpm == IRFPM_CEIL ? A64I_FRINTPd : A64I_FRINTZd);
1311   } else if (fpm == IRFPM_EXP2 && asm_fpjoin_pow(as, ir)) {
1312     return;
1313   } else {
1314     asm_callid(as, ir, IRCALL_lj_vm_floor + fpm);
1315   }
1316 }
1317 
1318 static int asm_swapops(ASMState *as, IRRef lref, IRRef rref)
1319 {
1320   IRIns *ir;
1321   if (irref_isk(rref))
1322     return 0;  /* Don't swap constants to the left. */
1323   if (irref_isk(lref))
1324     return 1;  /* But swap constants to the right. */
1325   ir = IR(rref);
1326   if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1327       (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1328       (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1329     return 0;  /* Don't swap fusable operands to the left. */
1330   ir = IR(lref);
1331   if ((ir->o >= IR_BSHL && ir->o <= IR_BSAR) ||
1332       (ir->o == IR_ADD && ir->op1 == ir->op2) ||
1333       (ir->o == IR_CONV && ir->op2 == ((IRT_I64<<IRCONV_DSH)|IRT_INT|IRCONV_SEXT)))
1334     return 1;  /* But swap fusable operands to the right. */
1335   return 0;  /* Otherwise don't swap. */
1336 }
1337 
1338 static void asm_intop(ASMState *as, IRIns *ir, A64Ins ai)
1339 {
1340   IRRef lref = ir->op1, rref = ir->op2;
1341   Reg left, dest = ra_dest(as, ir, RSET_GPR);
1342   uint32_t m;
1343   if ((ai & ~A64I_S) != A64I_SUBw && asm_swapops(as, lref, rref)) {
1344     IRRef tmp = lref; lref = rref; rref = tmp;
1345   }
1346   left = ra_hintalloc(as, lref, dest, RSET_GPR);
1347   if (irt_is64(ir->t)) ai |= A64I_X;
1348   m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1349   if (irt_isguard(ir->t)) {  /* For IR_ADDOV etc. */
1350     asm_guardcc(as, CC_VS);
1351     ai |= A64I_S;
1352   }
1353   emit_dn(as, ai^m, dest, left);
1354 }
1355 
1356 static void asm_intop_s(ASMState *as, IRIns *ir, A64Ins ai)
1357 {
1358   if (as->flagmcp == as->mcp) {  /* Drop cmp r, #0. */
1359     as->flagmcp = NULL;
1360     as->mcp++;
1361     ai |= A64I_S;
1362   }
1363   asm_intop(as, ir, ai);
1364 }
1365 
1366 static void asm_intneg(ASMState *as, IRIns *ir)
1367 {
1368   Reg dest = ra_dest(as, ir, RSET_GPR);
1369   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1370   emit_dm(as, irt_is64(ir->t) ? A64I_NEGx : A64I_NEGw, dest, left);
1371 }
1372 
1373 /* NYI: use add/shift for MUL(OV) with constants. FOLD only does 2^k. */
1374 static void asm_intmul(ASMState *as, IRIns *ir)
1375 {
1376   Reg dest = ra_dest(as, ir, RSET_GPR);
1377   Reg left = ra_alloc1(as, ir->op1, rset_exclude(RSET_GPR, dest));
1378   Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1379   if (irt_isguard(ir->t)) {  /* IR_MULOV */
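         /* Check that the high word of the 64 bit product is the sign-extension of the low word. */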
1380     asm_guardcc(as, CC_NE);
1381     emit_dm(as, A64I_MOVw, dest, dest);  /* Zero-extend. */
1382     emit_nm(as, A64I_CMPw | A64F_SH(A64SH_ASR, 31), RID_TMP, dest);
1383     emit_dn(as, A64I_ASRx | A64F_IMMR(32), RID_TMP, dest);
1384     emit_dnm(as, A64I_SMULL, dest, right, left);
1385   } else {
1386     emit_dnm(as, irt_is64(ir->t) ? A64I_MULx : A64I_MULw, dest, left, right);
1387   }
1388 }
1389 
1390 static void asm_add(ASMState *as, IRIns *ir)
1391 {
1392   if (irt_isnum(ir->t)) {
1393     if (!asm_fusemadd(as, ir, A64I_FMADDd, A64I_FMADDd))
1394       asm_fparith(as, ir, A64I_FADDd);
1395     return;
1396   }
1397   asm_intop_s(as, ir, A64I_ADDw);
1398 }
1399 
1400 static void asm_sub(ASMState *as, IRIns *ir)
1401 {
1402   if (irt_isnum(ir->t)) {
1403     if (!asm_fusemadd(as, ir, A64I_FNMSUBd, A64I_FMSUBd))
1404       asm_fparith(as, ir, A64I_FSUBd);
1405     return;
1406   }
1407   asm_intop_s(as, ir, A64I_SUBw);
1408 }
1409 
1410 static void asm_mul(ASMState *as, IRIns *ir)
1411 {
1412   if (irt_isnum(ir->t)) {
1413     asm_fparith(as, ir, A64I_FMULd);
1414     return;
1415   }
1416   asm_intmul(as, ir);
1417 }
1418 
1419 static void asm_div(ASMState *as, IRIns *ir)
1420 {
1421 #if LJ_HASFFI
1422   if (!irt_isnum(ir->t))
1423     asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
1424                                           IRCALL_lj_carith_divu64);
1425   else
1426 #endif
1427     asm_fparith(as, ir, A64I_FDIVd);
1428 }
1429 
1430 static void asm_pow(ASMState *as, IRIns *ir)
1431 {
1432 #if LJ_HASFFI
1433   if (!irt_isnum(ir->t))
1434     asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
1435                                           IRCALL_lj_carith_powu64);
1436   else
1437 #endif
1438     asm_callid(as, ir, IRCALL_lj_vm_powi);
1439 }
1440 
1441 #define asm_addov(as, ir)       asm_add(as, ir)
1442 #define asm_subov(as, ir)       asm_sub(as, ir)
1443 #define asm_mulov(as, ir)       asm_mul(as, ir)
1444 
1445 #define asm_abs(as, ir)         asm_fpunary(as, ir, A64I_FABS)
1446 #define asm_atan2(as, ir)       asm_callid(as, ir, IRCALL_atan2)
1447 #define asm_ldexp(as, ir)       asm_callid(as, ir, IRCALL_ldexp)
1448 
1449 static void asm_mod(ASMState *as, IRIns *ir)
1450 {
1451 #if LJ_HASFFI
1452   if (!irt_isint(ir->t))
1453     asm_callid(as, ir, irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
1454                                           IRCALL_lj_carith_modu64);
1455   else
1456 #endif
1457     asm_callid(as, ir, IRCALL_lj_vm_modi);
1458 }
1459 
1460 static void asm_neg(ASMState *as, IRIns *ir)
1461 {
1462   if (irt_isnum(ir->t)) {
1463     asm_fpunary(as, ir, A64I_FNEGd);
1464     return;
1465   }
1466   asm_intneg(as, ir);
1467 }
1468 
1469 static void asm_band(ASMState *as, IRIns *ir)
1470 {
1471   A64Ins ai = A64I_ANDw;
1472   if (asm_fuseandshift(as, ir))
1473     return;
1474   if (as->flagmcp == as->mcp) {
1475     /* Try to drop cmp r, #0. */
1476     as->flagmcp = NULL;
1477     as->mcp++;
1478     ai = A64I_ANDSw;
1479   }
1480   asm_intop(as, ir, ai);
1481 }
1482 
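     /* OR/XOR with a BNOT operand fused into ORN/EON, else a plain integer op. */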
1483 static void asm_borbxor(ASMState *as, IRIns *ir, A64Ins ai)
1484 {
1485   IRRef lref = ir->op1, rref = ir->op2;
1486   IRIns *irl = IR(lref), *irr = IR(rref);
1487   if ((canfuse(as, irl) && irl->o == IR_BNOT && !irref_isk(rref)) ||
1488       (canfuse(as, irr) && irr->o == IR_BNOT && !irref_isk(lref))) {
1489     Reg left, dest = ra_dest(as, ir, RSET_GPR);
1490     uint32_t m;
1491     if (irl->o == IR_BNOT) {
1492       IRRef tmp = lref; lref = rref; rref = tmp;
1493     }
1494     left = ra_alloc1(as, lref, RSET_GPR);
1495     ai |= A64I_ON;
1496     if (irt_is64(ir->t)) ai |= A64I_X;
1497     m = asm_fuseopm(as, ai, IR(rref)->op1, rset_exclude(RSET_GPR, left));
1498     emit_dn(as, ai^m, dest, left);
1499   } else {
1500     asm_intop(as, ir, ai);
1501   }
1502 }
1503 
1504 static void asm_bor(ASMState *as, IRIns *ir)
1505 {
1506   if (asm_fuseorshift(as, ir))
1507     return;
1508   asm_borbxor(as, ir, A64I_ORRw);
1509 }
1510 
1511 #define asm_bxor(as, ir)        asm_borbxor(as, ir, A64I_EORw)
1512 
1513 static void asm_bnot(ASMState *as, IRIns *ir)
1514 {
1515   A64Ins ai = A64I_MVNw;
1516   Reg dest = ra_dest(as, ir, RSET_GPR);
1517   uint32_t m = asm_fuseopm(as, ai, ir->op1, RSET_GPR);
1518   if (irt_is64(ir->t)) ai |= A64I_X;
1519   emit_d(as, ai^m, dest);
1520 }
1521 
1522 static void asm_bswap(ASMState *as, IRIns *ir)
1523 {
1524   Reg dest = ra_dest(as, ir, RSET_GPR);
1525   Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1526   emit_dn(as, irt_is64(ir->t) ? A64I_REVx : A64I_REVw, dest, left);
1527 }
1528 
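     /* Constant shifts become UBFM/SBFM/EXTR; variable shifts use register ops. */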
1529 static void asm_bitshift(ASMState *as, IRIns *ir, A64Ins ai, A64Shift sh)
1530 {
1531   int32_t shmask = irt_is64(ir->t) ? 63 : 31;
1532   if (irref_isk(ir->op2)) {  /* Constant shifts. */
1533     Reg left, dest = ra_dest(as, ir, RSET_GPR);
1534     int32_t shift = (IR(ir->op2)->i & shmask);
1535     IRIns *irl = IR(ir->op1);
1536     if (shmask == 63) ai += A64I_UBFMx - A64I_UBFMw;
1537 
1538     /* Fuse BSHL + BSHR/BSAR into UBFM/SBFM aka UBFX/SBFX/UBFIZ/SBFIZ. */
1539     if ((sh == A64SH_LSR || sh == A64SH_ASR) && canfuse(as, irl)) {
1540       if (irl->o == IR_BSHL && irref_isk(irl->op2)) {
1541         int32_t shift2 = (IR(irl->op2)->i & shmask);
1542         shift = ((shift - shift2) & shmask);
1543         shmask -= shift2;
1544         ir = irl;
1545       }
1546     }
1547 
1548     left = ra_alloc1(as, ir->op1, RSET_GPR);
1549     switch (sh) {
1550     case A64SH_LSL:
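           /* lsl #n is ubfm with immr = (size-n) & mask and imms = size-1-n. */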
1551       emit_dn(as, ai | A64F_IMMS(shmask-shift) |
1552                   A64F_IMMR((shmask-shift+1)&shmask), dest, left);
1553       break;
1554     case A64SH_LSR: case A64SH_ASR:
1555       emit_dn(as, ai | A64F_IMMS(shmask) | A64F_IMMR(shift), dest, left);
1556       break;
1557     case A64SH_ROR:
1558       emit_dnm(as, ai | A64F_IMMS(shift), dest, left, left);
1559       break;
1560     }
1561   } else {  /* Variable-length shifts. */
1562     Reg dest = ra_dest(as, ir, RSET_GPR);
1563     Reg left = ra_alloc1(as, ir->op1, RSET_GPR);
1564     Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1565     emit_dnm(as, (shmask == 63 ? A64I_SHRx : A64I_SHRw) | A64F_BSH(sh), dest, left, right);
1566   }
1567 }
1568 
1569 #define asm_bshl(as, ir)        asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSL)
1570 #define asm_bshr(as, ir)        asm_bitshift(as, ir, A64I_UBFMw, A64SH_LSR)
1571 #define asm_bsar(as, ir)        asm_bitshift(as, ir, A64I_SBFMw, A64SH_ASR)
1572 #define asm_bror(as, ir)        asm_bitshift(as, ir, A64I_EXTRw, A64SH_ROR)
1573 #define asm_brol(as, ir)        lua_assert(0)
1574 
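     /* min/max via cmp/fcmp and csel/fcsel. */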
1575 static void asm_intmin_max(ASMState *as, IRIns *ir, A64CC cc)
1576 {
1577   Reg dest = ra_dest(as, ir, RSET_GPR);
1578   Reg left = ra_hintalloc(as, ir->op1, dest, RSET_GPR);
1579   Reg right = ra_alloc1(as, ir->op2, rset_exclude(RSET_GPR, left));
1580   emit_dnm(as, A64I_CSELw|A64F_CC(cc), dest, left, right);
1581   emit_nm(as, A64I_CMPw, left, right);
1582 }
1583 
1584 static void asm_fpmin_max(ASMState *as, IRIns *ir, A64CC fcc)
1585 {
1586   Reg dest = (ra_dest(as, ir, RSET_FPR) & 31);
1587   Reg right, left = ra_alloc2(as, ir, RSET_FPR);
1588   right = ((left >> 8) & 31); left &= 31;
1589   emit_dnm(as, A64I_FCSELd | A64F_CC(fcc), dest, left, right);
1590   emit_nm(as, A64I_FCMPd, left, right);
1591 }
1592 
1593 static void asm_min_max(ASMState *as, IRIns *ir, A64CC cc, A64CC fcc)
1594 {
1595   if (irt_isnum(ir->t))
1596     asm_fpmin_max(as, ir, fcc);
1597   else
1598     asm_intmin_max(as, ir, cc);
1599 }
1600 
1601 #define asm_max(as, ir)         asm_min_max(as, ir, CC_GT, CC_HI)
1602 #define asm_min(as, ir)         asm_min_max(as, ir, CC_LT, CC_LO)
1603 
1604 /* -- Comparisons --------------------------------------------------------- */
1605 
1606 /* Map of comparisons to flags. ORDER IR. */
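     /* Low nibble = exit condition for int compares, high nibble = FP compares. */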
1607 static const uint8_t asm_compmap[IR_ABC+1] = {
1608   /* op  FP swp  int cc   FP cc */
1609   /* LT       */ CC_GE + (CC_HS << 4),
1610   /* GE    x  */ CC_LT + (CC_HI << 4),
1611   /* LE       */ CC_GT + (CC_HI << 4),
1612   /* GT    x  */ CC_LE + (CC_HS << 4),
1613   /* ULT   x  */ CC_HS + (CC_LS << 4),
1614   /* UGE      */ CC_LO + (CC_LO << 4),
1615   /* ULE   x  */ CC_HI + (CC_LO << 4),
1616   /* UGT      */ CC_LS + (CC_LS << 4),
1617   /* EQ       */ CC_NE + (CC_NE << 4),
1618   /* NE       */ CC_EQ + (CC_EQ << 4),
1619   /* ABC      */ CC_LS + (CC_LS << 4)  /* Same as UGT. */
1620 };
1621 
1622 /* FP comparisons. */
1623 static void asm_fpcomp(ASMState *as, IRIns *ir)
1624 {
1625   Reg left, right;
1626   A64Ins ai;
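       /* Swap operands for GE, GT, ULT, ULE (marked in asm_compmap above). */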
1627   int swp = ((ir->o ^ (ir->o >> 2)) & ~(ir->o >> 3) & 1);
1628   if (!swp && irref_isk(ir->op2) && ir_knum(IR(ir->op2))->u64 == 0) {
1629     left = (ra_alloc1(as, ir->op1, RSET_FPR) & 31);
1630     right = 0;
1631     ai = A64I_FCMPZd;
1632   } else {
1633     left = ra_alloc2(as, ir, RSET_FPR);
1634     if (swp) {
1635       right = (left & 31); left = ((left >> 8) & 31);
1636     } else {
1637       right = ((left >> 8) & 31); left &= 31;
1638     }
1639     ai = A64I_FCMPd;
1640   }
1641   asm_guardcc(as, (asm_compmap[ir->o] >> 4));
1642   emit_nm(as, ai, left, right);
1643 }
1644 
1645 /* Integer comparisons. */
1646 static void asm_intcomp(ASMState *as, IRIns *ir)
1647 {
1648   A64CC oldcc, cc = (asm_compmap[ir->o] & 15);
1649   A64Ins ai = irt_is64(ir->t) ? A64I_CMPx : A64I_CMPw;
1650   IRRef lref = ir->op1, rref = ir->op2;
1651   Reg left;
1652   uint32_t m;
1653   int cmpprev0 = 0;
1654   lua_assert(irt_is64(ir->t) || irt_isint(ir->t) ||
1655              irt_isu32(ir->t) || irt_isaddr(ir->t) || irt_isu8(ir->t));
1656   if (asm_swapops(as, lref, rref)) {
1657     IRRef tmp = lref; lref = rref; rref = tmp;
1658     if (cc >= CC_GE) cc ^= 7;  /* LT <-> GT, LE <-> GE */
1659     else if (cc > CC_NE) cc ^= 11;  /* LO <-> HI, LS <-> HS */
1660   }
1661   oldcc = cc;
1662   if (irref_isk(rref) && get_k64val(IR(rref)) == 0) {
1663     IRIns *irl = IR(lref);
1664     if (cc == CC_GE) cc = CC_PL;
1665     else if (cc == CC_LT) cc = CC_MI;
1666     else if (cc > CC_NE) goto nocombine;  /* Other conds don't work with tst. */
1667     cmpprev0 = (irl+1 == ir);
1668     /* Combine and-cmp-bcc into tbz/tbnz or and-cmp into tst. */
1669     if (cmpprev0 && irl->o == IR_BAND && !ra_used(irl)) {
1670       IRRef blref = irl->op1, brref = irl->op2;
1671       uint32_t m2 = 0;
1672       Reg bleft;
1673       if (asm_swapops(as, blref, brref)) {
1674         Reg tmp = blref; blref = brref; brref = tmp;
1675       }
1676       if (irref_isk(brref)) {
1677         uint64_t k = get_k64val(IR(brref));
1678         if (k && !(k & (k-1)) && (cc == CC_EQ || cc == CC_NE)) {
1679           asm_guardtnb(as, cc == CC_EQ ? A64I_TBZ : A64I_TBNZ,
1680                        ra_alloc1(as, blref, RSET_GPR), emit_ctz64(k));
1681           return;
1682         }
1683         m2 = emit_isk13(k, irt_is64(irl->t));
1684       }
1685       bleft = ra_alloc1(as, blref, RSET_GPR);
1686       ai = (irt_is64(irl->t) ? A64I_TSTx : A64I_TSTw);
1687       if (!m2)
1688         m2 = asm_fuseopm(as, ai, brref, rset_exclude(RSET_GPR, bleft));
1689       asm_guardcc(as, cc);
1690       emit_n(as, ai^m2, bleft);
1691       return;
1692     }
1693     if (cc == CC_EQ || cc == CC_NE) {
1694       /* Combine cmp-bcc into cbz/cbnz. */
1695       ai = cc == CC_EQ ? A64I_CBZ : A64I_CBNZ;
1696       if (irt_is64(ir->t)) ai |= A64I_X;
1697       asm_guardcnb(as, ai, ra_alloc1(as, lref, RSET_GPR));
1698       return;
1699     }
1700   }
1701 nocombine:
1702   left = ra_alloc1(as, lref, RSET_GPR);
1703   m = asm_fuseopm(as, ai, rref, rset_exclude(RSET_GPR, left));
1704   asm_guardcc(as, cc);
1705   emit_n(as, ai^m, left);
1706   /* Signed comparison with zero and referencing previous ins? */
1707   if (cmpprev0 && (oldcc <= CC_NE || oldcc >= CC_GE))
1708     as->flagmcp = as->mcp;  /* Allow elimination of the compare. */
1709 }
1710 
1711 static void asm_comp(ASMState *as, IRIns *ir)
1712 {
1713   if (irt_isnum(ir->t))
1714     asm_fpcomp(as, ir);
1715   else
1716     asm_intcomp(as, ir);
1717 }
1718 
1719 #define asm_equal(as, ir)       asm_comp(as, ir)
1720 
1721 /* -- Support for 64 bit ops in 32 bit mode ------------------------------- */
1722 
1723 /* Hiword op of a split 64 bit op. Previous op must be the loword op. */
1724 static void asm_hiop(ASMState *as, IRIns *ir)
1725 {
1726   UNUSED(as); UNUSED(ir); lua_assert(0);  /* Unused on 64 bit. */
1727 }
1728 
1729 /* -- Profiling ----------------------------------------------------------- */
1730 
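     /* Exit the trace if the profiler hook is set. */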
1731 static void asm_prof(ASMState *as, IRIns *ir)
1732 {
1733   uint32_t k = emit_isk13(HOOK_PROFILE, 0);
1734   lua_assert(k != 0);
1735   UNUSED(ir);
1736   asm_guardcc(as, CC_NE);
1737   emit_n(as, A64I_TSTw^k, RID_TMP);
1738   emit_lsptr(as, A64I_LDRB, RID_TMP, (void *)&J2G(as->J)->hookmask);
1739 }
1740 
1741 /* -- Stack handling ------------------------------------------------------ */
1742 
1743 /* Check Lua stack size for overflow. Use exit handler as fallback. */
1744 static void asm_stack_check(ASMState *as, BCReg topslot,
1745                             IRIns *irp, RegSet allow, ExitNo exitno)
1746 {
1747   Reg pbase;
1748   uint32_t k;
1749   if (irp) {
1750     if (!ra_hasspill(irp->s)) {
1751       pbase = irp->r;
1752       lua_assert(ra_hasreg(pbase));
1753     } else if (allow) {
1754       pbase = rset_pickbot(allow);
1755     } else {
1756       pbase = RID_RET;
1757       emit_lso(as, A64I_LDRx, RID_RET, RID_SP, 0);  /* Restore temp register. */
1758     }
1759   } else {
1760     pbase = RID_BASE;
1761   }
1762   emit_cond_branch(as, CC_LS, asm_exitstub_addr(as, exitno));
1763   k = emit_isk12((8*topslot));
1764   lua_assert(k);
1765   emit_n(as, A64I_CMPx^k, RID_TMP);
1766   emit_dnm(as, A64I_SUBx, RID_TMP, RID_TMP, pbase);
1767   emit_lso(as, A64I_LDRx, RID_TMP, RID_TMP,
1768            (int32_t)offsetof(lua_State, maxstack));
1769   if (irp) {  /* Must not spill arbitrary registers in head of side trace. */
1770     if (ra_hasspill(irp->s))
1771       emit_lso(as, A64I_LDRx, pbase, RID_SP, sps_scale(irp->s));
1772     emit_lso(as, A64I_LDRx, RID_TMP, RID_GL, glofs(as, &J2G(as->J)->cur_L));
1773     if (ra_hasspill(irp->s) && !allow)
1774       emit_lso(as, A64I_STRx, RID_RET, RID_SP, 0);  /* Save temp register. */
1775   } else {
1776     emit_getgl(as, RID_TMP, cur_L);
1777   }
1778 }
1779 
1780 /* Restore Lua stack from on-trace state. */
1781 static void asm_stack_restore(ASMState *as, SnapShot *snap)
1782 {
1783   SnapEntry *map = &as->T->snapmap[snap->mapofs];
1784 #ifdef LUA_USE_ASSERT
1785   SnapEntry *flinks = &as->T->snapmap[snap_nextofs(as->T, snap)-1-LJ_FR2];
1786 #endif
1787   MSize n, nent = snap->nent;
1788   /* Store the value of all modified slots to the Lua stack. */
1789   for (n = 0; n < nent; n++) {
1790     SnapEntry sn = map[n];
1791     BCReg s = snap_slot(sn);
1792     int32_t ofs = 8*((int32_t)s-1-LJ_FR2);
1793     IRRef ref = snap_ref(sn);
1794     IRIns *ir = IR(ref);
1795     if ((sn & SNAP_NORESTORE))
1796       continue;
1797     if (irt_isnum(ir->t)) {
1798       Reg src = ra_alloc1(as, ref, RSET_FPR);
1799       emit_lso(as, A64I_STRd, (src & 31), RID_BASE, ofs);
1800     } else {
1801       asm_tvstore64(as, RID_BASE, ofs, ref);
1802     }
1803     checkmclim(as);
1804   }
1805   lua_assert(map + nent == flinks);
1806 }
1807 
1808 /* -- GC handling --------------------------------------------------------- */
1809 
1810 /* Check GC threshold and do one or more GC steps. */
1811 static void asm_gc_check(ASMState *as)
1812 {
1813   const CCallInfo *ci = &lj_ir_callinfo[IRCALL_lj_gc_step_jit];
1814   IRRef args[2];
1815   MCLabel l_end;
1816   Reg tmp1, tmp2;
1817   ra_evictset(as, RSET_SCRATCH);
1818   l_end = emit_label(as);
1819   /* Exit trace if in GCSatomic or GCSfinalize. Avoids syncing GC objects. */
1820   asm_guardcnb(as, A64I_CBNZ, RID_RET); /* Assumes asm_snap_prep() is done. */
1821   args[0] = ASMREF_TMP1;  /* global_State *g */
1822   args[1] = ASMREF_TMP2;  /* MSize steps     */
1823   asm_gencall(as, ci, args);
1824   tmp1 = ra_releasetmp(as, ASMREF_TMP1);
1825   tmp2 = ra_releasetmp(as, ASMREF_TMP2);
1826   emit_loadi(as, tmp2, as->gcsteps);
1827   /* Jump around GC step if GC total < GC threshold. */
1828   emit_cond_branch(as, CC_LS, l_end);
1829   emit_nm(as, A64I_CMPx, RID_TMP, tmp2);
1830   emit_lso(as, A64I_LDRx, tmp2, tmp1,
1831            (int32_t)offsetof(global_State, gc.threshold));
1832   emit_lso(as, A64I_LDRx, RID_TMP, tmp1,
1833            (int32_t)offsetof(global_State, gc.total));
1834   ra_allockreg(as, i64ptr(J2G(as->J)), tmp1);
1835   as->gcsteps = 0;
1836   checkmclim(as);
1837 }
1838 
1839 /* -- Loop handling ------------------------------------------------------- */
1840 
1841 /* Fixup the loop branch. */
1842 static void asm_loop_fixup(ASMState *as)
1843 {
1844   MCode *p = as->mctop;
1845   MCode *target = as->mcp;
1846   if (as->loopinv) {  /* Inverted loop branch? */
1847     uint32_t mask = (p[-2] & 0x7e000000) == 0x36000000 ? 0x3fffu : 0x7ffffu;
1848     ptrdiff_t delta = target - (p - 2);
1849     /* asm_guard* already inverted the bcc/tnb/cnb and patched the final b. */
1850     p[-2] |= ((uint32_t)delta & mask) << 5;
1851   } else {
1852     ptrdiff_t delta = target - (p - 1);
1853     p[-1] = A64I_B | ((uint32_t)(delta) & 0x03ffffffu);
1854   }
1855 }
1856 
1857 /* -- Head of trace ------------------------------------------------------- */
1858 
1859 /* Reload L register from g->cur_L. */
1860 static void asm_head_lreg(ASMState *as)
1861 {
1862   IRIns *ir = IR(ASMREF_L);
1863   if (ra_used(ir)) {
1864     Reg r = ra_dest(as, ir, RSET_GPR);
1865     emit_getgl(as, r, cur_L);
1866     ra_evictk(as);
1867   }
1868 }
1869 
1870 /* Coalesce BASE register for a root trace. */
1871 static void asm_head_root_base(ASMState *as)
1872 {
1873   IRIns *ir;
1874   asm_head_lreg(as);
1875   ir = IR(REF_BASE);
1876   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1877     ra_spill(as, ir);
1878   ra_destreg(as, ir, RID_BASE);
1879 }
1880 
1881 /* Coalesce BASE register for a side trace. */
1882 static RegSet asm_head_side_base(ASMState *as, IRIns *irp, RegSet allow)
1883 {
1884   IRIns *ir;
1885   asm_head_lreg(as);
1886   ir = IR(REF_BASE);
1887   if (ra_hasreg(ir->r) && (rset_test(as->modset, ir->r) || irt_ismarked(ir->t)))
1888     ra_spill(as, ir);
1889   if (ra_hasspill(irp->s)) {
1890     rset_clear(allow, ra_dest(as, ir, allow));
1891   } else {
1892     Reg r = irp->r;
1893     lua_assert(ra_hasreg(r));
1894     rset_clear(allow, r);
1895     if (r != ir->r && !rset_test(as->freeset, r))
1896       ra_restore(as, regcost_ref(as->cost[r]));
1897     ra_destreg(as, ir, r);
1898   }
1899   return allow;
1900 }
1901 
1902 /* -- Tail of trace ------------------------------------------------------- */
1903 
1904 /* Fixup the tail code. */
1905 static void asm_tail_fixup(ASMState *as, TraceNo lnk)
1906 {
1907   MCode *p = as->mctop;
1908   MCode *target;
1909   /* Undo the sp adjustment in BC_JLOOP when exiting to the interpreter. */
1910   int32_t spadj = as->T->spadjust + (lnk ? 0 : sps_scale(SPS_FIXED));
1911   if (spadj == 0) {
1912     *--p = A64I_LE(A64I_NOP);
1913     as->mctop = p;
1914   } else {
1915     /* Patch stack adjustment. */
1916     uint32_t k = emit_isk12(spadj);
1917     lua_assert(k);
1918     p[-2] = (A64I_ADDx^k) | A64F_D(RID_SP) | A64F_N(RID_SP);
1919   }
1920   /* Patch exit branch. */
1921   target = lnk ? traceref(as->J, lnk)->mcode : (MCode *)lj_vm_exit_interp;
1922   p[-1] = A64I_B | (((target-p)+1)&0x03ffffffu);
1923 }
1924 
1925 /* Prepare tail of code. */
1926 static void asm_tail_prep(ASMState *as)
1927 {
1928   MCode *p = as->mctop - 1;  /* Leave room for exit branch. */
1929   if (as->loopref) {
1930     as->invmcp = as->mcp = p;
1931   } else {
1932     as->mcp = p-1;  /* Leave room for stack pointer adjustment. */
1933     as->invmcp = NULL;
1934   }
1935   *p = 0;  /* Prevent load/store merging. */
1936 }
1937 
1938 /* -- Trace setup --------------------------------------------------------- */
1939 
1940 /* Ensure there are enough stack slots for call arguments. */
1941 static Reg asm_setup_call_slots(ASMState *as, IRIns *ir, const CCallInfo *ci)
1942 {
1943   IRRef args[CCI_NARGS_MAX*2];
1944   uint32_t i, nargs = CCI_XNARGS(ci);
1945   int nslots = 0, ngpr = REGARG_NUMGPR, nfpr = REGARG_NUMFPR;
1946   asm_collectargs(as, ir, ci, args);
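       /* Stack-passed args each take a 64 bit slot, i.e. two spill slots. */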
1947   for (i = 0; i < nargs; i++) {
1948     if (args[i] && irt_isfp(IR(args[i])->t)) {
1949       if (nfpr > 0) nfpr--; else nslots += 2;
1950     } else {
1951       if (ngpr > 0) ngpr--; else nslots += 2;
1952     }
1953   }
1954   if (nslots > as->evenspill)  /* Leave room for args in stack slots. */
1955     as->evenspill = nslots;
1956   return REGSP_HINT(RID_RET);
1957 }
1958 
1959 static void asm_setup_target(ASMState *as)
1960 {
1961   /* May need extra exit for asm_stack_check on side traces. */
1962   asm_exitstub_setup(as, as->T->nsnap + (as->parent ? 1 : 0));
1963 }
1964 
1965 #if LJ_BE
1966 /* ARM64 instructions are always little-endian. Swap for ARM64BE. */
1967 static void asm_mcode_fixup(MCode *mcode, MSize size)
1968 {
1969   MCode *pe = (MCode *)((char *)mcode + size);
1970   while (mcode < pe) {
1971     MCode ins = *mcode;
1972     *mcode++ = lj_bswap(ins);
1973   }
1974 }
1975 #define LJ_TARGET_MCODE_FIXUP   1
1976 #endif
1977 
1978 /* -- Trace patching ------------------------------------------------------ */
1979 
1980 /* Patch exit jumps of existing machine code to a new target. */
1981 void lj_asm_patchexit(jit_State *J, GCtrace *T, ExitNo exitno, MCode *target)
1982 {
1983   MCode *p = T->mcode;
1984   MCode *pe = (MCode *)((char *)p + T->szmcode);
1985   MCode *cstart = NULL, *cend = p;
1986   MCode *mcarea = lj_mcode_patch(J, p, 0);
1987   MCode *px = exitstub_trace_addr(T, exitno);
1988   for (; p < pe; p++) {
1989     /* Look for exitstub branch, replace with branch to target. */
1990     MCode ins = A64I_LE(*p);
1991     if ((ins & 0xff000000u) == 0x54000000u &&
1992         ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
1993       /* Patch bcc exitstub. */
1994       *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
1995       cend = p+1;
1996       if (!cstart) cstart = p;
1997     } else if ((ins & 0xfc000000u) == 0x14000000u &&
1998                ((ins ^ (px-p)) & 0x03ffffffu) == 0) {
1999       /* Patch b exitstub. */
2000       *p = A64I_LE((ins & 0xfc000000u) | ((target-p) & 0x03ffffffu));
2001       cend = p+1;
2002       if (!cstart) cstart = p;
2003     } else if ((ins & 0x7e000000u) == 0x34000000u &&
2004                ((ins ^ ((px-p)<<5)) & 0x00ffffe0u) == 0) {
2005       /* Patch cbz/cbnz exitstub. */
2006       *p = A64I_LE((ins & 0xff00001fu) | (((target-p)<<5) & 0x00ffffe0u));
2007       cend = p+1;
2008       if (!cstart) cstart = p;
2009     } else if ((ins & 0x7e000000u) == 0x36000000u &&
2010                ((ins ^ ((px-p)<<5)) & 0x0007ffe0u) == 0) {
2011       /* Patch tbz/tbnz exitstub. */
2012       *p = A64I_LE((ins & 0xfff8001fu) | (((target-p)<<5) & 0x0007ffe0u));
2013       cend = p+1;
2014       if (!cstart) cstart = p;
2015     }
2016   }
2017   lua_assert(cstart != NULL);
2018   lj_mcode_sync(cstart, cend);
2019   lj_mcode_patch(J, mcarea, 1);
2020 }
2021 
