lj_record.c

DEFINITIONS

This source file includes the following definitions:
  1. rec_check_ir
  2. rec_check_slots
  3. sloadt
  4. sload
  5. getcurrf
  6. lj_record_objcmp
  7. lj_record_constify
  8. canonicalize_slots
  9. rec_stop
  10. find_kinit
  11. fori_load
  12. fori_arg
  13. rec_for_direction
  14. rec_for_iter
  15. rec_for_check
  16. rec_for_loop
  17. rec_for
  18. rec_iterl
  19. rec_loop
  20. innerloopleft
  21. rec_loop_interp
  22. rec_loop_jit
  23. rec_call_specialize
  24. rec_call_setup
  25. lj_record_call
  26. lj_record_tailcall
  27. check_downrec_unroll
  28. lj_record_ret
  29. rec_mm_prep
  30. lj_record_mm_lookup
  31. rec_mm_arith
  32. rec_mm_len
  33. rec_mm_callcomp
  34. rec_mm_equal
  35. rec_mm_comp
  36. rec_mm_comp_cdata
  37. rec_idx_abc
  38. rec_idx_key
  39. nommstr
  40. lj_record_idx
  41. rec_upvalue_constify
  42. rec_upvalue
  43. check_call_unroll
  44. rec_func_setup
  45. rec_func_vararg
  46. rec_func_lua
  47. rec_func_jit
  48. select_detect
  49. rec_varg
  50. rec_tnew
  51. rec_comp_prep
  52. rec_comp_fixup
  53. lj_record_ins
  54. rec_setup_root
  55. lj_record_setup

   1 /*
   2 ** Trace recorder (bytecode -> SSA IR).
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5 
   6 #define lj_record_c
   7 #define LUA_CORE
   8 
   9 #include "lj_obj.h"
  10 
  11 #if LJ_HASJIT
  12 
  13 #include "lj_err.h"
  14 #include "lj_str.h"
  15 #include "lj_tab.h"
  16 #include "lj_meta.h"
  17 #include "lj_frame.h"
  18 #if LJ_HASFFI
  19 #include "lj_ctype.h"
  20 #endif
  21 #include "lj_bc.h"
  22 #include "lj_ff.h"
  23 #include "lj_ir.h"
  24 #include "lj_jit.h"
  25 #include "lj_ircall.h"
  26 #include "lj_iropt.h"
  27 #include "lj_trace.h"
  28 #include "lj_record.h"
  29 #include "lj_ffrecord.h"
  30 #include "lj_snap.h"
  31 #include "lj_dispatch.h"
  32 #include "lj_vm.h"
  33 
  34 /* Some local macros to save typing. Undef'd at the end. */
  35 #define IR(ref)                 (&J->cur.ir[(ref)])
  36 
  37 /* Pass IR on to next optimization in chain (FOLD). */
  38 #define emitir(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))
  39 
  40 /* Emit raw IR without passing through optimizations. */
  41 #define emitir_raw(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))
  42 
  43 /* -- Sanity checks ------------------------------------------------------- */
  44 
  45 #ifdef LUA_USE_ASSERT
  46 /* Sanity check the whole IR -- sloooow. */
  47 static void rec_check_ir(jit_State *J)
  48 {
  49   IRRef i, nins = J->cur.nins, nk = J->cur.nk;
  50   lua_assert(nk <= REF_BIAS && nins >= REF_BIAS && nins < 65536);
  51   for (i = nins-1; i >= nk; i--) {
  52     IRIns *ir = IR(i);
  53     uint32_t mode = lj_ir_mode[ir->o];
  54     IRRef op1 = ir->op1;
  55     IRRef op2 = ir->op2;
  56     switch (irm_op1(mode)) {
  57     case IRMnone: lua_assert(op1 == 0); break;
  58     case IRMref: lua_assert(op1 >= nk);
  59       lua_assert(i >= REF_BIAS ? op1 < i : op1 > i); break;
  60     case IRMlit: break;
  61     case IRMcst: lua_assert(i < REF_BIAS); continue;
  62     }
  63     switch (irm_op2(mode)) {
  64     case IRMnone: lua_assert(op2 == 0); break;
  65     case IRMref: lua_assert(op2 >= nk);
  66       lua_assert(i >= REF_BIAS ? op2 < i : op2 > i); break;
  67     case IRMlit: break;
  68     case IRMcst: lua_assert(0); break;
  69     }
  70     if (ir->prev) {
  71       lua_assert(ir->prev >= nk);
  72       lua_assert(i >= REF_BIAS ? ir->prev < i : ir->prev > i);
  73       lua_assert(ir->o == IR_NOP || IR(ir->prev)->o == ir->o);
  74     }
  75   }
  76 }
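
/* [Editor's note] The checks above lean on the biased reference layout of
** the IR: constants grow downwards from REF_BIAS and instructions grow
** upwards, so an instruction's operands must point below its own position,
** while a constant may only reference constants created before it (i.e.
** higher refs). A minimal sketch of the constant test, assuming
** REF_BIAS == 0x8000 as in lj_ir.h (the demo name is ours, not recorder API):
*/
static int demo_ref_is_const(uint32_t ref)
{
  return ref < 0x8000u;  /* Same test irref_isk() applies to 16-bit refs. */
}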
  77 
  78 /* Compare stack slots and frames of the recorder and the VM. */
  79 static void rec_check_slots(jit_State *J)
  80 {
  81   BCReg s, nslots = J->baseslot + J->maxslot;
  82   int32_t depth = 0;
  83   cTValue *base = J->L->base - J->baseslot;
  84   lua_assert(J->baseslot >= 1);
  85   lua_assert(J->baseslot == 1 || (J->slot[J->baseslot-1] & TREF_FRAME));
  86   lua_assert(nslots <= LJ_MAX_JSLOTS);
  87   for (s = 0; s < nslots; s++) {
  88     TRef tr = J->slot[s];
  89     if (tr) {
  90       cTValue *tv = &base[s];
  91       IRRef ref = tref_ref(tr);
  92       IRIns *ir;
  93       lua_assert(ref >= J->cur.nk && ref < J->cur.nins);
  94       ir = IR(ref);
  95       lua_assert(irt_t(ir->t) == tref_t(tr));
  96       if (s == 0) {
  97         lua_assert(tref_isfunc(tr));
  98       } else if ((tr & TREF_FRAME)) {
  99         GCfunc *fn = gco2func(frame_gc(tv));
 100         BCReg delta = (BCReg)(tv - frame_prev(tv));
 101         lua_assert(tref_isfunc(tr));
 102         if (tref_isk(tr)) lua_assert(fn == ir_kfunc(ir));
 103         lua_assert(s > delta ? (J->slot[s-delta] & TREF_FRAME) : (s == delta));
 104         depth++;
 105       } else if ((tr & TREF_CONT)) {
 106         lua_assert(ir_kptr(ir) == gcrefp(tv->gcr, void));
 107         lua_assert((J->slot[s+1] & TREF_FRAME));
 108         depth++;
 109       } else {
 110         if (tvisnumber(tv))
 111           lua_assert(tref_isnumber(tr));  /* Could be IRT_INT etc., too. */
 112         else
 113           lua_assert(itype2irt(tv) == tref_type(tr));
 114         if (tref_isk(tr)) {  /* Compare constants. */
 115           TValue tvk;
 116           lj_ir_kvalue(J->L, &tvk, ir);
 117           if (!(tvisnum(&tvk) && tvisnan(&tvk)))
 118             lua_assert(lj_obj_equal(tv, &tvk));
 119           else
 120             lua_assert(tvisnum(tv) && tvisnan(tv));
 121         }
 122       }
 123     }
 124   }
 125   lua_assert(J->framedepth == depth);
 126 }
 127 #endif
 128 
 129 /* -- Type handling and specialization ------------------------------------ */
 130 
 131 /* Note: these functions return tagged references (TRef). */
 132 
 133 /* Specialize a slot to a specific type. Note: slot can be negative! */
 134 static TRef sloadt(jit_State *J, int32_t slot, IRType t, int mode)
 135 {
 136   /* Caller may set IRT_GUARD in t. */
 137   TRef ref = emitir_raw(IRT(IR_SLOAD, t), (int32_t)J->baseslot+slot, mode);
 138   J->base[slot] = ref;
 139   return ref;
 140 }
 141 
 142 /* Specialize a slot to the runtime type. Note: slot can be negative! */
 143 static TRef sload(jit_State *J, int32_t slot)
 144 {
 145   IRType t = itype2irt(&J->L->base[slot]);
 146   TRef ref = emitir_raw(IRTG(IR_SLOAD, t), (int32_t)J->baseslot+slot,
 147                         IRSLOAD_TYPECHECK);
 148   if (irtype_ispri(t)) ref = TREF_PRI(t);  /* Canonicalize primitive refs. */
 149   J->base[slot] = ref;
 150   return ref;
 151 }
 152 
 153 /* Get TRef from slot. Load slot and specialize if not done already. */
 154 #define getslot(J, s)   (J->base[(s)] ? J->base[(s)] : sload(J, (int32_t)(s)))
 155 
 156 /* Get TRef for current function. */
 157 static TRef getcurrf(jit_State *J)
 158 {
 159   if (J->base[-1])
 160     return J->base[-1];
 161   lua_assert(J->baseslot == 1);
 162   return sloadt(J, -1, IRT_FUNC, IRSLOAD_READONLY);
 163 }
 164 
 165 /* Compare for raw object equality.
 166 ** Returns 0 if the objects are the same.
 167 ** Returns 1 if they are different, but the same type.
 168 ** Returns 2 for two different types.
 169 ** Comparisons between primitives always return 1 -- no caller cares about it.
 170 */
 171 int lj_record_objcmp(jit_State *J, TRef a, TRef b, cTValue *av, cTValue *bv)
 172 {
 173   int diff = !lj_obj_equal(av, bv);
 174   if (!tref_isk2(a, b)) {  /* Shortcut, also handles primitives. */
 175     IRType ta = tref_isinteger(a) ? IRT_INT : tref_type(a);
 176     IRType tb = tref_isinteger(b) ? IRT_INT : tref_type(b);
 177     if (ta != tb) {
 178       /* Widen mixed number/int comparisons to number/number comparison. */
 179       if (ta == IRT_INT && tb == IRT_NUM) {
 180         a = emitir(IRTN(IR_CONV), a, IRCONV_NUM_INT);
 181         ta = IRT_NUM;
 182       } else if (ta == IRT_NUM && tb == IRT_INT) {
 183         b = emitir(IRTN(IR_CONV), b, IRCONV_NUM_INT);
 184       } else {
 185         return 2;  /* Two different types are never equal. */
 186       }
 187     }
 188     emitir(IRTG(diff ? IR_NE : IR_EQ, ta), a, b);
 189   }
 190   return diff;
 191 }
 192 
 193 /* Constify a value. Returns 0 for non-representable object types. */
 194 TRef lj_record_constify(jit_State *J, cTValue *o)
 195 {
 196   if (tvisgcv(o))
 197     return lj_ir_kgc(J, gcV(o), itype2irt(o));
 198   else if (tvisint(o))
 199     return lj_ir_kint(J, intV(o));
 200   else if (tvisnum(o))
 201     return lj_ir_knumint(J, numV(o));
 202   else if (tvisbool(o))
 203     return TREF_PRI(itype2irt(o));
 204   else
 205     return 0;  /* Can't represent lightuserdata (pointless). */
 206 }
 207 
 208 /* -- Record loop ops ----------------------------------------------------- */
 209 
 210 /* Loop event. */
 211 typedef enum {
 212   LOOPEV_LEAVE,         /* Loop is left or not entered. */
 213   LOOPEV_ENTERLO,       /* Loop is entered with a low iteration count left. */
 214   LOOPEV_ENTER          /* Loop is entered. */
 215 } LoopEvent;
 216 
 217 /* Canonicalize slots: convert integers to numbers. */
 218 static void canonicalize_slots(jit_State *J)
 219 {
 220   BCReg s;
 221   if (LJ_DUALNUM) return;
 222   for (s = J->baseslot+J->maxslot-1; s >= 1; s--) {
 223     TRef tr = J->slot[s];
 224     if (tref_isinteger(tr)) {
 225       IRIns *ir = IR(tref_ref(tr));
 226       if (!(ir->o == IR_SLOAD && (ir->op2 & IRSLOAD_READONLY)))
 227         J->slot[s] = emitir(IRTN(IR_CONV), tr, IRCONV_NUM_INT);
 228     }
 229   }
 230 }
 231 
 232 /* Stop recording. */
 233 static void rec_stop(jit_State *J, TraceLink linktype, TraceNo lnk)
 234 {
 235   lj_trace_end(J);
 236   J->cur.linktype = (uint8_t)linktype;
 237   J->cur.link = (uint16_t)lnk;
 238   /* Looping back at the same stack level? */
 239   if (lnk == J->cur.traceno && J->framedepth + J->retdepth == 0) {
 240     if ((J->flags & JIT_F_OPT_LOOP))  /* Shall we try to create a loop? */
 241       goto nocanon;  /* Do not canonicalize or we lose the narrowing. */
 242     if (J->cur.root)  /* Otherwise ensure we always link to the root trace. */
 243       J->cur.link = J->cur.root;
 244   }
 245   canonicalize_slots(J);
 246 nocanon:
 247   /* Note: all loop ops must set J->pc to the following instruction! */
 248   lj_snap_add(J);  /* Add loop snapshot. */
 249   J->needsnap = 0;
 250   J->mergesnap = 1;  /* In case recording continues. */
 251 }
 252 
 253 /* Search bytecode backwards for a int/num constant slot initializer. */
 254 static TRef find_kinit(jit_State *J, const BCIns *endpc, BCReg slot, IRType t)
 255 {
 256   /* This algorithm is rather simplistic and assumes quite a bit about
 257   ** how the bytecode is generated. It works fine for FORI initializers,
 258   ** but it won't necessarily work in other cases (e.g. iterator arguments).
 259   ** It doesn't do anything fancy, either (like backpropagating MOVs).
 260   */
 261   const BCIns *pc, *startpc = proto_bc(J->pt);
 262   for (pc = endpc-1; pc > startpc; pc--) {
 263     BCIns ins = *pc;
 264     BCOp op = bc_op(ins);
 265     /* First try to find the last instruction that stores to this slot. */
 266     if (bcmode_a(op) == BCMbase && bc_a(ins) <= slot) {
 267       return 0;  /* Multiple results, e.g. from a CALL or KNIL. */
 268     } else if (bcmode_a(op) == BCMdst && bc_a(ins) == slot) {
 269       if (op == BC_KSHORT || op == BC_KNUM) {  /* Found const. initializer. */
 270         /* Now try to verify there's no forward jump across it. */
 271         const BCIns *kpc = pc;
 272         for (; pc > startpc; pc--)
 273           if (bc_op(*pc) == BC_JMP) {
 274             const BCIns *target = pc+bc_j(*pc)+1;
 275             if (target > kpc && target <= endpc)
 276               return 0;  /* Conditional assignment. */
 277           }
 278         if (op == BC_KSHORT) {
 279           int32_t k = (int32_t)(int16_t)bc_d(ins);
 280           return t == IRT_INT ? lj_ir_kint(J, k) : lj_ir_knum(J, (lua_Number)k);
 281         } else {
 282           cTValue *tv = proto_knumtv(J->pt, bc_d(ins));
 283           if (t == IRT_INT) {
 284             int32_t k = numberVint(tv);
 285             if (tvisint(tv) || numV(tv) == (lua_Number)k)  /* -0 is ok here. */
 286               return lj_ir_kint(J, k);
 287             return 0;  /* Type mismatch. */
 288           } else {
 289             return lj_ir_knum(J, numberVnum(tv));
 290           }
 291         }
 292       }
 293       return 0;  /* Non-constant initializer. */
 294     }
 295   }
 296   return 0;  /* No assignment to this slot found? */
 297 }
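
/* [Editor's note] A sketch of the pattern find_kinit() is designed to match.
** For a numeric loop like "for i=1,100 do end" the compiler typically emits
** (slot numbers are illustrative):
**
**   KSHORT 0   1    ; FORL_IDX initializer
**   KSHORT 1 100    ; FORL_STOP initializer
**   KSHORT 2   1    ; FORL_STEP initializer
**   FORI   0 => exit
**
** Scanning backwards from the FORI finds the KSHORT/KNUM that last stored to
** the requested slot. A preceding multi-result store (BCMbase, e.g. from a
** CALL or KNIL) or a forward JMP across the initializer makes the value
** conditional, so the search conservatively returns 0 in those cases.
*/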
 298 
 299 /* Load and optionally convert a FORI argument from a slot. */
 300 static TRef fori_load(jit_State *J, BCReg slot, IRType t, int mode)
 301 {
 302   int conv = (tvisint(&J->L->base[slot]) != (t==IRT_INT)) ? IRSLOAD_CONVERT : 0;
 303   return sloadt(J, (int32_t)slot,
 304                 t + (((mode & IRSLOAD_TYPECHECK) ||
 305                       (conv && t == IRT_INT && !(mode >> 16))) ?
 306                      IRT_GUARD : 0),
 307                 mode + conv);
 308 }
 309 
 310 /* Peek before FORI to find a const initializer. Otherwise load from slot. */
 311 static TRef fori_arg(jit_State *J, const BCIns *fori, BCReg slot,
 312                      IRType t, int mode)
 313 {
 314   TRef tr = J->base[slot];
 315   if (!tr) {
 316     tr = find_kinit(J, fori, slot, t);
 317     if (!tr)
 318       tr = fori_load(J, slot, t, mode);
 319   }
 320   return tr;
 321 }
 322 
 323 /* Return the direction of the FOR loop iterator.
 324 ** It's important to exactly reproduce the semantics of the interpreter.
 325 */
 326 static int rec_for_direction(cTValue *o)
 327 {
 328   return (tvisint(o) ? intV(o) : (int32_t)o->u32.hi) >= 0;
 329 }
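
/* [Editor's note] The u32.hi access above tests the sign bit of the step
** without loading the whole double: bit 31 of an IEEE-754 double's high word
** is its sign, so -0.0 counts as a negative step, exactly as in the
** interpreter. A standalone sketch of the same test (demo name and the
** memcpy-based type punning are ours; assumes <stdint.h> and <string.h>):
*/
static int demo_step_is_nonneg(double step)
{
  uint64_t bits;
  memcpy(&bits, &step, sizeof(bits));  /* Reinterpret the double's bits. */
  return (int32_t)(bits >> 32) >= 0;   /* Sign bit clear => non-negative. */
}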
 330 
 331 /* Simulate the runtime behavior of the FOR loop iterator. */
 332 static LoopEvent rec_for_iter(IROp *op, cTValue *o, int isforl)
 333 {
 334   lua_Number stopv = numberVnum(&o[FORL_STOP]);
 335   lua_Number idxv = numberVnum(&o[FORL_IDX]);
 336   lua_Number stepv = numberVnum(&o[FORL_STEP]);
 337   if (isforl)
 338     idxv += stepv;
 339   if (rec_for_direction(&o[FORL_STEP])) {
 340     if (idxv <= stopv) {
 341       *op = IR_LE;
 342       return idxv + 2*stepv > stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER;
 343     }
 344     *op = IR_GT; return LOOPEV_LEAVE;
 345   } else {
 346     if (stopv <= idxv) {
 347       *op = IR_GE;
 348       return idxv + 2*stepv < stopv ? LOOPEV_ENTERLO : LOOPEV_ENTER;
 349     }
 350     *op = IR_LT; return LOOPEV_LEAVE;
 351   }
 352 }
 353 
 354 /* Record checks for FOR loop overflow and step direction. */
 355 static void rec_for_check(jit_State *J, IRType t, int dir,
 356                           TRef stop, TRef step, int init)
 357 {
 358   if (!tref_isk(step)) {
 359     /* Non-constant step: need a guard for the direction. */
 360     TRef zero = (t == IRT_INT) ? lj_ir_kint(J, 0) : lj_ir_knum_zero(J);
 361     emitir(IRTG(dir ? IR_GE : IR_LT, t), step, zero);
 362     /* Add hoistable overflow checks for a narrowed FORL index. */
 363     if (init && t == IRT_INT) {
 364       if (tref_isk(stop)) {
 365         /* Constant stop: optimize check away or to a range check for step. */
 366         int32_t k = IR(tref_ref(stop))->i;
 367         if (dir) {
 368           if (k > 0)
 369             emitir(IRTGI(IR_LE), step, lj_ir_kint(J, (int32_t)0x7fffffff-k));
 370         } else {
 371           if (k < 0)
 372             emitir(IRTGI(IR_GE), step, lj_ir_kint(J, (int32_t)0x80000000-k));
 373         }
 374       } else {
 375         /* Stop+step variable: need full overflow check. */
 376         TRef tr = emitir(IRTGI(IR_ADDOV), step, stop);
 377         emitir(IRTI(IR_USE), tr, 0);  /* ADDOV is weak. Avoid dead result. */
 378       }
 379     }
 380   } else if (init && t == IRT_INT && !tref_isk(stop)) {
 381     /* Constant step: optimize overflow check to a range check for stop. */
 382     int32_t k = IR(tref_ref(step))->i;
 383     k = (int32_t)(dir ? 0x7fffffff : 0x80000000) - k;
 384     emitir(IRTGI(dir ? IR_LE : IR_GE), stop, lj_ir_kint(J, k));
 385   }
 386 }
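
/* [Editor's note] Worked example for the constant-step branch above: with
** t == IRT_INT, dir != 0 and step k == 1, the emitted guard is
** stop <= 0x7fffffff - 1. While that holds, idx + step cannot wrap past
** INT32_MAX before the exit test idx > stop fires, so the narrowed index
** needs no per-iteration overflow check. A sketch of the condition the
** guard enforces (demo name is ours):
*/
static int demo_forl_needs_no_ovcheck(int32_t stop, int32_t k)
{
  return k > 0 && stop <= (int32_t)0x7fffffff - k;  /* Upward loop only. */
}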
 387 
 388 /* Record a FORL instruction. */
 389 static void rec_for_loop(jit_State *J, const BCIns *fori, ScEvEntry *scev,
 390                          int init)
 391 {
 392   BCReg ra = bc_a(*fori);
 393   cTValue *tv = &J->L->base[ra];
 394   TRef idx = J->base[ra+FORL_IDX];
 395   IRType t = idx ? tref_type(idx) :
 396              (init || LJ_DUALNUM) ? lj_opt_narrow_forl(J, tv) : IRT_NUM;
 397   int mode = IRSLOAD_INHERIT +
 398     ((!LJ_DUALNUM || tvisint(tv) == (t == IRT_INT)) ? IRSLOAD_READONLY : 0);
 399   TRef stop = fori_arg(J, fori, ra+FORL_STOP, t, mode);
 400   TRef step = fori_arg(J, fori, ra+FORL_STEP, t, mode);
 401   int tc, dir = rec_for_direction(&tv[FORL_STEP]);
 402   lua_assert(bc_op(*fori) == BC_FORI || bc_op(*fori) == BC_JFORI);
 403   scev->t.irt = t;
 404   scev->dir = dir;
 405   scev->stop = tref_ref(stop);
 406   scev->step = tref_ref(step);
 407   rec_for_check(J, t, dir, stop, step, init);
 408   scev->start = tref_ref(find_kinit(J, fori, ra+FORL_IDX, IRT_INT));
 409   tc = (LJ_DUALNUM &&
 410         !(scev->start && irref_isk(scev->stop) && irref_isk(scev->step) &&
 411           tvisint(&tv[FORL_IDX]) == (t == IRT_INT))) ?
 412         IRSLOAD_TYPECHECK : 0;
 413   if (tc) {
 414     J->base[ra+FORL_STOP] = stop;
 415     J->base[ra+FORL_STEP] = step;
 416   }
 417   if (!idx)
 418     idx = fori_load(J, ra+FORL_IDX, t,
 419                     IRSLOAD_INHERIT + tc + (J->scev.start << 16));
 420   if (!init)
 421     J->base[ra+FORL_IDX] = idx = emitir(IRT(IR_ADD, t), idx, step);
 422   J->base[ra+FORL_EXT] = idx;
 423   scev->idx = tref_ref(idx);
 424   setmref(scev->pc, fori);
 425   J->maxslot = ra+FORL_EXT+1;
 426 }
 427 
 428 /* Record FORL/JFORL or FORI/JFORI. */
 429 static LoopEvent rec_for(jit_State *J, const BCIns *fori, int isforl)
 430 {
 431   BCReg ra = bc_a(*fori);
 432   TValue *tv = &J->L->base[ra];
 433   TRef *tr = &J->base[ra];
 434   IROp op;
 435   LoopEvent ev;
 436   TRef stop;
 437   IRType t;
 438   if (isforl) {  /* Handle FORL/JFORL opcodes. */
 439     TRef idx = tr[FORL_IDX];
 440     if (mref(J->scev.pc, const BCIns) == fori && tref_ref(idx) == J->scev.idx) {
 441       t = J->scev.t.irt;
 442       stop = J->scev.stop;
 443       idx = emitir(IRT(IR_ADD, t), idx, J->scev.step);
 444       tr[FORL_EXT] = tr[FORL_IDX] = idx;
 445     } else {
 446       ScEvEntry scev;
 447       rec_for_loop(J, fori, &scev, 0);
 448       t = scev.t.irt;
 449       stop = scev.stop;
 450     }
 451   } else {  /* Handle FORI/JFORI opcodes. */
 452     BCReg i;
 453     lj_meta_for(J->L, tv);
 454     t = (LJ_DUALNUM || tref_isint(tr[FORL_IDX])) ? lj_opt_narrow_forl(J, tv) :
 455                                                    IRT_NUM;
 456     for (i = FORL_IDX; i <= FORL_STEP; i++) {
 457       if (!tr[i]) sload(J, ra+i);
 458       lua_assert(tref_isnumber_str(tr[i]));
 459       if (tref_isstr(tr[i]))
 460         tr[i] = emitir(IRTG(IR_STRTO, IRT_NUM), tr[i], 0);
 461       if (t == IRT_INT) {
 462         if (!tref_isinteger(tr[i]))
 463           tr[i] = emitir(IRTGI(IR_CONV), tr[i], IRCONV_INT_NUM|IRCONV_CHECK);
 464       } else {
 465         if (!tref_isnum(tr[i]))
 466           tr[i] = emitir(IRTN(IR_CONV), tr[i], IRCONV_NUM_INT);
 467       }
 468     }
 469     tr[FORL_EXT] = tr[FORL_IDX];
 470     stop = tr[FORL_STOP];
 471     rec_for_check(J, t, rec_for_direction(&tv[FORL_STEP]),
 472                   stop, tr[FORL_STEP], 1);
 473   }
 474 
 475   ev = rec_for_iter(&op, tv, isforl);
 476   if (ev == LOOPEV_LEAVE) {
 477     J->maxslot = ra+FORL_EXT+1;
 478     J->pc = fori+1;
 479   } else {
 480     J->maxslot = ra;
 481     J->pc = fori+bc_j(*fori)+1;
 482   }
 483   lj_snap_add(J);
 484 
 485   emitir(IRTG(op, t), tr[FORL_IDX], stop);
 486 
 487   if (ev == LOOPEV_LEAVE) {
 488     J->maxslot = ra;
 489     J->pc = fori+bc_j(*fori)+1;
 490   } else {
 491     J->maxslot = ra+FORL_EXT+1;
 492     J->pc = fori+1;
 493   }
 494   J->needsnap = 1;
 495   return ev;
 496 }
 497 
 498 /* Record ITERL/JITERL. */
 499 static LoopEvent rec_iterl(jit_State *J, const BCIns iterins)
 500 {
 501   BCReg ra = bc_a(iterins);
 502   lua_assert(J->base[ra] != 0);
 503   if (!tref_isnil(J->base[ra])) {  /* Looping back? */
 504     J->base[ra-1] = J->base[ra];  /* Copy result of ITERC to control var. */
 505     J->maxslot = ra-1+bc_b(J->pc[-1]);
 506     J->pc += bc_j(iterins)+1;
 507     return LOOPEV_ENTER;
 508   } else {
 509     J->maxslot = ra-3;
 510     J->pc++;
 511     return LOOPEV_LEAVE;
 512   }
 513 }
 514 
 515 /* Record LOOP/JLOOP. Now, that was easy. */
 516 static LoopEvent rec_loop(jit_State *J, BCReg ra)
 517 {
 518   if (ra < J->maxslot) J->maxslot = ra;
 519   J->pc++;
 520   return LOOPEV_ENTER;
 521 }
 522 
 523 /* Check if a loop repeatedly failed to trace because it didn't loop back. */
 524 static int innerloopleft(jit_State *J, const BCIns *pc)
 525 {
 526   ptrdiff_t i;
 527   for (i = 0; i < PENALTY_SLOTS; i++)
 528     if (mref(J->penalty[i].pc, const BCIns) == pc) {
 529       if ((J->penalty[i].reason == LJ_TRERR_LLEAVE ||
 530            J->penalty[i].reason == LJ_TRERR_LINNER) &&
 531           J->penalty[i].val >= 2*PENALTY_MIN)
 532         return 1;
 533       break;
 534     }
 535   return 0;
 536 }
 537 
 538 /* Handle the case when an interpreted loop op is hit. */
 539 static void rec_loop_interp(jit_State *J, const BCIns *pc, LoopEvent ev)
 540 {
 541   if (J->parent == 0) {
 542     if (pc == J->startpc && J->framedepth + J->retdepth == 0) {
 543       /* Same loop? */
 544       if (ev == LOOPEV_LEAVE)  /* Must loop back to form a root trace. */
 545         lj_trace_err(J, LJ_TRERR_LLEAVE);
 546       rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Looping root trace. */
 547     } else if (ev != LOOPEV_LEAVE) {  /* Entering inner loop? */
 548       /* It's usually better to abort here and wait until the inner loop
 549       ** is traced. But if the inner loop repeatedly didn't loop back,
 550       ** this indicates a low trip count. In this case try unrolling
 551       ** an inner loop even in a root trace. But it's better to be a bit
 552       ** more conservative here and only do it for very short loops.
 553       */
 554       if (bc_j(*pc) != -1 && !innerloopleft(J, pc))
 555         lj_trace_err(J, LJ_TRERR_LINNER);  /* Root trace hit an inner loop. */
 556       if ((ev != LOOPEV_ENTERLO &&
 557            J->loopref && J->cur.nins - J->loopref > 24) || --J->loopunroll < 0)
 558         lj_trace_err(J, LJ_TRERR_LUNROLL);  /* Limit loop unrolling. */
 559       J->loopref = J->cur.nins;
 560     }
 561   } else if (ev != LOOPEV_LEAVE) {  /* Side trace enters an inner loop. */
 562     J->loopref = J->cur.nins;
 563     if (--J->loopunroll < 0)
 564       lj_trace_err(J, LJ_TRERR_LUNROLL);  /* Limit loop unrolling. */
 565   }  /* Side trace continues across a loop that's left or not entered. */
 566 }
 567 
 568 /* Handle the case when an already compiled loop op is hit. */
 569 static void rec_loop_jit(jit_State *J, TraceNo lnk, LoopEvent ev)
 570 {
 571   if (J->parent == 0) {  /* Root trace hit an inner loop. */
 572     /* Better let the inner loop spawn a side trace back here. */
 573     lj_trace_err(J, LJ_TRERR_LINNER);
 574   } else if (ev != LOOPEV_LEAVE) {  /* Side trace enters a compiled loop. */
 575     J->instunroll = 0;  /* Cannot continue across a compiled loop op. */
 576     if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
 577       rec_stop(J, LJ_TRLINK_LOOP, J->cur.traceno);  /* Form an extra loop. */
 578     else
 579       rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the loop. */
 580   }  /* Side trace continues across a loop that's left or not entered. */
 581 }
 582 
 583 /* -- Record calls and returns -------------------------------------------- */
 584 
 585 /* Specialize to the runtime value of the called function or its prototype. */
 586 static TRef rec_call_specialize(jit_State *J, GCfunc *fn, TRef tr)
 587 {
 588   TRef kfunc;
 589   if (isluafunc(fn)) {
 590     GCproto *pt = funcproto(fn);
 591     /* Too many closures created? Probably not a monomorphic function. */
 592     if (pt->flags >= PROTO_CLC_POLY) {  /* Specialize to prototype instead. */
 593       TRef trpt = emitir(IRT(IR_FLOAD, IRT_P32), tr, IRFL_FUNC_PC);
 594       emitir(IRTG(IR_EQ, IRT_P32), trpt, lj_ir_kptr(J, proto_bc(pt)));
 595       (void)lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);  /* Prevent GC of proto. */
 596       return tr;
 597     }
 598   }
 599   /* Otherwise specialize to the function (closure) value itself. */
 600   kfunc = lj_ir_kfunc(J, fn);
 601   emitir(IRTG(IR_EQ, IRT_FUNC), tr, kfunc);
 602   return kfunc;
 603 }
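
/* [Editor's note] The PROTO_CLC_POLY test above separates monomorphic from
** polymorphic call sites. A factory like
**
**   local function make(x) return function() return x end end
**
** creates a fresh closure per call, so a guard on one closure value would
** keep failing. Once enough closures of the same prototype exist, the
** recorder instead guards on the prototype's bytecode address
** (IRFL_FUNC_PC), which every such closure passes -- at the cost of
** upvalue constification, which needs a known closure (see rec_upvalue).
*/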
 604 
 605 /* Record call setup. */
 606 static void rec_call_setup(jit_State *J, BCReg func, ptrdiff_t nargs)
 607 {
 608   RecordIndex ix;
 609   TValue *functv = &J->L->base[func];
 610   TRef *fbase = &J->base[func];
 611   ptrdiff_t i;
 612   for (i = 0; i <= nargs; i++)
 613     (void)getslot(J, func+i);  /* Ensure func and all args have a reference. */
 614   if (!tref_isfunc(fbase[0])) {  /* Resolve __call metamethod. */
 615     ix.tab = fbase[0];
 616     copyTV(J->L, &ix.tabv, functv);
 617     if (!lj_record_mm_lookup(J, &ix, MM_call) || !tref_isfunc(ix.mobj))
 618       lj_trace_err(J, LJ_TRERR_NOMM);
 619     for (i = ++nargs; i > 0; i--)  /* Shift arguments up. */
 620       fbase[i] = fbase[i-1];
 621     fbase[0] = ix.mobj;  /* Replace function. */
 622     functv = &ix.mobjv;
 623   }
 624   fbase[0] = TREF_FRAME | rec_call_specialize(J, funcV(functv), fbase[0]);
 625   J->maxslot = (BCReg)nargs;
 626 }
 627 
 628 /* Record call. */
 629 void lj_record_call(jit_State *J, BCReg func, ptrdiff_t nargs)
 630 {
 631   rec_call_setup(J, func, nargs);
 632   /* Bump frame. */
 633   J->framedepth++;
 634   J->base += func+1;
 635   J->baseslot += func+1;
 636   if (J->baseslot + J->maxslot >= LJ_MAX_JSLOTS)
 637     lj_trace_err(J, LJ_TRERR_STACKOV);
 638 }
 639 
 640 /* Record tail call. */
 641 void lj_record_tailcall(jit_State *J, BCReg func, ptrdiff_t nargs)
 642 {
 643   rec_call_setup(J, func, nargs);
 644   if (frame_isvarg(J->L->base - 1)) {
 645     BCReg cbase = (BCReg)frame_delta(J->L->base - 1);
 646     if (--J->framedepth < 0)
 647       lj_trace_err(J, LJ_TRERR_NYIRETL);
 648     J->baseslot -= (BCReg)cbase;
 649     J->base -= cbase;
 650     func += cbase;
 651   }
 652   /* Move func + args down. */
 653   memmove(&J->base[-1], &J->base[func], sizeof(TRef)*(J->maxslot+1));
 654   /* Note: the new TREF_FRAME is now at J->base[-1] (even for slot #0). */
 655   /* Tailcalls can form a loop, so count towards the loop unroll limit. */
 656   if (++J->tailcalled > J->loopunroll)
 657     lj_trace_err(J, LJ_TRERR_LUNROLL);
 658 }
 659 
 660 /* Check unroll limits for down-recursion. */
 661 static int check_downrec_unroll(jit_State *J, GCproto *pt)
 662 {
 663   IRRef ptref;
 664   for (ptref = J->chain[IR_KGC]; ptref; ptref = IR(ptref)->prev)
 665     if (ir_kgc(IR(ptref)) == obj2gco(pt)) {
 666       int count = 0;
 667       IRRef ref;
 668       for (ref = J->chain[IR_RETF]; ref; ref = IR(ref)->prev)
 669         if (IR(ref)->op1 == ptref)
 670           count++;
 671       if (count) {
 672         if (J->pc == J->startpc) {
 673           if (count + J->tailcalled > J->param[JIT_P_recunroll])
 674             return 1;
 675         } else {
 676           lj_trace_err(J, LJ_TRERR_DOWNREC);
 677         }
 678       }
 679     }
 680   return 0;
 681 }
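
/* [Editor's note] Down-recursion is recursion unrolled from the return side.
** In, e.g.,
**
**   local function sum(n) if n == 0 then return 0 end return n + sum(n-1) end
**
** a trace starting at the RET keeps returning into frames of the same
** prototype. The KGC/RETF chain walk above counts how many such returns the
** trace already contains and caps the unrolling at JIT_P_recunroll.
*/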
 682 
 683 /* Record return. */
 684 void lj_record_ret(jit_State *J, BCReg rbase, ptrdiff_t gotresults)
 685 {
 686   TValue *frame = J->L->base - 1;
 687   ptrdiff_t i;
 688   for (i = 0; i < gotresults; i++)
 689     (void)getslot(J, rbase+i);  /* Ensure all results have a reference. */
 690   while (frame_ispcall(frame)) {  /* Immediately resolve pcall() returns. */
 691     BCReg cbase = (BCReg)frame_delta(frame);
 692     if (--J->framedepth <= 0)
 693       lj_trace_err(J, LJ_TRERR_NYIRETL);
 694     lua_assert(J->baseslot > 1);
 695     gotresults++;
 696     rbase += cbase;
 697     J->baseslot -= (BCReg)cbase;
 698     J->base -= cbase;
 699     J->base[--rbase] = TREF_TRUE;  /* Prepend true to results. */
 700     frame = frame_prevd(frame);
 701   }
 702   /* Return to lower frame via interpreter for unhandled cases. */
 703   if (J->framedepth == 0 && J->pt && bc_isret(bc_op(*J->pc)) &&
 704        (!frame_islua(frame) ||
 705         (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))))) {
 706     /* NYI: specialize to frame type and return directly, not via RET*. */
 707     for (i = 0; i < (ptrdiff_t)rbase; i++)
 708       J->base[i] = 0;  /* Purge dead slots. */
 709     J->maxslot = rbase + (BCReg)gotresults;
 710     rec_stop(J, LJ_TRLINK_RETURN, 0);  /* Return to interpreter. */
 711     return;
 712   }
 713   if (frame_isvarg(frame)) {
 714     BCReg cbase = (BCReg)frame_delta(frame);
 715     if (--J->framedepth < 0)  /* NYI: return of vararg func to lower frame. */
 716       lj_trace_err(J, LJ_TRERR_NYIRETL);
 717     lua_assert(J->baseslot > 1);
 718     rbase += cbase;
 719     J->baseslot -= (BCReg)cbase;
 720     J->base -= cbase;
 721     frame = frame_prevd(frame);
 722   }
 723   if (frame_islua(frame)) {  /* Return to Lua frame. */
 724     BCIns callins = *(frame_pc(frame)-1);
  725     ptrdiff_t nresults = bc_b(callins) ? (ptrdiff_t)bc_b(callins)-1 : gotresults;
 726     BCReg cbase = bc_a(callins);
 727     GCproto *pt = funcproto(frame_func(frame - (cbase+1)));
 728     if ((pt->flags & PROTO_NOJIT))
 729       lj_trace_err(J, LJ_TRERR_CJITOFF);
 730     if (J->framedepth == 0 && J->pt && frame == J->L->base - 1) {
 731       if (check_downrec_unroll(J, pt)) {
 732         J->maxslot = (BCReg)(rbase + gotresults);
 733         lj_snap_purge(J);
 734         rec_stop(J, LJ_TRLINK_DOWNREC, J->cur.traceno);  /* Down-recursion. */
 735         return;
 736       }
 737       lj_snap_add(J);
 738     }
 739     for (i = 0; i < nresults; i++)  /* Adjust results. */
 740       J->base[i-1] = i < gotresults ? J->base[rbase+i] : TREF_NIL;
 741     J->maxslot = cbase+(BCReg)nresults;
 742     if (J->framedepth > 0) {  /* Return to a frame that is part of the trace. */
 743       J->framedepth--;
 744       lua_assert(J->baseslot > cbase+1);
 745       J->baseslot -= cbase+1;
 746       J->base -= cbase+1;
 747     } else if (J->parent == 0 && !bc_isret(bc_op(J->cur.startins))) {
 748       /* Return to lower frame would leave the loop in a root trace. */
 749       lj_trace_err(J, LJ_TRERR_LLEAVE);
 750     } else if (J->needsnap) {  /* Tailcalled to ff with side-effects. */
 751       lj_trace_err(J, LJ_TRERR_NYIRETL);  /* No way to insert snapshot here. */
 752     } else {  /* Return to lower frame. Guard for the target we return to. */
 753       TRef trpt = lj_ir_kgc(J, obj2gco(pt), IRT_PROTO);
 754       TRef trpc = lj_ir_kptr(J, (void *)frame_pc(frame));
 755       emitir(IRTG(IR_RETF, IRT_P32), trpt, trpc);
 756       J->retdepth++;
 757       J->needsnap = 1;
 758       lua_assert(J->baseslot == 1);
 759       /* Shift result slots up and clear the slots of the new frame below. */
 760       memmove(J->base + cbase, J->base-1, sizeof(TRef)*nresults);
 761       memset(J->base-1, 0, sizeof(TRef)*(cbase+1));
 762     }
 763   } else if (frame_iscont(frame)) {  /* Return to continuation frame. */
 764     ASMFunction cont = frame_contf(frame);
 765     BCReg cbase = (BCReg)frame_delta(frame);
 766     if ((J->framedepth -= 2) < 0)
 767       lj_trace_err(J, LJ_TRERR_NYIRETL);
 768     J->baseslot -= (BCReg)cbase;
 769     J->base -= cbase;
 770     J->maxslot = cbase-2;
 771     if (cont == lj_cont_ra) {
 772       /* Copy result to destination slot. */
 773       BCReg dst = bc_a(*(frame_contpc(frame)-1));
 774       J->base[dst] = gotresults ? J->base[cbase+rbase] : TREF_NIL;
 775       if (dst >= J->maxslot) J->maxslot = dst+1;
 776     } else if (cont == lj_cont_nop) {
 777       /* Nothing to do here. */
 778     } else if (cont == lj_cont_cat) {
 779       lua_assert(0);
 780     } else {
 781       /* Result type already specialized. */
 782       lua_assert(cont == lj_cont_condf || cont == lj_cont_condt);
 783     }
 784   } else {
 785     lj_trace_err(J, LJ_TRERR_NYIRETL);  /* NYI: handle return to C frame. */
 786   }
 787   lua_assert(J->baseslot >= 1);
 788 }
 789 
 790 /* -- Metamethod handling ------------------------------------------------- */
 791 
 792 /* Prepare to record call to metamethod. */
 793 static BCReg rec_mm_prep(jit_State *J, ASMFunction cont)
 794 {
 795   BCReg s, top = curr_proto(J->L)->framesize;
 796   TRef trcont;
 797   setcont(&J->L->base[top], cont);
 798 #if LJ_64
 799   trcont = lj_ir_kptr(J, (void *)((int64_t)cont - (int64_t)lj_vm_asm_begin));
 800 #else
 801   trcont = lj_ir_kptr(J, (void *)cont);
 802 #endif
 803   J->base[top] = trcont | TREF_CONT;
 804   J->framedepth++;
 805   for (s = J->maxslot; s < top; s++)
 806     J->base[s] = 0;  /* Clear frame gap to avoid resurrecting previous refs. */
 807   return top+1;
 808 }
 809 
 810 /* Record metamethod lookup. */
 811 int lj_record_mm_lookup(jit_State *J, RecordIndex *ix, MMS mm)
 812 {
 813   RecordIndex mix;
 814   GCtab *mt;
 815   if (tref_istab(ix->tab)) {
 816     mt = tabref(tabV(&ix->tabv)->metatable);
 817     mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
 818   } else if (tref_isudata(ix->tab)) {
 819     int udtype = udataV(&ix->tabv)->udtype;
 820     mt = tabref(udataV(&ix->tabv)->metatable);
 821     /* The metatables of special userdata objects are treated as immutable. */
 822     if (udtype != UDTYPE_USERDATA) {
 823       cTValue *mo;
 824       if (LJ_HASFFI && udtype == UDTYPE_FFI_CLIB) {
 825         /* Specialize to the C library namespace object. */
 826         emitir(IRTG(IR_EQ, IRT_P32), ix->tab, lj_ir_kptr(J, udataV(&ix->tabv)));
 827       } else {
 828         /* Specialize to the type of userdata. */
 829         TRef tr = emitir(IRT(IR_FLOAD, IRT_U8), ix->tab, IRFL_UDATA_UDTYPE);
 830         emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, udtype));
 831       }
 832   immutable_mt:
 833       mo = lj_tab_getstr(mt, mmname_str(J2G(J), mm));
 834       if (!mo || tvisnil(mo))
 835         return 0;  /* No metamethod. */
 836       /* Treat metamethod or index table as immutable, too. */
 837       if (!(tvisfunc(mo) || tvistab(mo)))
 838         lj_trace_err(J, LJ_TRERR_BADTYPE);
 839       copyTV(J->L, &ix->mobjv, mo);
 840       ix->mobj = lj_ir_kgc(J, gcV(mo), tvisfunc(mo) ? IRT_FUNC : IRT_TAB);
 841       ix->mtv = mt;
 842       ix->mt = TREF_NIL;  /* Dummy value for comparison semantics. */
 843       return 1;  /* Got metamethod or index table. */
 844     }
 845     mix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_UDATA_META);
 846   } else {
 847     /* Specialize to base metatable. Must flush mcode in lua_setmetatable(). */
 848     mt = tabref(basemt_obj(J2G(J), &ix->tabv));
 849     if (mt == NULL) {
 850       ix->mt = TREF_NIL;
 851       return 0;  /* No metamethod. */
 852     }
 853     /* The cdata metatable is treated as immutable. */
 854     if (LJ_HASFFI && tref_iscdata(ix->tab)) goto immutable_mt;
 855     ix->mt = mix.tab = lj_ir_ktab(J, mt);
 856     goto nocheck;
 857   }
 858   ix->mt = mt ? mix.tab : TREF_NIL;
 859   emitir(IRTG(mt ? IR_NE : IR_EQ, IRT_TAB), mix.tab, lj_ir_knull(J, IRT_TAB));
 860 nocheck:
 861   if (mt) {
 862     GCstr *mmstr = mmname_str(J2G(J), mm);
 863     cTValue *mo = lj_tab_getstr(mt, mmstr);
 864     if (mo && !tvisnil(mo))
 865       copyTV(J->L, &ix->mobjv, mo);
 866     ix->mtv = mt;
 867     settabV(J->L, &mix.tabv, mt);
 868     setstrV(J->L, &mix.keyv, mmstr);
 869     mix.key = lj_ir_kstr(J, mmstr);
 870     mix.val = 0;
 871     mix.idxchain = 0;
 872     ix->mobj = lj_record_idx(J, &mix);
 873     return !tref_isnil(ix->mobj);  /* 1 if metamethod found, 0 if not. */
 874   }
 875   return 0;  /* No metamethod. */
 876 }
 877 
 878 /* Record call to arithmetic metamethod. */
 879 static TRef rec_mm_arith(jit_State *J, RecordIndex *ix, MMS mm)
 880 {
 881   /* Set up metamethod call first to save ix->tab and ix->tabv. */
 882   BCReg func = rec_mm_prep(J, lj_cont_ra);
 883   TRef *base = J->base + func;
 884   TValue *basev = J->L->base + func;
 885   base[1] = ix->tab; base[2] = ix->key;
 886   copyTV(J->L, basev+1, &ix->tabv);
 887   copyTV(J->L, basev+2, &ix->keyv);
 888   if (!lj_record_mm_lookup(J, ix, mm)) {  /* Lookup mm on 1st operand. */
 889     if (mm != MM_unm) {
 890       ix->tab = ix->key;
 891       copyTV(J->L, &ix->tabv, &ix->keyv);
 892       if (lj_record_mm_lookup(J, ix, mm))  /* Lookup mm on 2nd operand. */
 893         goto ok;
 894     }
 895     lj_trace_err(J, LJ_TRERR_NOMM);
 896   }
 897 ok:
 898   base[0] = ix->mobj;
 899   copyTV(J->L, basev+0, &ix->mobjv);
 900   lj_record_call(J, func, 2);
 901   return 0;  /* No result yet. */
 902 }
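
/* [Editor's note] The two lookups above mirror the interpreter's order for
** arithmetic metamethods: for "a + b" the __add handler is taken from a's
** metatable first, then from b's; only unary minus (MM_unm) has no second
** operand to fall back to. The recorded call has the shape mobj(a, b), and
** the lj_cont_ra continuation later copies its single result into the
** destination slot (see lj_record_ret).
*/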
 903 
 904 /* Record call to __len metamethod. */
 905 static TRef rec_mm_len(jit_State *J, TRef tr, TValue *tv)
 906 {
 907   RecordIndex ix;
 908   ix.tab = tr;
 909   copyTV(J->L, &ix.tabv, tv);
 910   if (lj_record_mm_lookup(J, &ix, MM_len)) {
 911     BCReg func = rec_mm_prep(J, lj_cont_ra);
 912     TRef *base = J->base + func;
 913     TValue *basev = J->L->base + func;
 914     base[0] = ix.mobj; copyTV(J->L, basev+0, &ix.mobjv);
 915     base[1] = tr; copyTV(J->L, basev+1, tv);
 916 #if LJ_52
 917     base[2] = tr; copyTV(J->L, basev+2, tv);
 918 #else
 919     base[2] = TREF_NIL; setnilV(basev+2);
 920 #endif
 921     lj_record_call(J, func, 2);
 922   } else {
 923     if (LJ_52 && tref_istab(tr))
 924       return lj_ir_call(J, IRCALL_lj_tab_len, tr);
 925     lj_trace_err(J, LJ_TRERR_NOMM);
 926   }
 927   return 0;  /* No result yet. */
 928 }
 929 
 930 /* Call a comparison metamethod. */
 931 static void rec_mm_callcomp(jit_State *J, RecordIndex *ix, int op)
 932 {
 933   BCReg func = rec_mm_prep(J, (op&1) ? lj_cont_condf : lj_cont_condt);
 934   TRef *base = J->base + func;
 935   TValue *tv = J->L->base + func;
 936   base[0] = ix->mobj; base[1] = ix->val; base[2] = ix->key;
 937   copyTV(J->L, tv+0, &ix->mobjv);
 938   copyTV(J->L, tv+1, &ix->valv);
 939   copyTV(J->L, tv+2, &ix->keyv);
 940   lj_record_call(J, func, 2);
 941 }
 942 
 943 /* Record call to equality comparison metamethod (for tab and udata only). */
 944 static void rec_mm_equal(jit_State *J, RecordIndex *ix, int op)
 945 {
 946   ix->tab = ix->val;
 947   copyTV(J->L, &ix->tabv, &ix->valv);
 948   if (lj_record_mm_lookup(J, ix, MM_eq)) {  /* Lookup mm on 1st operand. */
 949     cTValue *bv;
 950     TRef mo1 = ix->mobj;
 951     TValue mo1v;
 952     copyTV(J->L, &mo1v, &ix->mobjv);
 953     /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
 954     bv = &ix->keyv;
 955     if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
 956       TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
 957       emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
 958     } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
 959       TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
 960       emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
 961     } else {  /* Lookup metamethod on 2nd operand and compare both. */
 962       ix->tab = ix->key;
 963       copyTV(J->L, &ix->tabv, bv);
 964       if (!lj_record_mm_lookup(J, ix, MM_eq) ||
 965           lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
 966         return;
 967     }
 968     rec_mm_callcomp(J, ix, op);
 969   }
 970 }
 971 
 972 /* Record call to ordered comparison metamethods (for arbitrary objects). */
 973 static void rec_mm_comp(jit_State *J, RecordIndex *ix, int op)
 974 {
 975   ix->tab = ix->val;
 976   copyTV(J->L, &ix->tabv, &ix->valv);
 977   while (1) {
 978     MMS mm = (op & 2) ? MM_le : MM_lt;  /* Try __le + __lt or only __lt. */
 979 #if LJ_52
 980     if (!lj_record_mm_lookup(J, ix, mm)) {  /* Lookup mm on 1st operand. */
 981       ix->tab = ix->key;
 982       copyTV(J->L, &ix->tabv, &ix->keyv);
 983       if (!lj_record_mm_lookup(J, ix, mm))  /* Lookup mm on 2nd operand. */
 984         goto nomatch;
 985     }
 986     rec_mm_callcomp(J, ix, op);
 987     return;
 988 #else
 989     if (lj_record_mm_lookup(J, ix, mm)) {  /* Lookup mm on 1st operand. */
 990       cTValue *bv;
 991       TRef mo1 = ix->mobj;
 992       TValue mo1v;
 993       copyTV(J->L, &mo1v, &ix->mobjv);
 994       /* Avoid the 2nd lookup and the objcmp if the metatables are equal. */
 995       bv = &ix->keyv;
 996       if (tvistab(bv) && tabref(tabV(bv)->metatable) == ix->mtv) {
 997         TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_TAB_META);
 998         emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
 999       } else if (tvisudata(bv) && tabref(udataV(bv)->metatable) == ix->mtv) {
1000         TRef mt2 = emitir(IRT(IR_FLOAD, IRT_TAB), ix->key, IRFL_UDATA_META);
1001         emitir(IRTG(IR_EQ, IRT_TAB), mt2, ix->mt);
1002       } else {  /* Lookup metamethod on 2nd operand and compare both. */
1003         ix->tab = ix->key;
1004         copyTV(J->L, &ix->tabv, bv);
1005         if (!lj_record_mm_lookup(J, ix, mm) ||
1006             lj_record_objcmp(J, mo1, ix->mobj, &mo1v, &ix->mobjv))
1007           goto nomatch;
1008       }
1009       rec_mm_callcomp(J, ix, op);
1010       return;
1011     }
1012 #endif
1013   nomatch:
 1014     /* Lookup failed. Retry with __lt and swapped operands. */
1015     if (!(op & 2)) break;  /* Already at __lt. Interpreter will throw. */
1016     ix->tab = ix->key; ix->key = ix->val; ix->val = ix->tab;
1017     copyTV(J->L, &ix->tabv, &ix->keyv);
1018     copyTV(J->L, &ix->keyv, &ix->valv);
1019     copyTV(J->L, &ix->valv, &ix->tabv);
1020     op ^= 3;
1021   }
1022 }
1023 
1024 #if LJ_HASFFI
1025 /* Setup call to cdata comparison metamethod. */
1026 static void rec_mm_comp_cdata(jit_State *J, RecordIndex *ix, int op, MMS mm)
1027 {
1028   lj_snap_add(J);
1029   if (tref_iscdata(ix->val)) {
1030     ix->tab = ix->val;
1031     copyTV(J->L, &ix->tabv, &ix->valv);
1032   } else {
1033     lua_assert(tref_iscdata(ix->key));
1034     ix->tab = ix->key;
1035     copyTV(J->L, &ix->tabv, &ix->keyv);
1036   }
1037   lj_record_mm_lookup(J, ix, mm);
1038   rec_mm_callcomp(J, ix, op);
1039 }
1040 #endif
1041 
1042 /* -- Indexed access ------------------------------------------------------ */
1043 
1044 /* Record bounds-check. */
1045 static void rec_idx_abc(jit_State *J, TRef asizeref, TRef ikey, uint32_t asize)
1046 {
1047   /* Try to emit invariant bounds checks. */
1048   if ((J->flags & (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) ==
1049       (JIT_F_OPT_LOOP|JIT_F_OPT_ABC)) {
1050     IRRef ref = tref_ref(ikey);
1051     IRIns *ir = IR(ref);
1052     int32_t ofs = 0;
1053     IRRef ofsref = 0;
1054     /* Handle constant offsets. */
1055     if (ir->o == IR_ADD && irref_isk(ir->op2)) {
1056       ofsref = ir->op2;
1057       ofs = IR(ofsref)->i;
1058       ref = ir->op1;
1059       ir = IR(ref);
1060     }
1061     /* Got scalar evolution analysis results for this reference? */
1062     if (ref == J->scev.idx) {
1063       int32_t stop;
1064       lua_assert(irt_isint(J->scev.t) && ir->o == IR_SLOAD);
1065       stop = numberVint(&(J->L->base - J->baseslot)[ir->op1 + FORL_STOP]);
1066       /* Runtime value for stop of loop is within bounds? */
1067       if ((uint64_t)stop + ofs < (uint64_t)asize) {
1068         /* Emit invariant bounds check for stop. */
1069         emitir(IRTG(IR_ABC, IRT_P32), asizeref, ofs == 0 ? J->scev.stop :
1070                emitir(IRTI(IR_ADD), J->scev.stop, ofsref));
1071         /* Emit invariant bounds check for start, if not const or negative. */
1072         if (!(J->scev.dir && J->scev.start &&
1073               (int64_t)IR(J->scev.start)->i + ofs >= 0))
1074           emitir(IRTG(IR_ABC, IRT_P32), asizeref, ikey);
1075         return;
1076       }
1077     }
1078   }
1079   emitir(IRTGI(IR_ABC), asizeref, ikey);  /* Emit regular bounds check. */
1080 }
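
/* [Editor's note] Sketch of the hoisting condition above: if the loop's
** final index (the FORL stop value, plus any constant offset) already lies
** within the array size, a single invariant check on stop -- plus one on the
** start unless it is a known non-negative constant -- replaces the
** per-iteration bounds check. The 64-bit arithmetic below makes a negative
** stop+ofs fail the test instead of wrapping around (demo name is ours):
*/
static int demo_abc_is_invariant(uint32_t asize, int32_t stop, int32_t ofs)
{
  return (uint64_t)stop + ofs < (uint64_t)asize;
}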
1081 
1082 /* Record indexed key lookup. */
1083 static TRef rec_idx_key(jit_State *J, RecordIndex *ix)
1084 {
1085   TRef key;
1086   GCtab *t = tabV(&ix->tabv);
1087   ix->oldv = lj_tab_get(J->L, t, &ix->keyv);  /* Lookup previous value. */
1088 
1089   /* Integer keys are looked up in the array part first. */
1090   key = ix->key;
1091   if (tref_isnumber(key)) {
1092     int32_t k = numberVint(&ix->keyv);
1093     if (!tvisint(&ix->keyv) && numV(&ix->keyv) != (lua_Number)k)
1094       k = LJ_MAX_ASIZE;
1095     if ((MSize)k < LJ_MAX_ASIZE) {  /* Potential array key? */
1096       TRef ikey = lj_opt_narrow_index(J, key);
1097       TRef asizeref = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
1098       if ((MSize)k < t->asize) {  /* Currently an array key? */
1099         TRef arrayref;
1100         rec_idx_abc(J, asizeref, ikey, t->asize);
1101         arrayref = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_ARRAY);
1102         return emitir(IRT(IR_AREF, IRT_P32), arrayref, ikey);
1103       } else {  /* Currently not in array (may be an array extension)? */
1104         emitir(IRTGI(IR_ULE), asizeref, ikey);  /* Inv. bounds check. */
1105         if (k == 0 && tref_isk(key))
1106           key = lj_ir_knum_zero(J);  /* Canonicalize 0 or +-0.0 to +0.0. */
1107         /* And continue with the hash lookup. */
1108       }
1109     } else if (!tref_isk(key)) {
1110       /* We can rule out const numbers which failed the integerness test
1111       ** above. But all other numbers are potential array keys.
1112       */
1113       if (t->asize == 0) {  /* True sparse tables have an empty array part. */
1114         /* Guard that the array part stays empty. */
1115         TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_ASIZE);
1116         emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
1117       } else {
1118         lj_trace_err(J, LJ_TRERR_NYITMIX);
1119       }
1120     }
1121   }
1122 
1123   /* Otherwise the key is located in the hash part. */
1124   if (t->hmask == 0) {  /* Shortcut for empty hash part. */
1125     /* Guard that the hash part stays empty. */
1126     TRef tmp = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1127     emitir(IRTGI(IR_EQ), tmp, lj_ir_kint(J, 0));
1128     return lj_ir_kkptr(J, niltvg(J2G(J)));
1129   }
1130   if (tref_isinteger(key))  /* Hash keys are based on numbers, not ints. */
1131     key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
1132   if (tref_isk(key)) {
1133     /* Optimize lookup of constant hash keys. */
1134     MSize hslot = (MSize)((char *)ix->oldv - (char *)&noderef(t->node)[0].val);
1135     if (t->hmask > 0 && hslot <= t->hmask*(MSize)sizeof(Node) &&
1136         hslot <= 65535*(MSize)sizeof(Node)) {
1137       TRef node, kslot;
1138       TRef hm = emitir(IRTI(IR_FLOAD), ix->tab, IRFL_TAB_HMASK);
1139       emitir(IRTGI(IR_EQ), hm, lj_ir_kint(J, (int32_t)t->hmask));
1140       node = emitir(IRT(IR_FLOAD, IRT_P32), ix->tab, IRFL_TAB_NODE);
1141       kslot = lj_ir_kslot(J, key, hslot / sizeof(Node));
1142       return emitir(IRTG(IR_HREFK, IRT_P32), node, kslot);
1143     }
1144   }
1145   /* Fall back to a regular hash lookup. */
1146   return emitir(IRT(IR_HREF, IRT_P32), ix->tab, key);
1147 }
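
/* [Editor's note] Sketch of the HREFK slot recovery above: ix->oldv points
** at the value field of the node currently holding the key, so the node
** index falls out of pointer arithmetic against the node array, and the
** guard on hmask ensures the table has not been resized since. With a
** hypothetical node layout (the real Node lives in lj_obj.h):
*/
typedef struct DemoNode { double val, key; struct DemoNode *next; } DemoNode;

static uint32_t demo_hslot(const DemoNode *node, const double *oldv)
{
  size_t ofs = (size_t)((const char *)oldv - (const char *)&node[0].val);
  return (uint32_t)(ofs / sizeof(DemoNode));  /* Index of the owning node. */
}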
1148 
1149 /* Determine whether a key is NOT one of the fast metamethod names. */
1150 static int nommstr(jit_State *J, TRef key)
1151 {
1152   if (tref_isstr(key)) {
1153     if (tref_isk(key)) {
1154       GCstr *str = ir_kstr(IR(tref_ref(key)));
1155       uint32_t mm;
1156       for (mm = 0; mm <= MM_FAST; mm++)
1157         if (mmname_str(J2G(J), mm) == str)
 1158           return 0;  /* MUST be one of the fast metamethod names. */
1159     } else {
1160       return 0;  /* Variable string key MAY be a metamethod name. */
1161     }
1162   }
1163   return 1;  /* CANNOT be a metamethod name. */
1164 }
1165 
1166 /* Record indexed load/store. */
1167 TRef lj_record_idx(jit_State *J, RecordIndex *ix)
1168 {
1169   TRef xref;
1170   IROp xrefop, loadop;
1171   cTValue *oldv;
1172 
1173   while (!tref_istab(ix->tab)) { /* Handle non-table lookup. */
1174     /* Never call raw lj_record_idx() on non-table. */
1175     lua_assert(ix->idxchain != 0);
1176     if (!lj_record_mm_lookup(J, ix, ix->val ? MM_newindex : MM_index))
1177       lj_trace_err(J, LJ_TRERR_NOMM);
1178   handlemm:
1179     if (tref_isfunc(ix->mobj)) {  /* Handle metamethod call. */
1180       BCReg func = rec_mm_prep(J, ix->val ? lj_cont_nop : lj_cont_ra);
1181       TRef *base = J->base + func;
1182       TValue *tv = J->L->base + func;
1183       base[0] = ix->mobj; base[1] = ix->tab; base[2] = ix->key;
1184       setfuncV(J->L, tv+0, funcV(&ix->mobjv));
1185       copyTV(J->L, tv+1, &ix->tabv);
1186       copyTV(J->L, tv+2, &ix->keyv);
1187       if (ix->val) {
1188         base[3] = ix->val;
1189         copyTV(J->L, tv+3, &ix->valv);
1190         lj_record_call(J, func, 3);  /* mobj(tab, key, val) */
1191         return 0;
1192       } else {
1193         lj_record_call(J, func, 2);  /* res = mobj(tab, key) */
1194         return 0;  /* No result yet. */
1195       }
1196     }
1197     /* Otherwise retry lookup with metaobject. */
1198     ix->tab = ix->mobj;
1199     copyTV(J->L, &ix->tabv, &ix->mobjv);
1200     if (--ix->idxchain == 0)
1201       lj_trace_err(J, LJ_TRERR_IDXLOOP);
1202   }
1203 
1204   /* First catch nil and NaN keys for tables. */
1205   if (tvisnil(&ix->keyv) || (tvisnum(&ix->keyv) && tvisnan(&ix->keyv))) {
1206     if (ix->val)  /* Better fail early. */
1207       lj_trace_err(J, LJ_TRERR_STORENN);
1208     if (tref_isk(ix->key)) {
1209       if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1210         goto handlemm;
1211       return TREF_NIL;
1212     }
1213   }
1214 
1215   /* Record the key lookup. */
1216   xref = rec_idx_key(J, ix);
1217   xrefop = IR(tref_ref(xref))->o;
1218   loadop = xrefop == IR_AREF ? IR_ALOAD : IR_HLOAD;
1219   /* The lj_meta_tset() inconsistency is gone, but better play safe. */
1220   oldv = xrefop == IR_KKPTR ? (cTValue *)ir_kptr(IR(tref_ref(xref))) : ix->oldv;
1221 
1222   if (ix->val == 0) {  /* Indexed load */
1223     IRType t = itype2irt(oldv);
1224     TRef res;
1225     if (oldv == niltvg(J2G(J))) {
1226       emitir(IRTG(IR_EQ, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1227       res = TREF_NIL;
1228     } else {
1229       res = emitir(IRTG(loadop, t), xref, 0);
1230     }
1231     if (t == IRT_NIL && ix->idxchain && lj_record_mm_lookup(J, ix, MM_index))
1232       goto handlemm;
1233     if (irtype_ispri(t)) res = TREF_PRI(t);  /* Canonicalize primitives. */
1234     return res;
1235   } else {  /* Indexed store. */
1236     GCtab *mt = tabref(tabV(&ix->tabv)->metatable);
1237     int keybarrier = tref_isgcv(ix->key) && !tref_isnil(ix->val);
1238     if (tvisnil(oldv)) {  /* Previous value was nil? */
1239       /* Need to duplicate the hasmm check for the early guards. */
1240       int hasmm = 0;
1241       if (ix->idxchain && mt) {
1242         cTValue *mo = lj_tab_getstr(mt, mmname_str(J2G(J), MM_newindex));
1243         hasmm = mo && !tvisnil(mo);
1244       }
1245       if (hasmm)
1246         emitir(IRTG(loadop, IRT_NIL), xref, 0);  /* Guard for nil value. */
1247       else if (xrefop == IR_HREF)
1248         emitir(IRTG(oldv == niltvg(J2G(J)) ? IR_EQ : IR_NE, IRT_P32),
1249                xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1250       if (ix->idxchain && lj_record_mm_lookup(J, ix, MM_newindex)) {
1251         lua_assert(hasmm);
1252         goto handlemm;
1253       }
1254       lua_assert(!hasmm);
1255       if (oldv == niltvg(J2G(J))) {  /* Need to insert a new key. */
1256         TRef key = ix->key;
1257         if (tref_isinteger(key))  /* NEWREF needs a TValue as a key. */
1258           key = emitir(IRTN(IR_CONV), key, IRCONV_NUM_INT);
1259         xref = emitir(IRT(IR_NEWREF, IRT_P32), ix->tab, key);
1260         keybarrier = 0;  /* NEWREF already takes care of the key barrier. */
1261       }
1262     } else if (!lj_opt_fwd_wasnonnil(J, loadop, tref_ref(xref))) {
1263       /* Cannot derive that the previous value was non-nil, must do checks. */
1264       if (xrefop == IR_HREF)  /* Guard against store to niltv. */
1265         emitir(IRTG(IR_NE, IRT_P32), xref, lj_ir_kkptr(J, niltvg(J2G(J))));
1266       if (ix->idxchain) {  /* Metamethod lookup required? */
1267         /* A check for NULL metatable is cheaper (hoistable) than a load. */
1268         if (!mt) {
1269           TRef mtref = emitir(IRT(IR_FLOAD, IRT_TAB), ix->tab, IRFL_TAB_META);
1270           emitir(IRTG(IR_EQ, IRT_TAB), mtref, lj_ir_knull(J, IRT_TAB));
1271         } else {
1272           IRType t = itype2irt(oldv);
1273           emitir(IRTG(loadop, t), xref, 0);  /* Guard for non-nil value. */
1274         }
1275       }
1276     } else {
1277       keybarrier = 0;  /* Previous non-nil value kept the key alive. */
1278     }
1279     /* Convert int to number before storing. */
1280     if (!LJ_DUALNUM && tref_isinteger(ix->val))
1281       ix->val = emitir(IRTN(IR_CONV), ix->val, IRCONV_NUM_INT);
1282     emitir(IRT(loadop+IRDELTA_L2S, tref_type(ix->val)), xref, ix->val);
1283     if (keybarrier || tref_isgcv(ix->val))
1284       emitir(IRT(IR_TBAR, IRT_NIL), ix->tab, 0);
1285     /* Invalidate neg. metamethod cache for stores with certain string keys. */
1286     if (!nommstr(J, ix->key)) {
1287       TRef fref = emitir(IRT(IR_FREF, IRT_P32), ix->tab, IRFL_TAB_NOMM);
1288       emitir(IRT(IR_FSTORE, IRT_U8), fref, lj_ir_kint(J, 0));
1289     }
1290     J->needsnap = 1;
1291     return 0;
1292   }
1293 }
1294 
1295 /* -- Upvalue access ------------------------------------------------------ */
1296 
1297 /* Check whether upvalue is immutable and ok to constify. */
1298 static int rec_upvalue_constify(jit_State *J, GCupval *uvp)
1299 {
1300   if (uvp->immutable) {
1301     cTValue *o = uvval(uvp);
1302     /* Don't constify objects that may retain large amounts of memory. */
1303 #if LJ_HASFFI
1304     if (tviscdata(o)) {
1305       GCcdata *cd = cdataV(o);
1306       if (!cdataisv(cd) && !(cd->marked & LJ_GC_CDATA_FIN)) {
1307         CType *ct = ctype_raw(ctype_ctsG(J2G(J)), cd->ctypeid);
1308         if (!ctype_hassize(ct->info) || ct->size <= 16)
1309           return 1;
1310       }
1311       return 0;
1312     }
1313 #else
1314     UNUSED(J);
1315 #endif
1316     if (!(tvistab(o) || tvisudata(o) || tvisthread(o)))
1317       return 1;
1318   }
1319   return 0;
1320 }
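
/* [Editor's note] Illustration of the constify rule above: in
**
**   local k = 42
**   local function f() return k end
**
** the upvalue k is never written after the closure is created, so it is
** flagged immutable and the recorder can bake 42 into the trace as a
** constant instead of emitting a ULOAD. Tables, userdata, threads and
** sizable cdata are excluded because a constant reference would pin
** arbitrarily large objects for the lifetime of the trace.
*/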
1321 
1322 /* Record upvalue load/store. */
1323 static TRef rec_upvalue(jit_State *J, uint32_t uv, TRef val)
1324 {
1325   GCupval *uvp = &gcref(J->fn->l.uvptr[uv])->uv;
1326   TRef fn = getcurrf(J);
1327   IRRef uref;
1328   int needbarrier = 0;
1329   if (rec_upvalue_constify(J, uvp)) {  /* Try to constify immutable upvalue. */
1330     TRef tr, kfunc;
1331     lua_assert(val == 0);
1332     if (!tref_isk(fn)) {  /* Late specialization of current function. */
1333       if (J->pt->flags >= PROTO_CLC_POLY)
1334         goto noconstify;
1335       kfunc = lj_ir_kfunc(J, J->fn);
1336       emitir(IRTG(IR_EQ, IRT_FUNC), fn, kfunc);
1337       J->base[-1] = TREF_FRAME | kfunc;
1338       fn = kfunc;
1339     }
1340     tr = lj_record_constify(J, uvval(uvp));
1341     if (tr)
1342       return tr;
1343   }
1344 noconstify:
1345   /* Note: this effectively limits LJ_MAX_UPVAL to 127. */
1346   uv = (uv << 8) | (hashrot(uvp->dhash, uvp->dhash + HASH_BIAS) & 0xff);
1347   if (!uvp->closed) {
1348     uref = tref_ref(emitir(IRTG(IR_UREFO, IRT_P32), fn, uv));
1349     /* In current stack? */
1350     if (uvval(uvp) >= tvref(J->L->stack) &&
1351         uvval(uvp) < tvref(J->L->maxstack)) {
1352       int32_t slot = (int32_t)(uvval(uvp) - (J->L->base - J->baseslot));
1353       if (slot >= 0) {  /* Aliases an SSA slot? */
1354         emitir(IRTG(IR_EQ, IRT_P32),
1355                REF_BASE,
1356                emitir(IRT(IR_ADD, IRT_P32), uref,
1357                       lj_ir_kint(J, (slot - 1) * -8)));
1358         slot -= (int32_t)J->baseslot;  /* Note: slot number may be negative! */
1359         if (val == 0) {
1360           return getslot(J, slot);
1361         } else {
1362           J->base[slot] = val;
1363           if (slot >= (int32_t)J->maxslot) J->maxslot = (BCReg)(slot+1);
1364           return 0;
1365         }
1366       }
1367     }
1368     emitir(IRTG(IR_UGT, IRT_P32),
1369            emitir(IRT(IR_SUB, IRT_P32), uref, REF_BASE),
1370            lj_ir_kint(J, (J->baseslot + J->maxslot) * 8));
1371   } else {
1372     needbarrier = 1;
1373     uref = tref_ref(emitir(IRTG(IR_UREFC, IRT_P32), fn, uv));
1374   }
1375   if (val == 0) {  /* Upvalue load. */
1376     IRType t = itype2irt(uvval(uvp));
1377     TRef res = emitir(IRTG(IR_ULOAD, t), uref, 0);
1378     if (irtype_ispri(t)) res = TREF_PRI(t);  /* Canonicalize primitive refs. */
1379     return res;
1380   } else {  /* Upvalue store. */
1381     /* Convert int to number before storing. */
1382     if (!LJ_DUALNUM && tref_isinteger(val))
1383       val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
1384     emitir(IRT(IR_USTORE, tref_type(val)), uref, val);
1385     if (needbarrier && tref_isgcv(val))
1386       emitir(IRT(IR_OBAR, IRT_NIL), uref, val);
1387     J->needsnap = 1;
1388     return 0;
1389   }
1390 }
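
     /* Open upvalues may alias a slot of an on-trace frame, e.g.:
     **   local x = 0
     **   local function inc() x = x + 1 end  -- 'x' is an open upvalue
     ** While recording a call to inc() from the enclosing function, 'x'
     ** still lives in a stack slot of that frame, so the access is
     ** redirected to the tracked SSA slot, under a guard that the
     ** upvalue address still equals that slot.
     */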
1391 
1392 /* -- Record calls to Lua functions --------------------------------------- */
1393 
1394 /* Check unroll limits for calls. */
1395 static void check_call_unroll(jit_State *J, TraceNo lnk)
1396 {
1397   cTValue *frame = J->L->base - 1;
1398   void *pc = mref(frame_func(frame)->l.pc, void);
1399   int32_t depth = J->framedepth;
1400   int32_t count = 0;
1401   if ((J->pt->flags & PROTO_VARARG)) depth--;  /* Vararg frame still missing. */
1402   for (; depth > 0; depth--) {  /* Count frames with same prototype. */
1403     if (frame_iscont(frame)) depth--;
1404     frame = frame_prev(frame);
1405     if (mref(frame_func(frame)->l.pc, void) == pc)
1406       count++;
1407   }
1408   if (J->pc == J->startpc) {
1409     if (count + J->tailcalled > J->param[JIT_P_recunroll]) {
1410       J->pc++;
1411       if (J->framedepth + J->retdepth == 0)
1412         rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Tail-recursion. */
1413       else
1414         rec_stop(J, LJ_TRLINK_UPREC, J->cur.traceno);  /* Up-recursion. */
1415     }
1416   } else {
1417     if (count > J->param[JIT_P_callunroll]) {
1418       if (lnk) {  /* Possible tail- or up-recursion. */
1419         lj_trace_flush(J, lnk);  /* Flush trace that only returns. */
1420         /* Set a small, pseudo-random hotcount for a quick retry of JFUNC*. */
1421         hotcount_set(J2GG(J), J->pc+1, LJ_PRNG_BITS(J, 4));
1422       }
1423       lj_trace_err(J, LJ_TRERR_CUNROLL);
1424     }
1425   }
1426 }
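
     /* Example of the recursion cases handled above:
     **   local function fact(n)
     **     if n <= 1 then return 1 end
     **     return n * fact(n-1)  -- up-recursion: frames accumulate
     **   end
     ** A call like 'return fact(n-1)' would be tail-recursion instead.
     ** Once the unroll budget is exhausted at the trace start, the trace
     ** is closed with an up- or tail-recursive link rather than being
     ** unrolled further.
     */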
1427 
1428 /* Record Lua function setup. */
1429 static void rec_func_setup(jit_State *J)
1430 {
1431   GCproto *pt = J->pt;
1432   BCReg s, numparams = pt->numparams;
1433   if ((pt->flags & PROTO_NOJIT))
1434     lj_trace_err(J, LJ_TRERR_CJITOFF);
1435   if (J->baseslot + pt->framesize >= LJ_MAX_JSLOTS)
1436     lj_trace_err(J, LJ_TRERR_STACKOV);
1437   /* Fill up missing parameters with nil. */
1438   for (s = J->maxslot; s < numparams; s++)
1439     J->base[s] = TREF_NIL;
1440   /* The remaining slots should never be read before they are written. */
1441   J->maxslot = numparams;
1442 }
1443 
1444 /* Record Lua vararg function setup. */
1445 static void rec_func_vararg(jit_State *J)
1446 {
1447   GCproto *pt = J->pt;
1448   BCReg s, fixargs, vframe = J->maxslot+1;
1449   lua_assert((pt->flags & PROTO_VARARG));
1450   if (J->baseslot + vframe + pt->framesize >= LJ_MAX_JSLOTS)
1451     lj_trace_err(J, LJ_TRERR_STACKOV);
1452   J->base[vframe-1] = J->base[-1];  /* Copy function up. */
1453   /* Copy fixarg slots up and set their original slots to nil. */
1454   fixargs = pt->numparams < J->maxslot ? pt->numparams : J->maxslot;
1455   for (s = 0; s < fixargs; s++) {
1456     J->base[vframe+s] = J->base[s];
1457     J->base[s] = TREF_NIL;
1458   }
1459   J->maxslot = fixargs;
1460   J->framedepth++;
1461   J->base += vframe;
1462   J->baseslot += vframe;
1463 }
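
     /* Slot layout sketch for 'local function f(a, b, ...)' called as
     ** f(1, 2, 3, 4), i.e. numparams = 2 plus two extra varargs:
     **
     **   before:  f | 1 | 2 | 3 | 4              (base at '1')
     **   after:   f |nil|nil| 3 | 4 | f | 1 | 2  (new base after copied f)
     **
     ** The fixargs are copied up into the new frame; the varargs stay
     ** below the copied-up function slot.
     */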
1464 
1465 /* Record entry to a Lua function. */
1466 static void rec_func_lua(jit_State *J)
1467 {
1468   rec_func_setup(J);
1469   check_call_unroll(J, 0);
1470 }
1471 
1472 /* Record entry to an already compiled function. */
1473 static void rec_func_jit(jit_State *J, TraceNo lnk)
1474 {
1475   GCtrace *T;
1476   rec_func_setup(J);
1477   T = traceref(J, lnk);
1478   if (T->linktype == LJ_TRLINK_RETURN) {  /* Trace returns to interpreter? */
1479     check_call_unroll(J, lnk);
1480     /* Temporarily unpatch JFUNC* to continue recording across function. */
1481     J->patchins = *J->pc;
1482     J->patchpc = (BCIns *)J->pc;
1483     *J->patchpc = T->startins;
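         /* The original JFUNC* instruction is restored from J->patchins
         ** when recording ends or aborts (see the J->patchpc handling in
         ** lj_trace.c).
         */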
1484     return;
1485   }
1486   J->instunroll = 0;  /* Cannot continue across a compiled function. */
1487   if (J->pc == J->startpc && J->framedepth + J->retdepth == 0)
1488     rec_stop(J, LJ_TRLINK_TAILREC, J->cur.traceno);  /* Extra tail-recursion. */
1489   else
1490     rec_stop(J, LJ_TRLINK_ROOT, lnk);  /* Link to the function. */
1491 }
1492 
1493 /* -- Vararg handling ----------------------------------------------------- */
1494 
1495 /* Detect y = select(x, ...) idiom. */
1496 static int select_detect(jit_State *J)
1497 {
1498   BCIns ins = J->pc[1];
1499   if (bc_op(ins) == BC_CALLM && bc_b(ins) == 2 && bc_c(ins) == 1) {
1500     cTValue *func = &J->L->base[bc_a(ins)];
1501     if (tvisfunc(func) && funcV(func)->c.ffid == FF_select) {
1502       TRef kfunc = lj_ir_kfunc(J, funcV(func));
1503       emitir(IRTG(IR_EQ, IRT_FUNC), getslot(J, bc_a(ins)), kfunc);
1504       return 1;
1505     }
1506   }
1507   return 0;
1508 }
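
     /* This matches the common vararg idioms, e.g.:
     **   local n = select('#', ...)  -- count varargs
     **   local v = select(k, ...)    -- pick the k-th vararg
     ** The emitted guard on the function slot keeps the trace valid only
     ** as long as 'select' has not been replaced by another function.
     */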
1509 
1510 /* Record vararg instruction. */
1511 static void rec_varg(jit_State *J, BCReg dst, ptrdiff_t nresults)
1512 {
1513   int32_t numparams = J->pt->numparams;
1514   ptrdiff_t nvararg = frame_delta(J->L->base-1) - numparams - 1;
1515   lua_assert(frame_isvarg(J->L->base-1));
1516   if (J->framedepth > 0) {  /* Simple case: varargs defined on-trace. */
1517     ptrdiff_t i;
1518     if (nvararg < 0) nvararg = 0;
1519     if (nresults == -1) {
1520       nresults = nvararg;
1521       J->maxslot = dst + (BCReg)nvararg;
1522     } else if (dst + nresults > J->maxslot) {
1523       J->maxslot = dst + (BCReg)nresults;
1524     }
1525     for (i = 0; i < nresults; i++)
1526       J->base[dst+i] = i < nvararg ? getslot(J, i - nvararg - 1) : TREF_NIL;
1527   } else {  /* Unknown number of varargs passed to trace. */
1528     TRef fr = emitir(IRTI(IR_SLOAD), 0, IRSLOAD_READONLY|IRSLOAD_FRAME);
1529     int32_t frofs = 8*(1+numparams)+FRAME_VARG;
1530     if (nresults >= 0) {  /* Known fixed number of results. */
1531       ptrdiff_t i;
1532       if (nvararg > 0) {
1533         ptrdiff_t nload = nvararg >= nresults ? nresults : nvararg;
1534         TRef vbase;
1535         if (nvararg >= nresults)
1536           emitir(IRTGI(IR_GE), fr, lj_ir_kint(J, frofs+8*(int32_t)nresults));
1537         else
1538           emitir(IRTGI(IR_EQ), fr, lj_ir_kint(J, frame_ftsz(J->L->base-1)));
1539         vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
1540         vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
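             /* The guard above pins the runtime vararg count: at least
             ** nresults varargs if enough were passed, otherwise the exact
             ** frame size, since the nil-filling below depends on it.
             ** Both fr and frofs include the FRAME_VARG tag bits, which
             ** cancel in the subtraction, so vbase points at the first
             ** vararg and AREF can index the varargs 0-based.
             */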
1541         for (i = 0; i < nload; i++) {
1542           IRType t = itype2irt(&J->L->base[i-1-nvararg]);
1543           TRef aref = emitir(IRT(IR_AREF, IRT_P32),
1544                              vbase, lj_ir_kint(J, (int32_t)i));
1545           TRef tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1546           if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
1547           J->base[dst+i] = tr;
1548         }
1549       } else {
1550         emitir(IRTGI(IR_LE), fr, lj_ir_kint(J, frofs));
1551         nvararg = 0;
1552       }
1553       for (i = nvararg; i < nresults; i++)
1554         J->base[dst+i] = TREF_NIL;
1555       if (dst + (BCReg)nresults > J->maxslot)
1556         J->maxslot = dst + (BCReg)nresults;
1557     } else if (select_detect(J)) {  /* y = select(x, ...) */
1558       TRef tridx = J->base[dst-1];
1559       TRef tr = TREF_NIL;
1560       ptrdiff_t idx = lj_ffrecord_select_mode(J, tridx, &J->L->base[dst-1]);
1561       if (idx < 0) goto nyivarg;
1562       if (idx != 0 && !tref_isinteger(tridx))
1563         tridx = emitir(IRTGI(IR_CONV), tridx, IRCONV_INT_NUM|IRCONV_INDEX);
1564       if (idx != 0 && tref_isk(tridx)) {
1565         emitir(IRTGI(idx <= nvararg ? IR_GE : IR_LT),
1566                fr, lj_ir_kint(J, frofs+8*(int32_t)idx));
1567         frofs -= 8;  /* Bias for 1-based index. */
1568       } else if (idx <= nvararg) {  /* Compute size. */
1569         TRef tmp = emitir(IRTI(IR_ADD), fr, lj_ir_kint(J, -frofs));
1570         if (numparams)
1571           emitir(IRTGI(IR_GE), tmp, lj_ir_kint(J, 0));
1572         tr = emitir(IRTI(IR_BSHR), tmp, lj_ir_kint(J, 3));
1573         if (idx != 0) {
1574           tridx = emitir(IRTI(IR_ADD), tridx, lj_ir_kint(J, -1));
1575           rec_idx_abc(J, tr, tridx, (uint32_t)nvararg);
1576         }
1577       } else {
1578         TRef tmp = lj_ir_kint(J, frofs);
1579         if (idx != 0) {
1580           TRef tmp2 = emitir(IRTI(IR_BSHL), tridx, lj_ir_kint(J, 3));
1581           tmp = emitir(IRTI(IR_ADD), tmp2, tmp);
1582         } else {
1583           tr = lj_ir_kint(J, 0);
1584         }
1585         emitir(IRTGI(IR_LT), fr, tmp);
1586       }
1587       if (idx != 0 && idx <= nvararg) {
1588         IRType t;
1589         TRef aref, vbase = emitir(IRTI(IR_SUB), REF_BASE, fr);
1590         vbase = emitir(IRT(IR_ADD, IRT_P32), vbase, lj_ir_kint(J, frofs-8));
1591         t = itype2irt(&J->L->base[idx-2-nvararg]);
1592         aref = emitir(IRT(IR_AREF, IRT_P32), vbase, tridx);
1593         tr = emitir(IRTG(IR_VLOAD, t), aref, 0);
1594         if (irtype_ispri(t)) tr = TREF_PRI(t);  /* Canonicalize primitives. */
1595       }
1596       J->base[dst-2] = tr;
1597       J->maxslot = dst-1;
1598       J->bcskip = 2;  /* Skip CALLM + select. */
1599     } else {
1600     nyivarg:
1601       setintV(&J->errinfo, BC_VARG);
1602       lj_trace_err_info(J, LJ_TRERR_NYIBC);
1603     }
1604   }
1605 }
1606 
1607 /* -- Record allocations -------------------------------------------------- */
1608 
1609 static TRef rec_tnew(jit_State *J, uint32_t ah)
1610 {
1611   uint32_t asize = ah & 0x7ff;
1612   uint32_t hbits = ah >> 11;
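       /* 0x7ff is the saturated/escape value of the 11-bit array-size
       ** operand; it is widened to 0x801 here, presumably matching the
       ** VM's handling of oversized table constructors.
       */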
1613   if (asize == 0x7ff) asize = 0x801;
1614   return emitir(IRTG(IR_TNEW, IRT_TAB), asize, hbits);
1615 }
1616 
1617 /* -- Record bytecode ops ------------------------------------------------- */
1618 
1619 /* Prepare for comparison. */
1620 static void rec_comp_prep(jit_State *J)
1621 {
1622   /* Prevent merging with snapshot #0 (GC exit) since we fixup the PC. */
1623   if (J->cur.nsnap == 1 && J->cur.snap[0].ref == J->cur.nins)
1624     emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
1625   lj_snap_add(J);
1626 }
1627 
1628 /* Fixup comparison. */
1629 static void rec_comp_fixup(jit_State *J, const BCIns *pc, int cond)
1630 {
1631   BCIns jmpins = pc[1];
1632   const BCIns *npc = pc + 2 + (cond ? bc_j(jmpins) : 0);
1633   SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1634   /* Set PC to opposite target to avoid re-recording the comp. in side trace. */
1635   J->cur.snapmap[snap->mapofs + snap->nent] = SNAP_MKPC(npc);
1636   J->needsnap = 1;
1637   if (bc_a(jmpins) < J->maxslot) J->maxslot = bc_a(jmpins);
1638   lj_snap_shrink(J);  /* Shrink last snapshot if possible. */
1639 }
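
     /* E.g. for 'if a < b then X else Y end': if the recorded iteration
     ** took X, the snapshot PC of the guard is set to Y, so a side trace
     ** starting at this exit resumes directly at the untaken branch
     ** without re-recording the comparison.
     */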
1640 
1641 /* Record the next bytecode instruction (_before_ it's executed). */
1642 void lj_record_ins(jit_State *J)
1643 {
1644   cTValue *lbase;
1645   RecordIndex ix;
1646   const BCIns *pc;
1647   BCIns ins;
1648   BCOp op;
1649   TRef ra, rb, rc;
1650 
1651   /* Perform post-processing action before recording the next instruction. */
1652   if (LJ_UNLIKELY(J->postproc != LJ_POST_NONE)) {
1653     switch (J->postproc) {
1654     case LJ_POST_FIXCOMP:  /* Fixup comparison. */
1655       pc = frame_pc(&J2G(J)->tmptv);
1656       rec_comp_fixup(J, pc, (!tvistruecond(&J2G(J)->tmptv2) ^ (bc_op(*pc)&1)));
1657       /* fallthrough */
1658     case LJ_POST_FIXGUARD:  /* Fixup and emit pending guard. */
1659     case LJ_POST_FIXGUARDSNAP:  /* Fixup and emit pending guard and snapshot. */
1660       if (!tvistruecond(&J2G(J)->tmptv2)) {
1661         J->fold.ins.o ^= 1;  /* Flip guard to opposite. */
1662         if (J->postproc == LJ_POST_FIXGUARDSNAP) {
1663           SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
1664           J->cur.snapmap[snap->mapofs+snap->nent-1]--;  /* False -> true. */
1665         }
1666       }
1667       lj_opt_fold(J);  /* Emit pending guard. */
1668       /* fallthrough */
1669     case LJ_POST_FIXBOOL:
1670       if (!tvistruecond(&J2G(J)->tmptv2)) {
1671         BCReg s;
1672         TValue *tv = J->L->base;
1673         for (s = 0; s < J->maxslot; s++)  /* Fixup stack slot (if any). */
1674           if (J->base[s] == TREF_TRUE && tvisfalse(&tv[s])) {
1675             J->base[s] = TREF_FALSE;
1676             break;
1677           }
1678       }
1679       break;
1680     case LJ_POST_FIXCONST:
1681       {
1682         BCReg s;
1683         TValue *tv = J->L->base;
1684         for (s = 0; s < J->maxslot; s++)  /* Constify stack slots (if any). */
1685           if (J->base[s] == TREF_NIL && !tvisnil(&tv[s]))
1686             J->base[s] = lj_record_constify(J, &tv[s]);
1687       }
1688       break;
1689     case LJ_POST_FFRETRY:  /* Suppress recording of retried fast function. */
1690       if (bc_op(*J->pc) >= BC__MAX)
1691         return;
1692       break;
1693     default: lua_assert(0); break;
1694     }
1695     J->postproc = LJ_POST_NONE;
1696   }
1697 
1698   /* Need snapshot before recording next bytecode (e.g. after a store). */
1699   if (J->needsnap) {
1700     J->needsnap = 0;
1701     lj_snap_purge(J);
1702     lj_snap_add(J);
1703     J->mergesnap = 1;
1704   }
1705 
1706   /* Skip some bytecodes. */
1707   if (LJ_UNLIKELY(J->bcskip > 0)) {
1708     J->bcskip--;
1709     return;
1710   }
1711 
1712   /* Record only closed loops for root traces. */
1713   pc = J->pc;
1714   if (J->framedepth == 0 &&
1715      (MSize)((char *)pc - (char *)J->bc_min) >= J->bc_extent)
1716     lj_trace_err(J, LJ_TRERR_LLEAVE);
1717 
1718 #ifdef LUA_USE_ASSERT
1719   rec_check_slots(J);
1720   rec_check_ir(J);
1721 #endif
1722 
1723   /* Keep a copy of the runtime values of var/num/str operands. */
1724 #define rav     (&ix.valv)
1725 #define rbv     (&ix.tabv)
1726 #define rcv     (&ix.keyv)
1727 
1728   lbase = J->L->base;
1729   ins = *pc;
1730   op = bc_op(ins);
1731   ra = bc_a(ins);
1732   ix.val = 0;
1733   switch (bcmode_a(op)) {
1734   case BCMvar:
1735     copyTV(J->L, rav, &lbase[ra]); ix.val = ra = getslot(J, ra); break;
1736   default: break;  /* Handled later. */
1737   }
1738   rb = bc_b(ins);
1739   rc = bc_c(ins);
1740   switch (bcmode_b(op)) {
1741   case BCMnone: rb = 0; rc = bc_d(ins); break;  /* Upgrade rc to 'rd'. */
1742   case BCMvar:
1743     copyTV(J->L, rbv, &lbase[rb]); ix.tab = rb = getslot(J, rb); break;
1744   default: break;  /* Handled later. */
1745   }
1746   switch (bcmode_c(op)) {
1747   case BCMvar:
1748     copyTV(J->L, rcv, &lbase[rc]); ix.key = rc = getslot(J, rc); break;
1749   case BCMpri: setitype(rcv, ~rc); ix.key = rc = TREF_PRI(IRT_NIL+rc); break;
1750   case BCMnum: { cTValue *tv = proto_knumtv(J->pt, rc);
1751     copyTV(J->L, rcv, tv); ix.key = rc = tvisint(tv) ? lj_ir_kint(J, intV(tv)) :
1752     lj_ir_knumint(J, numV(tv)); } break;
1753   case BCMstr: { GCstr *s = gco2str(proto_kgc(J->pt, ~(ptrdiff_t)rc));
1754     setstrV(J->L, rcv, s); ix.key = rc = lj_ir_kstr(J, s); } break;
1755   default: break;  /* Handled later. */
1756   }
1757 
1758   switch (op) {
1759 
1760   /* -- Comparison ops ---------------------------------------------------- */
1761 
1762   case BC_ISLT: case BC_ISGE: case BC_ISLE: case BC_ISGT:
1763 #if LJ_HASFFI
1764     if (tref_iscdata(ra) || tref_iscdata(rc)) {
1765       rec_mm_comp_cdata(J, &ix, op, ((int)op & 2) ? MM_le : MM_lt);
1766       break;
1767     }
1768 #endif
1769     /* Emit nothing for two numeric or string consts. */
1770     if (!(tref_isk2(ra,rc) && tref_isnumber_str(ra) && tref_isnumber_str(rc))) {
1771       IRType ta = tref_isinteger(ra) ? IRT_INT : tref_type(ra);
1772       IRType tc = tref_isinteger(rc) ? IRT_INT : tref_type(rc);
1773       int irop;
1774       if (ta != tc) {
1775         /* Widen mixed number/int comparisons to number/number comparison. */
1776         if (ta == IRT_INT && tc == IRT_NUM) {
1777           ra = emitir(IRTN(IR_CONV), ra, IRCONV_NUM_INT);
1778           ta = IRT_NUM;
1779         } else if (ta == IRT_NUM && tc == IRT_INT) {
1780           rc = emitir(IRTN(IR_CONV), rc, IRCONV_NUM_INT);
1781         } else if (LJ_52) {
1782           ta = IRT_NIL;  /* Force metamethod for different types. */
1783         } else if (!((ta == IRT_FALSE || ta == IRT_TRUE) &&
1784                      (tc == IRT_FALSE || tc == IRT_TRUE))) {
1785           break;  /* Interpreter will throw for two different types. */
1786         }
1787       }
1788       rec_comp_prep(J);
1789       irop = (int)op - (int)BC_ISLT + (int)IR_LT;
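           /* This relies on BC_ISLT..BC_ISGT mapping linearly onto
           ** IR_LT..IR_GT. Below, XOR 1 negates a comparison (LT <-> GE),
           ** XOR 4 toggles the unordered variant (LT <-> ULT) for NaN
           ** handling, and XOR 5 does both at once.
           */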
1790       if (ta == IRT_NUM) {
1791         if ((irop & 1)) irop ^= 4;  /* ISGE/ISGT are unordered. */
1792         if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
1793           irop ^= 5;
1794       } else if (ta == IRT_INT) {
1795         if (!lj_ir_numcmp(numberVnum(rav), numberVnum(rcv), (IROp)irop))
1796           irop ^= 1;
1797       } else if (ta == IRT_STR) {
1798         if (!lj_ir_strcmp(strV(rav), strV(rcv), (IROp)irop)) irop ^= 1;
1799         ra = lj_ir_call(J, IRCALL_lj_str_cmp, ra, rc);
1800         rc = lj_ir_kint(J, 0);
1801         ta = IRT_INT;
1802       } else {
1803         rec_mm_comp(J, &ix, (int)op);
1804         break;
1805       }
1806       emitir(IRTG(irop, ta), ra, rc);
1807       rec_comp_fixup(J, J->pc, ((int)op ^ irop) & 1);
1808     }
1809     break;
1810 
1811   case BC_ISEQV: case BC_ISNEV:
1812   case BC_ISEQS: case BC_ISNES:
1813   case BC_ISEQN: case BC_ISNEN:
1814   case BC_ISEQP: case BC_ISNEP:
1815 #if LJ_HASFFI
1816     if (tref_iscdata(ra) || tref_iscdata(rc)) {
1817       rec_mm_comp_cdata(J, &ix, op, MM_eq);
1818       break;
1819     }
1820 #endif
1821     /* Emit nothing for two non-table, non-udata consts. */
1822     if (!(tref_isk2(ra, rc) && !(tref_istab(ra) || tref_isudata(ra)))) {
1823       int diff;
1824       rec_comp_prep(J);
1825       diff = lj_record_objcmp(J, ra, rc, rav, rcv);
1826       if (diff == 2 || !(tref_istab(ra) || tref_isudata(ra)))
1827         rec_comp_fixup(J, J->pc, ((int)op & 1) == !diff);
1828       else if (diff == 1)  /* Only check __eq if different, but same type. */
1829         rec_mm_equal(J, &ix, (int)op);
1830     }
1831     break;
1832 
1833   /* -- Unary test and copy ops ------------------------------------------- */
1834 
1835   case BC_ISTC: case BC_ISFC:
1836     if ((op & 1) == tref_istruecond(rc))
1837       rc = 0;  /* Don't store if condition is not true. */
1838     /* fallthrough */
1839   case BC_IST: case BC_ISF:  /* Type specialization suffices. */
1840     if (bc_a(pc[1]) < J->maxslot)
1841       J->maxslot = bc_a(pc[1]);  /* Shrink used slots. */
1842     break;
1843 
1844   /* -- Unary ops --------------------------------------------------------- */
1845 
1846   case BC_NOT:
1847     /* Type specialization already forces const result. */
1848     rc = tref_istruecond(rc) ? TREF_FALSE : TREF_TRUE;
1849     break;
1850 
1851   case BC_LEN:
1852     if (tref_isstr(rc))
1853       rc = emitir(IRTI(IR_FLOAD), rc, IRFL_STR_LEN);
1854     else if (!LJ_52 && tref_istab(rc))
1855       rc = lj_ir_call(J, IRCALL_lj_tab_len, rc);
1856     else
1857       rc = rec_mm_len(J, rc, rcv);
1858     break;
1859 
1860   /* -- Arithmetic ops ---------------------------------------------------- */
1861 
1862   case BC_UNM:
1863     if (tref_isnumber_str(rc)) {
1864       rc = lj_opt_narrow_unm(J, rc, rcv);
1865     } else {
1866       ix.tab = rc;
1867       copyTV(J->L, &ix.tabv, rcv);
1868       rc = rec_mm_arith(J, &ix, MM_unm);
1869     }
1870     break;
1871 
1872   case BC_ADDNV: case BC_SUBNV: case BC_MULNV: case BC_DIVNV: case BC_MODNV:
1873     /* Swap rb/rc and rbv/rcv. rav is temp. */
1874     ix.tab = rc; ix.key = rc = rb; rb = ix.tab;
1875     copyTV(J->L, rav, rbv);
1876     copyTV(J->L, rbv, rcv);
1877     copyTV(J->L, rcv, rav);
1878     if (op == BC_MODNV)
1879       goto recmod;
1880     /* fallthrough */
1881   case BC_ADDVN: case BC_SUBVN: case BC_MULVN: case BC_DIVVN:
1882   case BC_ADDVV: case BC_SUBVV: case BC_MULVV: case BC_DIVVV: {
1883     MMS mm = bcmode_mm(op);
1884     if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1885       rc = lj_opt_narrow_arith(J, rb, rc, rbv, rcv,
1886                                (int)mm - (int)MM_add + (int)IR_ADD);
1887     else
1888       rc = rec_mm_arith(J, &ix, mm);
1889     break;
1890     }
1891 
1892   case BC_MODVN: case BC_MODVV:
1893   recmod:
1894     if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1895       rc = lj_opt_narrow_mod(J, rb, rc, rbv, rcv);
1896     else
1897       rc = rec_mm_arith(J, &ix, MM_mod);
1898     break;
1899 
1900   case BC_POW:
1901     if (tref_isnumber_str(rb) && tref_isnumber_str(rc))
1902       rc = lj_opt_narrow_pow(J, rb, rc, rbv, rcv);
1903     else
1904       rc = rec_mm_arith(J, &ix, MM_pow);
1905     break;
1906 
1907   /* -- Constant and move ops --------------------------------------------- */
1908 
1909   case BC_MOV:
1910     /* Clear gap of method call to avoid resurrecting previous refs. */
1911     if (ra > J->maxslot) J->base[ra-1] = 0;
1912     break;
1913   case BC_KSTR: case BC_KNUM: case BC_KPRI:
1914     break;
1915   case BC_KSHORT:
1916     rc = lj_ir_kint(J, (int32_t)(int16_t)rc);
1917     break;
1918   case BC_KNIL:
1919     while (ra <= rc)
1920       J->base[ra++] = TREF_NIL;
1921     if (rc >= J->maxslot) J->maxslot = rc+1;
1922     break;
1923 #if LJ_HASFFI
1924   case BC_KCDATA:
1925     rc = lj_ir_kgc(J, proto_kgc(J->pt, ~(ptrdiff_t)rc), IRT_CDATA);
1926     break;
1927 #endif
1928 
1929   /* -- Upvalue and function ops ------------------------------------------ */
1930 
1931   case BC_UGET:
1932     rc = rec_upvalue(J, rc, 0);
1933     break;
1934   case BC_USETV: case BC_USETS: case BC_USETN: case BC_USETP:
1935     rec_upvalue(J, ra, rc);
1936     break;
1937 
1938   /* -- Table ops --------------------------------------------------------- */
1939 
1940   case BC_GGET: case BC_GSET:
1941     settabV(J->L, &ix.tabv, tabref(J->fn->l.env));
1942     ix.tab = emitir(IRT(IR_FLOAD, IRT_TAB), getcurrf(J), IRFL_FUNC_ENV);
1943     ix.idxchain = LJ_MAX_IDXCHAIN;
1944     rc = lj_record_idx(J, &ix);
1945     break;
1946 
1947   case BC_TGETB: case BC_TSETB:
1948     setintV(&ix.keyv, (int32_t)rc);
1949     ix.key = lj_ir_kint(J, (int32_t)rc);
1950     /* fallthrough */
1951   case BC_TGETV: case BC_TGETS: case BC_TSETV: case BC_TSETS:
1952     ix.idxchain = LJ_MAX_IDXCHAIN;
1953     rc = lj_record_idx(J, &ix);
1954     break;
1955 
1956   case BC_TNEW:
1957     rc = rec_tnew(J, rc);
1958     break;
1959   case BC_TDUP:
1960     rc = emitir(IRTG(IR_TDUP, IRT_TAB),
1961                 lj_ir_ktab(J, gco2tab(proto_kgc(J->pt, ~(ptrdiff_t)rc))), 0);
1962     break;
1963 
1964   /* -- Calls and vararg handling ----------------------------------------- */
1965 
1966   case BC_ITERC:
1967     J->base[ra] = getslot(J, ra-3);
1968     J->base[ra+1] = getslot(J, ra-2);
1969     J->base[ra+2] = getslot(J, ra-1);
1970     { /* Do the actual copy now because lj_record_call needs the values. */
1971       TValue *b = &J->L->base[ra];
1972       copyTV(J->L, b, b-3);
1973       copyTV(J->L, b+1, b-2);
1974       copyTV(J->L, b+2, b-1);
1975     }
1976     lj_record_call(J, ra, (ptrdiff_t)rc-1);
1977     break;
1978 
1979   /* L->top is set to L->base+ra+rc+NARGS-1+1. See lj_dispatch_ins(). */
1980   case BC_CALLM:
1981     rc = (BCReg)(J->L->top - J->L->base) - ra;
1982     /* fallthrough */
1983   case BC_CALL:
1984     lj_record_call(J, ra, (ptrdiff_t)rc-1);
1985     break;
1986 
1987   case BC_CALLMT:
1988     rc = (BCReg)(J->L->top - J->L->base) - ra;
1989     /* fallthrough */
1990   case BC_CALLT:
1991     lj_record_tailcall(J, ra, (ptrdiff_t)rc-1);
1992     break;
1993 
1994   case BC_VARG:
1995     rec_varg(J, ra, (ptrdiff_t)rb-1);
1996     break;
1997 
1998   /* -- Returns ----------------------------------------------------------- */
1999 
2000   case BC_RETM:
2001     /* L->top is set to L->base+ra+rc+NRESULTS-1, see lj_dispatch_ins(). */
2002     rc = (BCReg)(J->L->top - J->L->base) - ra + 1;
2003     /* fallthrough */
2004   case BC_RET: case BC_RET0: case BC_RET1:
2005     lj_record_ret(J, ra, (ptrdiff_t)rc-1);
2006     break;
2007 
2008   /* -- Loops and branches ------------------------------------------------ */
2009 
2010   case BC_FORI:
2011     if (rec_for(J, pc, 0) != LOOPEV_LEAVE)
2012       J->loopref = J->cur.nins;
2013     break;
2014   case BC_JFORI:
2015     lua_assert(bc_op(pc[(ptrdiff_t)rc-BCBIAS_J]) == BC_JFORL);
2016     if (rec_for(J, pc, 0) != LOOPEV_LEAVE)  /* Link to existing loop. */
2017       rec_stop(J, LJ_TRLINK_ROOT, bc_d(pc[(ptrdiff_t)rc-BCBIAS_J]));
2018     /* Continue tracing if the loop is not entered. */
2019     break;
2020 
2021   case BC_FORL:
2022     rec_loop_interp(J, pc, rec_for(J, pc+((ptrdiff_t)rc-BCBIAS_J), 1));
2023     break;
2024   case BC_ITERL:
2025     rec_loop_interp(J, pc, rec_iterl(J, *pc));
2026     break;
2027   case BC_LOOP:
2028     rec_loop_interp(J, pc, rec_loop(J, ra));
2029     break;
2030 
2031   case BC_JFORL:
2032     rec_loop_jit(J, rc, rec_for(J, pc+bc_j(traceref(J, rc)->startins), 1));
2033     break;
2034   case BC_JITERL:
2035     rec_loop_jit(J, rc, rec_iterl(J, traceref(J, rc)->startins));
2036     break;
2037   case BC_JLOOP:
2038     rec_loop_jit(J, rc, rec_loop(J, ra));
2039     break;
2040 
2041   case BC_IFORL:
2042   case BC_IITERL:
2043   case BC_ILOOP:
2044   case BC_IFUNCF:
2045   case BC_IFUNCV:
2046     lj_trace_err(J, LJ_TRERR_BLACKL);
2047     break;
2048 
2049   case BC_JMP:
2050     if (ra < J->maxslot)
2051       J->maxslot = ra;  /* Shrink used slots. */
2052     break;
2053 
2054   /* -- Function headers -------------------------------------------------- */
2055 
2056   case BC_FUNCF:
2057     rec_func_lua(J);
2058     break;
2059   case BC_JFUNCF:
2060     rec_func_jit(J, rc);
2061     break;
2062 
2063   case BC_FUNCV:
2064     rec_func_vararg(J);
2065     rec_func_lua(J);
2066     break;
2067   case BC_JFUNCV:
2068     lua_assert(0);  /* Cannot happen. No hotcall counting for vararg funcs. */
2069     break;
2070 
2071   case BC_FUNCC:
2072   case BC_FUNCCW:
2073     lj_ffrecord_func(J);
2074     break;
2075 
2076   default:
2077     if (op >= BC__MAX) {
2078       lj_ffrecord_func(J);
2079       break;
2080     }
2081     /* fallthrough */
2082   case BC_ITERN:
2083   case BC_ISNEXT:
2084   case BC_CAT:
2085   case BC_UCLO:
2086   case BC_FNEW:
2087   case BC_TSETM:
2088     setintV(&J->errinfo, (int32_t)op);
2089     lj_trace_err_info(J, LJ_TRERR_NYIBC);
2090     break;
2091   }
2092 
2093   /* rc == 0 if we have no result yet, e.g. pending __index metamethod call. */
2094   if (bcmode_a(op) == BCMdst && rc) {
2095     J->base[ra] = rc;
2096     if (ra >= J->maxslot) J->maxslot = ra+1;
2097   }
2098 
2099 #undef rav
2100 #undef rbv
2101 #undef rcv
2102 
2103   /* Limit the number of recorded IR instructions. */
2104   if (J->cur.nins > REF_FIRST+(IRRef)J->param[JIT_P_maxrecord])
2105     lj_trace_err(J, LJ_TRERR_TRACEOV);
2106 }
2107 
2108 /* -- Recording setup ----------------------------------------------------- */
2109 
2110 /* Setup recording for a root trace started by a hot loop. */
2111 static const BCIns *rec_setup_root(jit_State *J)
2112 {
2113   /* Determine the next PC and the bytecode range for the loop. */
2114   const BCIns *pcj, *pc = J->pc;
2115   BCIns ins = *pc;
2116   BCReg ra = bc_a(ins);
2117   switch (bc_op(ins)) {
2118   case BC_FORL:
2119     J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
2120     pc += 1+bc_j(ins);
2121     J->bc_min = pc;
2122     break;
2123   case BC_ITERL:
2124     lua_assert(bc_op(pc[-1]) == BC_ITERC);
2125     J->maxslot = ra + bc_b(pc[-1]) - 1;
2126     J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
2127     pc += 1+bc_j(ins);
2128     lua_assert(bc_op(pc[-1]) == BC_JMP);
2129     J->bc_min = pc;
2130     break;
2131   case BC_LOOP:
2132     /* Only check BC range for real loops, but not for "repeat until true". */
2133     pcj = pc + bc_j(ins);
2134     ins = *pcj;
2135     if (bc_op(ins) == BC_JMP && bc_j(ins) < 0) {
2136       J->bc_min = pcj+1 + bc_j(ins);
2137       J->bc_extent = (MSize)(-bc_j(ins))*sizeof(BCIns);
2138     }
2139     J->maxslot = ra;
2140     pc++;
2141     break;
2142   case BC_RET:
2143   case BC_RET0:
2144   case BC_RET1:
2145     /* No bytecode range check for down-recursive root traces. */
2146     J->maxslot = ra + bc_d(ins) - 1;
2147     break;
2148   case BC_FUNCF:
2149     /* No bytecode range check for root traces started by a hot call. */
2150     J->maxslot = J->pt->numparams;
2151     pc++;
2152     break;
2153   default:
2154     lua_assert(0);
2155     break;
2156   }
2157   return pc;
2158 }
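
     /* The bc_min/bc_extent range set above delimits the loop body:
     ** lj_record_ins() aborts with LJ_TRERR_LLEAVE if a root trace leaves
     ** this range at frame depth 0, so only closed loops are recorded.
     */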
2159 
2160 /* Setup for recording a new trace. */
2161 void lj_record_setup(jit_State *J)
2162 {
2163   uint32_t i;
2164 
2165   /* Initialize state related to current trace. */
2166   memset(J->slot, 0, sizeof(J->slot));
2167   memset(J->chain, 0, sizeof(J->chain));
2168   memset(J->bpropcache, 0, sizeof(J->bpropcache));
2169   J->scev.idx = REF_NIL;
2170   setmref(J->scev.pc, NULL);
2171 
2172   J->baseslot = 1;  /* Invoking function is at base[-1]. */
2173   J->base = J->slot + J->baseslot;
2174   J->maxslot = 0;
2175   J->framedepth = 0;
2176   J->retdepth = 0;
2177 
2178   J->instunroll = J->param[JIT_P_instunroll];
2179   J->loopunroll = J->param[JIT_P_loopunroll];
2180   J->tailcalled = 0;
2181   J->loopref = 0;
2182 
2183   J->bc_min = NULL;  /* Means no limit. */
2184   J->bc_extent = ~(MSize)0;
2185 
2186   /* Emit instructions for fixed references. Also triggers initial IR alloc. */
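       /* IR constants grow downwards from REF_BIAS: the loop below pins
       ** the primitives nil/false/true at REF_NIL/REF_FALSE/REF_TRUE and
       ** nk marks the current low end of the constant area.
       */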
2187   emitir_raw(IRT(IR_BASE, IRT_P32), J->parent, J->exitno);
2188   for (i = 0; i <= 2; i++) {
2189     IRIns *ir = IR(REF_NIL-i);
2190     ir->i = 0;
2191     ir->t.irt = (uint8_t)(IRT_NIL+i);
2192     ir->o = IR_KPRI;
2193     ir->prev = 0;
2194   }
2195   J->cur.nk = REF_TRUE;
2196 
2197   J->startpc = J->pc;
2198   setmref(J->cur.startpc, J->pc);
2199   if (J->parent) {  /* Side trace. */
2200     GCtrace *T = traceref(J, J->parent);
2201     TraceNo root = T->root ? T->root : J->parent;
2202     J->cur.root = (uint16_t)root;
2203     J->cur.startins = BCINS_AD(BC_JMP, 0, 0);
2204     /* Check whether we could at least potentially form an extra loop. */
2205     if (J->exitno == 0 && T->snap[0].nent == 0) {
2206       /* We can narrow a FORL for some side traces, too. */
2207       if (J->pc > proto_bc(J->pt) && bc_op(J->pc[-1]) == BC_JFORI &&
2208           bc_d(J->pc[bc_j(J->pc[-1])-1]) == root) {
2209         lj_snap_add(J);
2210         rec_for_loop(J, J->pc-1, &J->scev, 1);
2211         goto sidecheck;
2212       }
2213     } else {
2214       J->startpc = NULL;  /* Prevent forming an extra loop. */
2215     }
2216     lj_snap_replay(J, T);
2217   sidecheck:
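         /* Give up on this side trace if the root already has too many
         ** side traces attached, or if this exit was taken more than
         ** hotexit+tryside times without producing a side trace: link
         ** back to the interpreter instead.
         */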
2218     if (traceref(J, J->cur.root)->nchild >= J->param[JIT_P_maxside] ||
2219         T->snap[J->exitno].count >= J->param[JIT_P_hotexit] +
2220                                     J->param[JIT_P_tryside]) {
2221       rec_stop(J, LJ_TRLINK_INTERP, 0);
2222     }
2223   } else {  /* Root trace. */
2224     J->cur.root = 0;
2225     J->cur.startins = *J->pc;
2226     J->pc = rec_setup_root(J);
2227     /* Note: the loop instruction itself is recorded at the end and not
2228     ** at the start! So snapshot #0 needs to point to the *next* instruction.
2229     */
2230     lj_snap_add(J);
2231     if (bc_op(J->cur.startins) == BC_FORL)
2232       rec_for_loop(J, J->pc-1, &J->scev, 1);
2233     if (1 + J->pt->framesize >= LJ_MAX_JSLOTS)
2234       lj_trace_err(J, LJ_TRERR_STACKOV);
2235   }
2236 #ifdef LUAJIT_ENABLE_CHECKHOOK
2237   /* Regularly check for instruction/line hooks from compiled code and
2238   ** exit to the interpreter if the hooks are set.
2239   **
2240   ** This is a compile-time option and disabled by default, since the
2241   ** hook checks may be quite expensive in tight loops.
2242   **
2243   ** Note this is only useful if hooks are *not* set most of the time.
2244   ** Use this only if you want to *asynchronously* interrupt the execution.
2245   **
2246   ** You can set the instruction hook via lua_sethook() with a count of 1
2247   ** from a signal handler or another native thread. Please have a look
2248   ** at the first few functions in luajit.c for an example (Ctrl-C handler).
2249   */
2250   {
2251     TRef tr = emitir(IRT(IR_XLOAD, IRT_U8),
2252                      lj_ir_kptr(J, &J2G(J)->hookmask), IRXLOAD_VOLATILE);
2253     tr = emitir(IRTI(IR_BAND), tr, lj_ir_kint(J, (LUA_MASKLINE|LUA_MASKCOUNT)));
2254     emitir(IRTGI(IR_EQ), tr, lj_ir_kint(J, 0));
2255   }
2256 #endif
2257 }
2258 
2259 #undef IR
2260 #undef emitir_raw
2261 #undef emitir
2262 
2263 #endif
