root/lj_snap.c

DEFINITIONS

This source file includes the following definitions:
  1. lj_snap_grow_buf_
  2. lj_snap_grow_map_
  3. snapshot_slots
  4. snapshot_framelinks
  5. snapshot_stack
  6. lj_snap_add
  7. snap_usedef
  8. lj_snap_purge
  9. lj_snap_shrink
  10. snap_renamefilter
  11. snap_renameref
  12. lj_snap_regspmap
  13. snap_replay_const
  14. snap_dedup
  15. snap_pref
  16. snap_sunk_store2
  17. snap_sunk_store
  18. lj_snap_replay
  19. snap_restoreval
  20. snap_restoredata
  21. snap_unsink
  22. lj_snap_restore

/*
** Snapshot handling.
** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
*/

#define lj_snap_c
#define LUA_CORE

#include "lj_obj.h"

#if LJ_HASJIT

#include "lj_gc.h"
#include "lj_tab.h"
#include "lj_state.h"
#include "lj_frame.h"
#include "lj_bc.h"
#include "lj_ir.h"
#include "lj_jit.h"
#include "lj_iropt.h"
#include "lj_trace.h"
#include "lj_snap.h"
#include "lj_target.h"
#if LJ_HASFFI
#include "lj_ctype.h"
#include "lj_cdata.h"
#endif

/* Pass IR on to next optimization in chain (FOLD). */
#define emitir(ot, a, b)        (lj_ir_set(J, (ot), (a), (b)), lj_opt_fold(J))

/* Emit raw IR without passing through optimizations. */
#define emitir_raw(ot, a, b)    (lj_ir_set(J, (ot), (a), (b)), lj_ir_emit(J))

/* -- Snapshot buffer allocation ------------------------------------------ */

/* Grow snapshot buffer. */
void lj_snap_grow_buf_(jit_State *J, MSize need)
{
  MSize maxsnap = (MSize)J->param[JIT_P_maxsnap];
  if (need > maxsnap)
    lj_trace_err(J, LJ_TRERR_SNAPOV);
  lj_mem_growvec(J->L, J->snapbuf, J->sizesnap, maxsnap, SnapShot);
  J->cur.snap = J->snapbuf;
}

/* Grow snapshot map buffer. */
void lj_snap_grow_map_(jit_State *J, MSize need)
{
  if (need < 2*J->sizesnapmap)
    need = 2*J->sizesnapmap;
  else if (need < 64)
    need = 64;
  J->snapmapbuf = (SnapEntry *)lj_mem_realloc(J->L, J->snapmapbuf,
                    J->sizesnapmap*sizeof(SnapEntry), need*sizeof(SnapEntry));
  J->cur.snapmap = J->snapmapbuf;
  J->sizesnapmap = need;
}
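
/* Illustrative note on the growth policy above: the map at least doubles,
** but never starts below 64 entries. E.g. with sizesnapmap == 40 and
** need == 50 the first branch yields need = 80; on the very first growth
** (sizesnapmap == 0) the second branch clamps need up to 64.
*/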

/* -- Snapshot generation ------------------------------------------------- */

/* Add all modified slots to the snapshot. */
static MSize snapshot_slots(jit_State *J, SnapEntry *map, BCReg nslots)
{
  IRRef retf = J->chain[IR_RETF];  /* Limits SLOAD restore elimination. */
  BCReg s;
  MSize n = 0;
  for (s = 0; s < nslots; s++) {
    TRef tr = J->slot[s];
    IRRef ref = tref_ref(tr);
#if LJ_FR2
    if (s == 1) {  /* Ignore slot 1 in LJ_FR2 mode, except if tailcalled. */
      if ((tr & TREF_FRAME))
        map[n++] = SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL);
      continue;
    }
    if ((tr & (TREF_FRAME | TREF_CONT)) && !ref) {
      cTValue *base = J->L->base - J->baseslot;
      tr = J->slot[s] = (tr & 0xff0000) | lj_ir_k64(J, IR_KNUM, base[s].u64);
      ref = tref_ref(tr);
    }
#endif
    if (ref) {
      SnapEntry sn = SNAP_TR(s, tr);
      IRIns *ir = &J->cur.ir[ref];
      if ((LJ_FR2 || !(sn & (SNAP_CONT|SNAP_FRAME))) &&
          ir->o == IR_SLOAD && ir->op1 == s && ref > retf) {
        /* No need to snapshot unmodified non-inherited slots. */
        if (!(ir->op2 & IRSLOAD_INHERIT))
          continue;
        /* No need to restore readonly slots and unmodified non-parent slots. */
        if (!(LJ_DUALNUM && (ir->op2 & IRSLOAD_CONVERT)) &&
            (ir->op2 & (IRSLOAD_READONLY|IRSLOAD_PARENT)) != IRSLOAD_PARENT)
          sn |= SNAP_NORESTORE;
      }
      if (LJ_SOFTFP32 && irt_isnum(ir->t))
        sn |= SNAP_SOFTFPNUM;
      map[n++] = sn;
    }
  }
  return n;
}
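
/* Illustrative note (the exact encoding lives in lj_jit.h): a SnapEntry
** packs the stack slot into its high byte, flags such as SNAP_FRAME or
** SNAP_NORESTORE into the middle bits and the 16 bit IR reference into
** the low bits, roughly:
**
**   SNAP(slot, flags, ref)  ~  ((SnapEntry)(slot) << 24) + flags + ref
**
** so snap_slot() and snap_ref() can recover both parts cheaply.
*/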

/* Add frame links at the end of the snapshot. */
static MSize snapshot_framelinks(jit_State *J, SnapEntry *map, uint8_t *topslot)
{
  cTValue *frame = J->L->base - 1;
  cTValue *lim = J->L->base - J->baseslot + LJ_FR2;
  GCfunc *fn = frame_func(frame);
  cTValue *ftop = isluafunc(fn) ? (frame+funcproto(fn)->framesize) : J->L->top;
#if LJ_FR2
  uint64_t pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2);
  lua_assert(2 <= J->baseslot && J->baseslot <= 257);
  memcpy(map, &pcbase, sizeof(uint64_t));
#else
  MSize f = 0;
  map[f++] = SNAP_MKPC(J->pc);  /* The current PC is always the first entry. */
#endif
  while (frame > lim) {  /* Backwards traversal of all frames above base. */
    if (frame_islua(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKPC(frame_pc(frame));
#endif
      frame = frame_prevl(frame);
    } else if (frame_iscont(frame)) {
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
      map[f++] = SNAP_MKPC(frame_contpc(frame));
#endif
      frame = frame_prevd(frame);
    } else {
      lua_assert(!frame_isc(frame));
#if !LJ_FR2
      map[f++] = SNAP_MKFTSZ(frame_ftsz(frame));
#endif
      frame = frame_prevd(frame);
      continue;
    }
    if (frame + funcproto(frame_func(frame))->framesize > ftop)
      ftop = frame + funcproto(frame_func(frame))->framesize;
  }
  *topslot = (uint8_t)(ftop - lim);
#if LJ_FR2
  lua_assert(sizeof(SnapEntry) * 2 == sizeof(uint64_t));
  return 2;
#else
  lua_assert(f == (MSize)(1 + J->framedepth));
  return f;
#endif
}
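
/* Illustrative: in LJ_FR2 mode the two map entries appended above hold a
** single 64 bit word, the current PC shifted left by 8 bits with the
** frame depth (J->baseslot - 2, at most 255) packed into the low byte:
**
**   pcbase = (u64ptr(J->pc) << 8) | (J->baseslot - 2)
**
** lj_snap_restore() below reads the low byte back via
** (map[nent+LJ_BE] & 0xff). Without LJ_FR2 the PC plus one ftsz/pc entry
** per frame is stored instead.
*/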

/* Take a snapshot of the current stack. */
static void snapshot_stack(jit_State *J, SnapShot *snap, MSize nsnapmap)
{
  BCReg nslots = J->baseslot + J->maxslot;
  MSize nent;
  SnapEntry *p;
  /* Conservative estimate. */
  lj_snap_grow_map(J, nsnapmap + nslots + (MSize)(LJ_FR2?2:J->framedepth+1));
  p = &J->cur.snapmap[nsnapmap];
  nent = snapshot_slots(J, p, nslots);
  snap->nent = (uint8_t)nent;
  nent += snapshot_framelinks(J, p + nent, &snap->topslot);
  snap->mapofs = (uint16_t)nsnapmap;
  snap->ref = (IRRef1)J->cur.nins;
  snap->nslots = (uint8_t)nslots;
  snap->count = 0;
  J->cur.nsnapmap = (uint16_t)(nsnapmap + nent);
}

/* Add or merge a snapshot. */
void lj_snap_add(jit_State *J)
{
  MSize nsnap = J->cur.nsnap;
  MSize nsnapmap = J->cur.nsnapmap;
  /* Merge if no ins. inbetween or if requested and no guard inbetween. */
  if ((nsnap > 0 && J->cur.snap[nsnap-1].ref == J->cur.nins) ||
      (J->mergesnap && !irt_isguard(J->guardemit))) {
    if (nsnap == 1) {  /* But preserve snap #0 PC. */
      emitir_raw(IRT(IR_NOP, IRT_NIL), 0, 0);
      goto nomerge;
    }
    nsnapmap = J->cur.snap[--nsnap].mapofs;
  } else {
  nomerge:
    lj_snap_grow_buf(J, nsnap+1);
    J->cur.nsnap = (uint16_t)(nsnap+1);
  }
  J->mergesnap = 0;
  J->guardemit.irt = 0;
  snapshot_stack(J, &J->cur.snap[nsnap], nsnapmap);
}
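
/* Illustrative: two consecutive calls to lj_snap_add() with no IR emitted
** inbetween leave snap[nsnap-1].ref == J->cur.nins, so the second call
** overwrites the previous snapshot instead of appending a new one. The
** same happens when mergesnap is set and no guard was emitted since the
** last snapshot. Snapshot #0 is never merged away, to preserve its PC.
*/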

/* -- Snapshot modification ----------------------------------------------- */

#define SNAP_USEDEF_SLOTS       (LJ_MAX_JSLOTS+LJ_STACK_EXTRA)

/* Find unused slots with reaching-definitions bytecode data-flow analysis. */
static BCReg snap_usedef(jit_State *J, uint8_t *udf,
                         const BCIns *pc, BCReg maxslot)
{
  BCReg s;
  GCobj *o;

  if (maxslot == 0) return 0;
#ifdef LUAJIT_USE_VALGRIND
  /* Avoid errors for harmless reads beyond maxslot. */
  memset(udf, 1, SNAP_USEDEF_SLOTS);
#else
  memset(udf, 1, maxslot);
#endif

  /* Treat open upvalues as used. */
  o = gcref(J->L->openupval);
  while (o) {
    if (uvval(gco2uv(o)) < J->L->base) break;
    udf[uvval(gco2uv(o)) - J->L->base] = 0;
    o = gcref(o->gch.nextgc);
  }

#define USE_SLOT(s)             udf[(s)] &= ~1
#define DEF_SLOT(s)             udf[(s)] *= 3

  /* Scan through following bytecode and check for uses/defs. */
  lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  for (;;) {
    BCIns ins = *pc++;
    BCOp op = bc_op(ins);
    switch (bcmode_b(op)) {
    case BCMvar: USE_SLOT(bc_b(ins)); break;
    default: break;
    }
    switch (bcmode_c(op)) {
    case BCMvar: USE_SLOT(bc_c(ins)); break;
    case BCMrbase:
      lua_assert(op == BC_CAT);
      for (s = bc_b(ins); s <= bc_c(ins); s++) USE_SLOT(s);
      for (; s < maxslot; s++) DEF_SLOT(s);
      break;
    case BCMjump:
    handle_jump: {
      BCReg minslot = bc_a(ins);
      if (op >= BC_FORI && op <= BC_JFORL) minslot += FORL_EXT;
      else if (op >= BC_ITERL && op <= BC_JITERL) minslot += bc_b(pc[-2])-1;
      else if (op == BC_UCLO) { pc += bc_j(ins); break; }
      for (s = minslot; s < maxslot; s++) DEF_SLOT(s);
      return minslot < maxslot ? minslot : maxslot;
      }
    case BCMlit:
      if (op == BC_JFORL || op == BC_JITERL || op == BC_JLOOP) {
        goto handle_jump;
      } else if (bc_isret(op)) {
        BCReg top = op == BC_RETM ? maxslot : (bc_a(ins) + bc_d(ins)-1);
        for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        return 0;
      }
      break;
    case BCMfunc: return maxslot;  /* NYI: will abort, anyway. */
    default: break;
    }
    switch (bcmode_a(op)) {
    case BCMvar: USE_SLOT(bc_a(ins)); break;
    case BCMdst:
       if (!(op == BC_ISTC || op == BC_ISFC)) DEF_SLOT(bc_a(ins));
       break;
    case BCMbase:
      if (op >= BC_CALLM && op <= BC_VARG) {
        BCReg top = (op == BC_CALLM || op == BC_CALLMT || bc_c(ins) == 0) ?
                    maxslot : (bc_a(ins) + bc_c(ins)+LJ_FR2);
        if (LJ_FR2) DEF_SLOT(bc_a(ins)+1);
        s = bc_a(ins) - ((op == BC_ITERC || op == BC_ITERN) ? 3 : 0);
        for (; s < top; s++) USE_SLOT(s);
        for (; s < maxslot; s++) DEF_SLOT(s);
        if (op == BC_CALLT || op == BC_CALLMT) {
          for (s = 0; s < bc_a(ins); s++) DEF_SLOT(s);
          return 0;
        }
      } else if (op == BC_KNIL) {
        for (s = bc_a(ins); s <= bc_d(ins); s++) DEF_SLOT(s);
      } else if (op == BC_TSETM) {
        for (s = bc_a(ins)-1; s < maxslot; s++) USE_SLOT(s);
      }
      break;
    default: break;
    }
    lua_assert(pc >= proto_bc(J->pt) && pc < proto_bc(J->pt) + J->pt->sizebc);
  }

#undef USE_SLOT
#undef DEF_SLOT

  return 0;  /* unreachable */
}
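
/* Illustrative trace of the USE_SLOT/DEF_SLOT byte trick above: udf[s]
** starts at 1. A use clears bit 0 (1 -> 0); a def multiplies by 3
** (1 -> 3). Since 0*3 == 0 and 3 & ~1 == 2, whichever event comes first
** is sticky: a slot ends up 0 (live, keep it in the snapshot) only if its
** first event was a use. E.g. for "local x = y" the def of x gives
** udf[x] == 3 (dead on entry), the use of y gives udf[y] == 0 (keep).
*/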

/* Purge dead slots before the next snapshot. */
void lj_snap_purge(jit_State *J)
{
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg s = snap_usedef(J, udf, J->pc, maxslot);
  for (; s < maxslot; s++)
    if (udf[s] != 0)
      J->base[s] = 0;  /* Purge dead slots. */
}

/* Shrink last snapshot. */
void lj_snap_shrink(jit_State *J)
{
  SnapShot *snap = &J->cur.snap[J->cur.nsnap-1];
  SnapEntry *map = &J->cur.snapmap[snap->mapofs];
  MSize n, m, nlim, nent = snap->nent;
  uint8_t udf[SNAP_USEDEF_SLOTS];
  BCReg maxslot = J->maxslot;
  BCReg baseslot = J->baseslot;
  BCReg minslot = snap_usedef(J, udf, snap_pc(&map[nent]), maxslot);
  maxslot += baseslot;
  minslot += baseslot;
  snap->nslots = (uint8_t)maxslot;
  for (n = m = 0; n < nent; n++) {  /* Remove unused slots from snapshot. */
    BCReg s = snap_slot(map[n]);
    if (s < minslot || (s < maxslot && udf[s-baseslot] == 0))
      map[m++] = map[n];  /* Only copy used slots. */
  }
  snap->nent = (uint8_t)m;
  nlim = J->cur.nsnapmap - snap->mapofs - 1;
  while (n <= nlim) map[m++] = map[n++];  /* Move PC + frame links down. */
  J->cur.nsnapmap = (uint16_t)(snap->mapofs + m);  /* Free up space in map. */
}
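
/* Illustrative: after the compaction loop above, the map layout is
** [used slot entries | gap | PC + frame links]. The while loop slides the
** tail down over the gap, so J->cur.nsnapmap shrinks by the number of
** entries removed.
*/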

/* -- Snapshot access ----------------------------------------------------- */

/* Initialize a Bloom Filter with all renamed refs.
** There are very few renames (often none), so the filter has
** very few bits set. This makes it suitable for negative filtering.
*/
static BloomFilter snap_renamefilter(GCtrace *T, SnapNo lim)
{
  BloomFilter rfilt = 0;
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op2 <= lim)
      bloomset(rfilt, ir->op1);
  return rfilt;
}
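
/* Illustrative: bloomset()/bloomtest() (see lj_def.h) map a ref to a
** single bit of a word-sized filter, roughly bit (ref & 63) on 64 bit
** targets. A negative bloomtest() proves a ref was never renamed; a hit
** may be a false positive, which snap_renameref() below then resolves
** exactly.
*/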

/* Process matching renames to find the original RegSP. */
static RegSP snap_renameref(GCtrace *T, SnapNo lim, IRRef ref, RegSP rs)
{
  IRIns *ir;
  for (ir = &T->ir[T->nins-1]; ir->o == IR_RENAME; ir--)
    if (ir->op1 == ref && ir->op2 <= lim)
      rs = ir->prev;
  return rs;
}

/* Copy RegSP from parent snapshot to the parent links of the IR. */
IRIns *lj_snap_regspmap(GCtrace *T, SnapNo snapno, IRIns *ir)
{
  SnapShot *snap = &T->snap[snapno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  MSize n = 0;
  IRRef ref = 0;
  for ( ; ; ir++) {
    uint32_t rs;
    if (ir->o == IR_SLOAD) {
      if (!(ir->op2 & IRSLOAD_PARENT)) break;
      for ( ; ; n++) {
        lua_assert(n < snap->nent);
        if (snap_slot(map[n]) == ir->op1) {
          ref = snap_ref(map[n++]);
          break;
        }
      }
    } else if (LJ_SOFTFP32 && ir->o == IR_HIOP) {
      ref++;
    } else if (ir->o == IR_PVAL) {
      ref = ir->op1 + REF_BIAS;
    } else {
      break;
    }
    rs = T->ir[ref].prev;
    if (bloomtest(rfilt, ref))
      rs = snap_renameref(T, snapno, ref, rs);
    ir->prev = (uint16_t)rs;
    lua_assert(regsp_used(rs));
  }
  return ir;
}

/* -- Snapshot replay ----------------------------------------------------- */

/* Replay constant from parent trace. */
static TRef snap_replay_const(jit_State *J, IRIns *ir)
{
  /* Only have to deal with constants that can occur in stack slots. */
  switch ((IROp)ir->o) {
  case IR_KPRI: return TREF_PRI(irt_type(ir->t));
  case IR_KINT: return lj_ir_kint(J, ir->i);
  case IR_KGC: return lj_ir_kgc(J, ir_kgc(ir), irt_t(ir->t));
  case IR_KNUM: case IR_KINT64:
    return lj_ir_k64(J, (IROp)ir->o, ir_k64(ir)->u64);
  case IR_KPTR: return lj_ir_kptr(J, ir_kptr(ir));  /* Continuation. */
  default: lua_assert(0); return TREF_NIL; break;
  }
}

/* De-duplicate parent reference. */
static TRef snap_dedup(jit_State *J, SnapEntry *map, MSize nmax, IRRef ref)
{
  MSize j;
  for (j = 0; j < nmax; j++)
    if (snap_ref(map[j]) == ref)
      return J->slot[snap_slot(map[j])] & ~(SNAP_CONT|SNAP_FRAME);
  return 0;
}

/* Emit parent reference with de-duplication. */
static TRef snap_pref(jit_State *J, GCtrace *T, SnapEntry *map, MSize nmax,
                      BloomFilter seen, IRRef ref)
{
  IRIns *ir = &T->ir[ref];
  TRef tr;
  if (irref_isk(ref))
    tr = snap_replay_const(J, ir);
  else if (!regsp_used(ir->prev))
    tr = 0;
  else if (!bloomtest(seen, ref) || (tr = snap_dedup(J, map, nmax, ref)) == 0)
    tr = emitir(IRT(IR_PVAL, irt_type(ir->t)), ref - REF_BIAS, 0);
  return tr;
}
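
/* Illustrative: snap_pref() is the one-stop lookup used while replaying a
** snapshot. Constants are re-interned into the new trace, refs without a
** register or spill slot return 0 (the caller must rematerialize them),
** and everything else becomes an IR_PVAL referencing the parent trace,
** de-duplicated against already emitted slots via the seen filter.
*/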

/* Check whether a sunk store corresponds to an allocation. Slow path. */
static int snap_sunk_store2(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
      irs->o == IR_FSTORE || irs->o == IR_XSTORE) {
    IRIns *irk = &T->ir[irs->op1];
    if (irk->o == IR_AREF || irk->o == IR_HREFK)
      irk = &T->ir[irk->op1];
    return (&T->ir[irk->op1] == ira);
  }
  return 0;
}

/* Check whether a sunk store corresponds to an allocation. Fast path. */
static LJ_AINLINE int snap_sunk_store(GCtrace *T, IRIns *ira, IRIns *irs)
{
  if (irs->s != 255)
    return (ira + irs->s == irs);  /* Fast check. */
  return snap_sunk_store2(T, ira, irs);
}
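
/* Illustrative (see lj_opt_sink.c): sinking stashes the distance from a
** sunk store back to its allocation in the store's spill slot byte,
** saturated at 255. So "ira + irs->s == irs" is an O(1) match and the
** slow path is only needed for stores too far from their allocation.
*/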

/* Replay snapshot state to setup side trace. */
void lj_snap_replay(jit_State *J, GCtrace *T)
{
  SnapShot *snap = &T->snap[J->exitno];
  SnapEntry *map = &T->snapmap[snap->mapofs];
  MSize n, nent = snap->nent;
  BloomFilter seen = 0;
  int pass23 = 0;
  J->framedepth = 0;
  /* Emit IR for slots inherited from parent snapshot. */
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    BCReg s = snap_slot(sn);
    IRRef ref = snap_ref(sn);
    IRIns *ir = &T->ir[ref];
    TRef tr;
    /* The bloom filter avoids O(nent^2) overhead for de-duping slots. */
    if (bloomtest(seen, ref) && (tr = snap_dedup(J, map, n, ref)) != 0)
      goto setslot;
    bloomset(seen, ref);
    if (irref_isk(ref)) {
      /* See special treatment of LJ_FR2 slot 1 in snapshot_slots() above. */
      if (LJ_FR2 && (sn == SNAP(1, SNAP_FRAME | SNAP_NORESTORE, REF_NIL)))
        tr = 0;
      else
        tr = snap_replay_const(J, ir);
    } else if (!regsp_used(ir->prev)) {
      pass23 = 1;
      lua_assert(s != 0);
      tr = s;
    } else {
      IRType t = irt_type(ir->t);
      uint32_t mode = IRSLOAD_INHERIT|IRSLOAD_PARENT;
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM)) t = IRT_NUM;
      if (ir->o == IR_SLOAD) mode |= (ir->op2 & IRSLOAD_READONLY);
      tr = emitir_raw(IRT(IR_SLOAD, t), s, mode);
    }
  setslot:
    J->slot[s] = tr | (sn&(SNAP_CONT|SNAP_FRAME));  /* Same as TREF_* flags. */
    J->framedepth += ((sn & (SNAP_CONT|SNAP_FRAME)) && (s != LJ_FR2));
    if ((sn & SNAP_FRAME))
      J->baseslot = s+1;
  }
  if (pass23) {
    IRIns *irlast = &T->ir[snap->ref];
    pass23 = 0;
    /* Emit dependent PVALs. */
    for (n = 0; n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) continue;
        pass23 = 1;
        lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
                   ir->o == IR_CNEW || ir->o == IR_CNEWI);
        if (ir->op1 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op1);
        if (ir->op2 >= T->nk) snap_pref(J, T, map, nent, seen, ir->op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP)
            snap_pref(J, T, map, nent, seen, (ir+1)->op2);
        } else {
          IRIns *irs;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              if (snap_pref(J, T, map, nent, seen, irs->op2) == 0)
                snap_pref(J, T, map, nent, seen, T->ir[irs->op2].op1);
              else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                       irs+1 < irlast && (irs+1)->o == IR_HIOP)
                snap_pref(J, T, map, nent, seen, (irs+1)->op2);
            }
        }
      } else if (!irref_isk(refp) && !regsp_used(ir->prev)) {
        lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        J->slot[snap_slot(sn)] = snap_pref(J, T, map, nent, seen, ir->op1);
      }
    }
    /* Replay sunk instructions. */
    for (n = 0; pass23 && n < nent; n++) {
      SnapEntry sn = map[n];
      IRRef refp = snap_ref(sn);
      IRIns *ir = &T->ir[refp];
      if (regsp_reg(ir->r) == RID_SUNK) {
        TRef op1, op2;
        if (J->slot[snap_slot(sn)] != snap_slot(sn)) {  /* De-dup allocs. */
          J->slot[snap_slot(sn)] = J->slot[J->slot[snap_slot(sn)]];
          continue;
        }
        op1 = ir->op1;
        if (op1 >= T->nk) op1 = snap_pref(J, T, map, nent, seen, op1);
        op2 = ir->op2;
        if (op2 >= T->nk) op2 = snap_pref(J, T, map, nent, seen, op2);
        if (LJ_HASFFI && ir->o == IR_CNEWI) {
          if (LJ_32 && refp+1 < T->nins && (ir+1)->o == IR_HIOP) {
            lj_needsplit(J);  /* Emit joining HIOP. */
            op2 = emitir_raw(IRT(IR_HIOP, IRT_I64), op2,
                             snap_pref(J, T, map, nent, seen, (ir+1)->op2));
          }
          J->slot[snap_slot(sn)] = emitir(ir->ot & ~(IRT_MARK|IRT_ISPHI), op1, op2);
        } else {
          IRIns *irs;
          TRef tr = emitir(ir->ot, op1, op2);
          J->slot[snap_slot(sn)] = tr;
          for (irs = ir+1; irs < irlast; irs++)
            if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
              IRIns *irr = &T->ir[irs->op1];
              TRef val, key = irr->op2, tmp = tr;
              if (irr->o != IR_FREF) {
                IRIns *irk = &T->ir[key];
                if (irr->o == IR_HREFK)
                  key = lj_ir_kslot(J, snap_replay_const(J, &T->ir[irk->op1]),
                                    irk->op2);
                else
                  key = snap_replay_const(J, irk);
                if (irr->o == IR_HREFK || irr->o == IR_AREF) {
                  IRIns *irf = &T->ir[irr->op1];
                  tmp = emitir(irf->ot, tmp, irf->op2);
                }
              }
              tmp = emitir(irr->ot, tmp, key);
              val = snap_pref(J, T, map, nent, seen, irs->op2);
              if (val == 0) {
                IRIns *irc = &T->ir[irs->op2];
                lua_assert(irc->o == IR_CONV && irc->op2 == IRCONV_NUM_INT);
                val = snap_pref(J, T, map, nent, seen, irc->op1);
                val = emitir(IRTN(IR_CONV), val, IRCONV_NUM_INT);
              } else if ((LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI)) &&
                         irs+1 < irlast && (irs+1)->o == IR_HIOP) {
                IRType t = IRT_I64;
                if (LJ_SOFTFP32 && irt_type((irs+1)->t) == IRT_SOFTFP)
                  t = IRT_NUM;
                lj_needsplit(J);
                if (irref_isk(irs->op2) && irref_isk((irs+1)->op2)) {
                  uint64_t k = (uint32_t)T->ir[irs->op2].i +
                               ((uint64_t)T->ir[(irs+1)->op2].i << 32);
                  val = lj_ir_k64(J, t == IRT_I64 ? IR_KINT64 : IR_KNUM, k);
                } else {
                  val = emitir_raw(IRT(IR_HIOP, t), val,
                          snap_pref(J, T, map, nent, seen, (irs+1)->op2));
                }
                tmp = emitir(IRT(irs->o, t), tmp, val);
                continue;
              }
              tmp = emitir(irs->ot, tmp, val);
            } else if (LJ_HASFFI && irs->o == IR_XBAR && ir->o == IR_CNEW) {
              emitir(IRT(IR_XBAR, IRT_NIL), 0, 0);
            }
        }
      }
    }
  }
  J->base = J->slot + J->baseslot;
  J->maxslot = snap->nslots - J->baseslot;
  lj_snap_add(J);
  if (pass23)  /* Need explicit GC step _after_ initial snapshot. */
    emitir_raw(IRTG(IR_GCSTEP, IRT_NIL), 0, 0);
}
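
/* Illustrative summary of lj_snap_replay() above: the first pass turns
** every live parent slot into a constant, an inherited IR_SLOAD or a
** placeholder. If any parent value was sunk or never allocated (pass23),
** the second pass emits the IR_PVALs those values depend on, and the
** third pass re-emits the sunk allocations plus their stores, so the
** side trace starts from equivalent state.
*/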

/* -- Snapshot restore ---------------------------------------------------- */

static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o);

/* Restore a value from the trace exit state. */
static void snap_restoreval(jit_State *J, GCtrace *T, ExitState *ex,
                            SnapNo snapno, BloomFilter rfilt,
                            IRRef ref, TValue *o)
{
  IRIns *ir = &T->ir[ref];
  IRType1 t = ir->t;
  RegSP rs = ir->prev;
  if (irref_isk(ref)) {  /* Restore constant slot. */
    lj_ir_kvalue(J->L, o, ir);
    return;
  }
  if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
    rs = snap_renameref(T, snapno, ref, rs);
  if (ra_hasspill(regsp_spill(rs))) {  /* Restore from spill slot. */
    int32_t *sps = &ex->spill[regsp_spill(rs)];
    if (irt_isinteger(t)) {
      setintV(o, *sps);
#if !LJ_SOFTFP32
    } else if (irt_isnum(t)) {
      o->u64 = *(uint64_t *)sps;
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_islightud(t)) {
      /* 64 bit lightuserdata which may escape already has the tag bits. */
      o->u64 = *(uint64_t *)sps;
#endif
    } else {
      lua_assert(!irt_ispri(t));  /* PRI refs never have a spill slot. */
      setgcV(J->L, o, (GCobj *)(uintptr_t)*(GCSize *)sps, irt_toitype(t));
    }
  } else {  /* Restore from register. */
    Reg r = regsp_reg(rs);
    if (ra_noreg(r)) {
      lua_assert(ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
      snap_restoreval(J, T, ex, snapno, rfilt, ir->op1, o);
      if (LJ_DUALNUM) setnumV(o, (lua_Number)intV(o));
      return;
    } else if (irt_isinteger(t)) {
      setintV(o, (int32_t)ex->gpr[r-RID_MIN_GPR]);
#if !LJ_SOFTFP
    } else if (irt_isnum(t)) {
      setnumV(o, ex->fpr[r-RID_MIN_FPR]);
#elif LJ_64  /* && LJ_SOFTFP */
    } else if (irt_isnum(t)) {
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
#if LJ_64 && !LJ_GC64
    } else if (irt_is64(t)) {
      /* 64 bit values that already have the tag bits. */
      o->u64 = ex->gpr[r-RID_MIN_GPR];
#endif
    } else if (irt_ispri(t)) {
      setpriV(o, irt_toitype(t));
    } else {
      setgcV(J->L, o, (GCobj *)ex->gpr[r-RID_MIN_GPR], irt_toitype(t));
    }
  }
}
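
/* Illustrative: snap_restoreval() resolves a ref in this order: interned
** constant, spill slot in the exit state, register in the exit state, or
** (no register at all) rematerialization of an IRCONV_NUM_INT result
** from its integer operand.
*/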

#if LJ_HASFFI
/* Restore raw data from the trace exit state. */
static void snap_restoredata(GCtrace *T, ExitState *ex,
                             SnapNo snapno, BloomFilter rfilt,
                             IRRef ref, void *dst, CTSize sz)
{
  IRIns *ir = &T->ir[ref];
  RegSP rs = ir->prev;
  int32_t *src;
  uint64_t tmp;
  if (irref_isk(ref)) {
    if (ir->o == IR_KNUM || ir->o == IR_KINT64) {
      src = (int32_t *)&ir[1];
    } else if (sz == 8) {
      tmp = (uint64_t)(uint32_t)ir->i;
      src = (int32_t *)&tmp;
    } else {
      src = &ir->i;
    }
  } else {
    if (LJ_UNLIKELY(bloomtest(rfilt, ref)))
      rs = snap_renameref(T, snapno, ref, rs);
    if (ra_hasspill(regsp_spill(rs))) {
      src = &ex->spill[regsp_spill(rs)];
      if (sz == 8 && !irt_is64(ir->t)) {
        tmp = (uint64_t)(uint32_t)*src;
        src = (int32_t *)&tmp;
      }
    } else {
      Reg r = regsp_reg(rs);
      if (ra_noreg(r)) {
        /* Note: this assumes CNEWI is never used for SOFTFP split numbers. */
        lua_assert(sz == 8 && ir->o == IR_CONV && ir->op2 == IRCONV_NUM_INT);
        snap_restoredata(T, ex, snapno, rfilt, ir->op1, dst, 4);
        *(lua_Number *)dst = (lua_Number)*(int32_t *)dst;
        return;
      }
      src = (int32_t *)&ex->gpr[r-RID_MIN_GPR];
#if !LJ_SOFTFP
      if (r >= RID_MAX_GPR) {
        src = (int32_t *)&ex->fpr[r-RID_MIN_FPR];
#if LJ_TARGET_PPC
        if (sz == 4) {  /* PPC FPRs are always doubles. */
          *(float *)dst = (float)*(double *)src;
          return;
        }
#else
        if (LJ_BE && sz == 4) src++;
#endif
      } else
#endif
      if (LJ_64 && LJ_BE && sz == 4) src++;
    }
  }
  lua_assert(sz == 1 || sz == 2 || sz == 4 || sz == 8);
  if (sz == 4) *(int32_t *)dst = *src;
  else if (sz == 8) *(int64_t *)dst = *(int64_t *)src;
  else if (sz == 1) *(int8_t *)dst = (int8_t)*src;
  else *(int16_t *)dst = (int16_t)*src;
}
#endif

/* Unsink allocation from the trace exit state. Unsink sunk stores. */
static void snap_unsink(jit_State *J, GCtrace *T, ExitState *ex,
                        SnapNo snapno, BloomFilter rfilt,
                        IRIns *ir, TValue *o)
{
  lua_assert(ir->o == IR_TNEW || ir->o == IR_TDUP ||
             ir->o == IR_CNEW || ir->o == IR_CNEWI);
#if LJ_HASFFI
  if (ir->o == IR_CNEW || ir->o == IR_CNEWI) {
    CTState *cts = ctype_cts(J->L);
    CTypeID id = (CTypeID)T->ir[ir->op1].i;
    CTSize sz;
    CTInfo info = lj_ctype_info(cts, id, &sz);
    GCcdata *cd = lj_cdata_newx(cts, id, sz, info);
    setcdataV(J->L, o, cd);
    if (ir->o == IR_CNEWI) {
      uint8_t *p = (uint8_t *)cdataptr(cd);
      lua_assert(sz == 4 || sz == 8);
      if (LJ_32 && sz == 8 && ir+1 < T->ir + T->nins && (ir+1)->o == IR_HIOP) {
        snap_restoredata(T, ex, snapno, rfilt, (ir+1)->op2, LJ_LE?p+4:p, 4);
        if (LJ_BE) p += 4;
        sz = 4;
      }
      snap_restoredata(T, ex, snapno, rfilt, ir->op2, p, sz);
    } else {
      IRIns *irs, *irlast = &T->ir[T->snap[snapno].ref];
      for (irs = ir+1; irs < irlast; irs++)
        if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
          IRIns *iro = &T->ir[T->ir[irs->op1].op2];
          uint8_t *p = (uint8_t *)cd;
          CTSize szs;
          lua_assert(irs->o == IR_XSTORE && T->ir[irs->op1].o == IR_ADD);
          lua_assert(iro->o == IR_KINT || iro->o == IR_KINT64);
          if (irt_is64(irs->t)) szs = 8;
          else if (irt_isi8(irs->t) || irt_isu8(irs->t)) szs = 1;
          else if (irt_isi16(irs->t) || irt_isu16(irs->t)) szs = 2;
          else szs = 4;
          if (LJ_64 && iro->o == IR_KINT64)
            p += (int64_t)ir_k64(iro)->u64;
          else
            p += iro->i;
          lua_assert(p >= (uint8_t *)cdataptr(cd) &&
                     p + szs <= (uint8_t *)cdataptr(cd) + sz);
          if (LJ_32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            lua_assert(szs == 4);
            snap_restoredata(T, ex, snapno, rfilt, (irs+1)->op2, LJ_LE?p+4:p,4);
            if (LJ_BE) p += 4;
          }
          snap_restoredata(T, ex, snapno, rfilt, irs->op2, p, szs);
        }
    }
  } else
#endif
  {
    IRIns *irs, *irlast;
    GCtab *t = ir->o == IR_TNEW ? lj_tab_new(J->L, ir->op1, ir->op2) :
                                  lj_tab_dup(J->L, ir_ktab(&T->ir[ir->op1]));
    settabV(J->L, o, t);
    irlast = &T->ir[T->snap[snapno].ref];
    for (irs = ir+1; irs < irlast; irs++)
      if (irs->r == RID_SINK && snap_sunk_store(T, ir, irs)) {
        IRIns *irk = &T->ir[irs->op1];
        TValue tmp, *val;
        lua_assert(irs->o == IR_ASTORE || irs->o == IR_HSTORE ||
                   irs->o == IR_FSTORE);
        if (irk->o == IR_FREF) {
          lua_assert(irk->op2 == IRFL_TAB_META);
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          setgcref(t->metatable, obj2gco(tabV(&tmp)));
        } else {
          irk = &T->ir[irk->op2];
          if (irk->o == IR_KSLOT) irk = &T->ir[irk->op1];
          lj_ir_kvalue(J->L, &tmp, irk);
          val = lj_tab_set(J->L, t, &tmp);
          /* NOBARRIER: The table is new (marked white). */
          snap_restoreval(J, T, ex, snapno, rfilt, irs->op2, val);
          if (LJ_SOFTFP32 && irs+1 < T->ir + T->nins && (irs+1)->o == IR_HIOP) {
            snap_restoreval(J, T, ex, snapno, rfilt, (irs+1)->op2, &tmp);
            val->u32.hi = tmp.u32.lo;
          }
        }
      }
  }
}

/* Restore interpreter state from exit state with the help of a snapshot. */
const BCIns *lj_snap_restore(jit_State *J, void *exptr)
{
  ExitState *ex = (ExitState *)exptr;
  SnapNo snapno = J->exitno;  /* For now, snapno == exitno. */
  GCtrace *T = traceref(J, J->parent);
  SnapShot *snap = &T->snap[snapno];
  MSize n, nent = snap->nent;
  SnapEntry *map = &T->snapmap[snap->mapofs];
#if !LJ_FR2 || defined(LUA_USE_ASSERT)
  SnapEntry *flinks = &T->snapmap[snap_nextofs(T, snap)-1-LJ_FR2];
#endif
#if !LJ_FR2
  ptrdiff_t ftsz0;
#endif
  TValue *frame;
  BloomFilter rfilt = snap_renamefilter(T, snapno);
  const BCIns *pc = snap_pc(&map[nent]);
  lua_State *L = J->L;

  /* Set interpreter PC to the next PC to get correct error messages. */
  setcframe_pc(cframe_raw(L->cframe), pc+1);

  /* Make sure the stack is big enough for the slots from the snapshot. */
  if (LJ_UNLIKELY(L->base + snap->topslot >= tvref(L->maxstack))) {
    L->top = curr_topL(L);
    lj_state_growstack(L, snap->topslot - curr_proto(L)->framesize);
  }

  /* Fill stack slots with data from the registers and spill slots. */
  frame = L->base-1-LJ_FR2;
#if !LJ_FR2
  ftsz0 = frame_ftsz(frame);  /* Preserve link to previous frame in slot #0. */
#endif
  for (n = 0; n < nent; n++) {
    SnapEntry sn = map[n];
    if (!(sn & SNAP_NORESTORE)) {
      TValue *o = &frame[snap_slot(sn)];
      IRRef ref = snap_ref(sn);
      IRIns *ir = &T->ir[ref];
      if (ir->r == RID_SUNK) {
        MSize j;
        for (j = 0; j < n; j++)
          if (snap_ref(map[j]) == ref) {  /* De-duplicate sunk allocations. */
            copyTV(L, o, &frame[snap_slot(map[j])]);
            goto dupslot;
          }
        snap_unsink(J, T, ex, snapno, rfilt, ir, o);
      dupslot:
        continue;
      }
      snap_restoreval(J, T, ex, snapno, rfilt, ref, o);
      if (LJ_SOFTFP32 && (sn & SNAP_SOFTFPNUM) && tvisint(o)) {
        TValue tmp;
        snap_restoreval(J, T, ex, snapno, rfilt, ref+1, &tmp);
        o->u32.hi = tmp.u32.lo;
#if !LJ_FR2
      } else if ((sn & (SNAP_CONT|SNAP_FRAME))) {
        /* Overwrite tag with frame link. */
        setframe_ftsz(o, snap_slot(sn) != 0 ? (int32_t)*flinks-- : ftsz0);
        L->base = o+1;
#endif
      }
    }
  }
#if LJ_FR2
  L->base += (map[nent+LJ_BE] & 0xff);
#endif
  lua_assert(map + nent == flinks);

  /* Compute current stack top. */
  switch (bc_op(*pc)) {
  default:
    if (bc_op(*pc) < BC_FUNCF) {
      L->top = curr_topL(L);
      break;
    }
    /* fallthrough */
  case BC_CALLM: case BC_CALLMT: case BC_RETM: case BC_TSETM:
    L->top = frame + snap->nslots;
    break;
  }
  return pc;
}
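
/* Illustrative usage: the trace exit handler (lj_trace_exit() in
** lj_trace.c) calls lj_snap_restore() with the machine-level exit state;
** on return the interpreter resumes at the returned PC, with L->base,
** L->top and all stack slots reconstructed from the snapshot.
*/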

#undef emitir_raw
#undef emitir

#endif
