root/lj_opt_split.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. split_emit
  2. split_num2int
  3. split_call_l
  4. split_call_li
  5. split_call_ll
  6. split_ptr
  7. split_bitshift
  8. split_bitop
  9. split_subst_snap
  10. split_ir
  11. cpsplit
  12. split_needsplit
  13. lj_opt_split

   1 /*
   2 ** SPLIT: Split 64 bit IR instructions into 32 bit IR instructions.
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5 
   6 #define lj_opt_split_c
   7 #define LUA_CORE
   8 
   9 #include "lj_obj.h"
  10 
  11 #if LJ_HASJIT && (LJ_SOFTFP32 || (LJ_32 && LJ_HASFFI))
  12 
  13 #include "lj_err.h"
  14 #include "lj_buf.h"
  15 #include "lj_ir.h"
  16 #include "lj_jit.h"
  17 #include "lj_ircall.h"
  18 #include "lj_iropt.h"
  19 #include "lj_dispatch.h"
  20 #include "lj_vm.h"
  21 
  22 /* SPLIT pass:
  23 **
  24 ** This pass splits up 64 bit IR instructions into multiple 32 bit IR
  25 ** instructions. It's only active for soft-float targets or for 32 bit CPUs
  26 ** which lack native 64 bit integer operations (the FFI is currently the
  27 ** only emitter for 64 bit integer instructions).
  28 **
  29 ** Splitting the IR in a separate pass keeps each 32 bit IR assembler
  30 ** backend simple. Only a small amount of extra functionality needs to be
  31 ** implemented. This is much easier than adding support for allocating
  32 ** register pairs to each backend (believe me, I tried). A few simple, but
  33 ** important optimizations can be performed by the SPLIT pass, which would
  34 ** be tedious to do in the backend.
  35 **
  36 ** The basic idea is to replace each 64 bit IR instruction with its 32 bit
  37 ** equivalent plus an extra HIOP instruction. The splitted IR is not passed
  38 ** through FOLD or any other optimizations, so each HIOP is guaranteed to
** immediately follow its counterpart. The actual functionality of HIOP is
  40 ** inferred from the previous instruction.
  41 **
  42 ** The operands of HIOP hold the hiword input references. The output of HIOP
  43 ** is the hiword output reference, which is also used to hold the hiword
  44 ** register or spill slot information. The register allocator treats this
  45 ** instruction independently of any other instruction, which improves code
  46 ** quality compared to using fixed register pairs.
  47 **
  48 ** It's easier to split up some instructions into two regular 32 bit
  49 ** instructions. E.g. XLOAD is split up into two XLOADs with two different
  50 ** addresses. Obviously 64 bit constants need to be split up into two 32 bit
  51 ** constants, too. Some hiword instructions can be entirely omitted, e.g.
  52 ** when zero-extending a 32 bit value to 64 bits. 64 bit arguments for calls
  53 ** are split up into two 32 bit arguments each.
  54 **
  55 ** On soft-float targets, floating-point instructions are directly converted
  56 ** to soft-float calls by the SPLIT pass (except for comparisons and MIN/MAX).
  57 ** HIOP for number results has the type IRT_SOFTFP ("sfp" in -jdump).
  58 **
  59 ** Here's the IR and x64 machine code for 'x.b = x.a + 1' for a struct with
  60 ** two int64_t fields:
  61 **
  62 ** 0100    p32 ADD    base  +8
  63 ** 0101    i64 XLOAD  0100
  64 ** 0102    i64 ADD    0101  +1
  65 ** 0103    p32 ADD    base  +16
  66 ** 0104    i64 XSTORE 0103  0102
  67 **
  68 **         mov rax, [esi+0x8]
  69 **         add rax, +0x01
  70 **         mov [esi+0x10], rax
  71 **
  72 ** Here's the transformed IR and the x86 machine code after the SPLIT pass:
  73 **
  74 ** 0100    p32 ADD    base  +8
  75 ** 0101    int XLOAD  0100
  76 ** 0102    p32 ADD    base  +12
  77 ** 0103    int XLOAD  0102
  78 ** 0104    int ADD    0101  +1
  79 ** 0105    int HIOP   0103  +0
  80 ** 0106    p32 ADD    base  +16
  81 ** 0107    int XSTORE 0106  0104
  82 ** 0108    int HIOP   0106  0105
  83 **
  84 **         mov eax, [esi+0x8]
  85 **         mov ecx, [esi+0xc]
  86 **         add eax, +0x01
  87 **         adc ecx, +0x00
  88 **         mov [esi+0x10], eax
  89 **         mov [esi+0x14], ecx
  90 **
  91 ** You may notice the reassociated hiword address computation, which is
  92 ** later fused into the mov operands by the assembler.
  93 */
  94 
  95 /* Some local macros to save typing. Undef'd at the end. */
  96 #define IR(ref)         (&J->cur.ir[(ref)])
  97 
  98 /* Directly emit the transformed IR without updating chains etc. */
  99 static IRRef split_emit(jit_State *J, uint16_t ot, IRRef1 op1, IRRef1 op2)
 100 {
 101   IRRef nref = lj_ir_nextins(J);
 102   IRIns *ir = IR(nref);
 103   ir->ot = ot;
 104   ir->op1 = op1;
 105   ir->op2 = op2;
 106   return nref;
 107 }
 108 
 109 #if LJ_SOFTFP
/* Emit a (checked) number to integer conversion.
** lo/hi are the loword/hiword refs of the source number.
** If check is set, a guarded round-trip comparison is emitted, so the
** trace exits when the number is not an exact integer.
** Returns the ref of the integer result.
*/
static IRRef split_num2int(jit_State *J, IRRef lo, IRRef hi, int check)
{
  IRRef tmp, res;
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), lo, hi);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hi, lo);
#endif
  res = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_softfp_d2i);
  if (check) {
    /* Convert the int back to a number and compare both words with the
    ** original. Note: hiword compare uses tmp+1, i.e. the HIOP result.
    */
    tmp = split_emit(J, IRTI(IR_CALLN), res, IRCALL_softfp_i2d);
    split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
    split_emit(J, IRTGI(IR_EQ), tmp, lo);
    split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP), tmp+1, hi);
  }
  return res;
}
 128 
/* Emit a CALLN with one split 64 bit argument.
** Replaces the already copy-substituted instruction (J->cur.nins-- undoes
** its emission) with a CARG/CALLN pair plus a HIOP for the hiword result.
** The loword result is recorded in ir->prev; the HIOP ref is returned.
*/
static IRRef split_call_l(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                          IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1;
  J->cur.nins--;  /* Drop the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
 143 #endif
 144 
/* Emit a CALLN with one split 64 bit argument and a 32 bit argument.
** Same scheme as split_call_l, with the extra 32 bit argument (op2)
** appended to the CARG chain. Returns the HIOP ref for the hiword.
*/
static IRRef split_call_li(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Drop the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
}
 160 
/* Emit a CALLN with two split 64 bit arguments.
** Both operands are passed as lo/hi word pairs in target byte order.
** The HIOP type is IRT_SOFTFP for soft-float number results, IRT_INT
** for 64 bit integer results. Returns the HIOP ref for the hiword.
*/
static IRRef split_call_ll(jit_State *J, IRRef1 *hisubst, IRIns *oir,
                           IRIns *ir, IRCallID id)
{
  IRRef tmp, op1 = ir->op1, op2 = ir->op2;
  J->cur.nins--;  /* Drop the copy-substituted instruction. */
#if LJ_LE
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
#else
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, hisubst[op2]);
  tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, oir[op2].prev);
#endif
  ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, id);
  return split_emit(J,
    IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
    tmp, tmp);
}
 181 
/* Get a pointer to the other 32 bit word (LE: hiword, BE: loword).
** ref is an OLD IR ref; its loword substitution (oir[ref].prev) is the
** base address in the new IR. Constant pointers are offset directly.
** ADD base+k addresses are reassociated into a single ADD base+(k+4),
** but not across PHIs (the address may differ per iteration — the PHI
** check keeps the reassociation tied to a single known base).
*/
static IRRef split_ptr(jit_State *J, IRIns *oir, IRRef ref)
{
  IRRef nref = oir[ref].prev;
  IRIns *ir = IR(nref);
  int32_t ofs = 4;
  if (ir->o == IR_KPTR)
    return lj_ir_kptr(J, (char *)ir_kptr(ir) + ofs);
  if (ir->o == IR_ADD && irref_isk(ir->op2) && !irt_isphi(oir[ref].t)) {
    /* Reassociate address. */
    ofs += IR(ir->op2)->i;
    nref = ir->op1;
    if (ofs == 0) return nref;  /* Offsets cancelled: reuse the base. */
  }
  return split_emit(J, IRT(IR_ADD, IRT_PTR), nref, lj_ir_kint(J, ofs));
}
 198 
 199 #if LJ_HASFFI
/* Split a 64 bit shift/rotate (BSHL/BSHR/BSAR/BROL/BROR).
** Constant shift counts are strength-reduced into 32 bit shift/or
** combinations; variable counts fall back to a carith call.
** nir is the already-emitted loword copy of the instruction, which is
** patched or dropped in place. Returns the hiword result ref; the
** loword result ends up in ir->prev.
*/
static IRRef split_bitshift(jit_State *J, IRRef1 *hisubst,
                            IRIns *oir, IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize constant shifts. */
    int32_t k = (IR(kref)->i & 63);  /* Shift counts are mod 64. */
    IRRef lo = nir->op1, hi = hisubst[ir->op1];
    if (op == IR_BROL || op == IR_BROR) {
      /* Normalize rotates: BROR k == BROL (64-k); k >= 32 swaps words. */
      if (op == IR_BROR) k = (-k & 63);
      if (k >= 32) { IRRef t = lo; lo = hi; hi = t; k -= 32; }
      if (k == 0) {
      passthrough:  /* Shift by 0 (mod word swap): forward both words. */
        J->cur.nins--;
        ir->prev = lo;
        return hi;
      } else {
        /* Rotate via 4 shifts + 2 ors across the word boundary. */
        TRef k1, k2;
        IRRef t1, t2, t3, t4;
        J->cur.nins--;
        k1 = lj_ir_kint(J, k);
        k2 = lj_ir_kint(J, (-k & 31));
        t1 = split_emit(J, IRTI(IR_BSHL), lo, k1);
        t2 = split_emit(J, IRTI(IR_BSHL), hi, k1);
        t3 = split_emit(J, IRTI(IR_BSHR), lo, k2);
        t4 = split_emit(J, IRTI(IR_BSHR), hi, k2);
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t4);
        return split_emit(J, IRTI(IR_BOR), t2, t3);
      }
    } else if (k == 0) {
      goto passthrough;
    } else if (k < 32) {
      if (op == IR_BSHL) {
        /* lo<<k stays in nir; hi = (hi<<k) | (lo>>(32-k)). */
        IRRef t1 = split_emit(J, IRTI(IR_BSHL), hi, kref);
        IRRef t2 = split_emit(J, IRTI(IR_BSHR), lo, lj_ir_kint(J, (-k&31)));
        return split_emit(J, IRTI(IR_BOR), t1, t2);
      } else {
        /* lo = (lo>>k) | (hi<<(32-k)); hi = hi >>(arith?) k. */
        IRRef t1 = ir->prev, t2;
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        nir->o = IR_BSHR;  /* Loword shift is always logical. */
        t2 = split_emit(J, IRTI(IR_BSHL), hi, lj_ir_kint(J, (-k&31)));
        ir->prev = split_emit(J, IRTI(IR_BOR), t1, t2);
        return split_emit(J, IRTI(op), hi, kref);
      }
    } else {
      if (op == IR_BSHL) {
        /* Shift >= 32: lo becomes 0, hi comes from the lo word. */
        if (k == 32)
          J->cur.nins--;  /* hi = lo unchanged: drop the emitted shift. */
        else
          lo = ir->prev;  /* hi = emitted lo shift (count masked to 31). */
        ir->prev = lj_ir_kint(J, 0);
        return lo;
      } else {
        lua_assert(op == IR_BSHR || op == IR_BSAR);
        if (k == 32) {
          J->cur.nins--;  /* lo = hi unchanged: drop the emitted shift. */
          ir->prev = hi;
        } else {
          nir->op1 = hi;  /* lo = hi shifted (count masked to 31). */
        }
        if (op == IR_BSHR)
          return lj_ir_kint(J, 0);  /* Logical shift: hi becomes 0. */
        else
          return split_emit(J, IRTI(IR_BSAR), hi, lj_ir_kint(J, 31));
      }
    }
  }
  /* Variable shift count: call the 64 bit carith helper. */
  return split_call_li(J, hisubst, oir, ir,
                       op - IR_BSHL + IRCALL_lj_carith_shl64);
}
 270 
/* Split a 64 bit bitwise operation (BAND/BOR/BXOR).
** Each 32 bit half is independent. Halves with a 0 or -1 constant
** operand are simplified (identity, absorbing element, or BNOT for
** BXOR with -1). Returns the hiword result ref; the loword result is
** nir (possibly simplified in place via ir->prev).
*/
static IRRef split_bitop(jit_State *J, IRRef1 *hisubst,
                         IRIns *nir, IRIns *ir)
{
  IROp op = ir->o;
  IRRef hi, kref = nir->op2;
  if (irref_isk(kref)) {  /* Optimize bit operations with lo constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;  /* Unify: k==0 now means identity. */
      if (k == 0) {
        J->cur.nins--;
        ir->prev = nir->op1;  /* Identity: forward the operand. */
      } else if (op == IR_BXOR) {
        nir->o = IR_BNOT;  /* x ^ -1 ==> ~x. */
        nir->op2 = 0;
      } else {
        J->cur.nins--;
        ir->prev = kref;  /* Absorbing element: result is the constant. */
      }
    }
  }
  hi = hisubst[ir->op1];
  kref = hisubst[ir->op2];
  if (irref_isk(kref)) {  /* Optimize bit operations with hi constant. */
    int32_t k = IR(kref)->i;
    if (k == 0 || k == -1) {
      if (op == IR_BAND) k = ~k;
      if (k == 0) {
        return hi;  /* Identity. */
      } else if (op == IR_BXOR) {
        return split_emit(J, IRTI(IR_BNOT), hi, 0);
      } else {
        return kref;  /* Absorbing element. */
      }
    }
  }
  return split_emit(J, IRTI(op), hi, kref);
}
 309 #endif
 310 
 311 /* Substitute references of a snapshot. */
 312 static void split_subst_snap(jit_State *J, SnapShot *snap, IRIns *oir)
 313 {
 314   SnapEntry *map = &J->cur.snapmap[snap->mapofs];
 315   MSize n, nent = snap->nent;
 316   for (n = 0; n < nent; n++) {
 317     SnapEntry sn = map[n];
 318     IRIns *ir = &oir[snap_ref(sn)];
 319     if (!(LJ_SOFTFP && (sn & SNAP_SOFTFPNUM) && irref_isk(snap_ref(sn))))
 320       map[n] = ((sn & 0xffff0000) | ir->prev);
 321   }
 322 }
 323 
/* Transform the old IR to the new IR.
** Single forward pass: the old IR is copied to a scratch buffer, the
** trace is truncated back to its constants, and each old instruction
** is re-emitted as one or more 32 bit instructions. For every old ref
** the loword substitution is kept in oir[ref].prev and the hiword
** substitution in hisubst[ref] (0 if there is no hiword). Snapshots
** are rewritten in lockstep as their anchor instructions are reached.
*/
static void split_ir(jit_State *J)
{
  IRRef nins = J->cur.nins, nk = J->cur.nk;
  MSize irlen = nins - nk;
  MSize need = (irlen+1)*(sizeof(IRIns) + sizeof(IRRef1));
  IRIns *oir = (IRIns *)lj_buf_tmp(J->L, need);
  IRRef1 *hisubst;
  IRRef ref, snref;
  SnapShot *snap;

  /* Copy old IR to buffer. */
  memcpy(oir, IR(nk), irlen*sizeof(IRIns));
  /* Bias hiword substitution table and old IR. Loword kept in field prev. */
  hisubst = (IRRef1 *)&oir[irlen] - nk;
  oir -= nk;

  /* Remove all IR instructions, but retain IR constants. */
  J->cur.nins = REF_FIRST;
  J->loopref = 0;

  /* Process constants and fixed references. */
  for (ref = nk; ref <= REF_BASE; ref++) {
    IRIns *ir = &oir[ref];
    if ((LJ_SOFTFP && ir->o == IR_KNUM) || ir->o == IR_KINT64) {
      /* Split up 64 bit constant. */
      TValue tv = *ir_k64(ir);
      ir->prev = lj_ir_kint(J, (int32_t)tv.u32.lo);
      hisubst[ref] = lj_ir_kint(J, (int32_t)tv.u32.hi);
    } else {
      ir->prev = ref;  /* Identity substitution for loword. */
      hisubst[ref] = 0;
    }
    /* 64 bit constants occupy two IR slots; skip the second one. */
    if (irt_is64(ir->t) && ir->o != IR_KNULL)
      ref++;
  }

  /* Process old IR instructions. */
  snap = J->cur.snap;
  snref = snap->ref;
  for (ref = REF_FIRST; ref < nins; ref++) {
    IRIns *ir = &oir[ref];
    IRRef nref = lj_ir_nextins(J);
    IRIns *nir = IR(nref);
    IRRef hi = 0;

    /* Rewrite all snapshots anchored before this instruction. */
    if (ref >= snref) {
      snap->ref = nref;
      split_subst_snap(J, snap++, oir);
      snref = snap < &J->cur.snap[J->cur.nsnap] ? snap->ref : ~(IRRef)0;
    }

    /* Copy-substitute old instruction to new instruction. */
    nir->op1 = ir->op1 < nk ? ir->op1 : oir[ir->op1].prev;
    nir->op2 = ir->op2 < nk ? ir->op2 : oir[ir->op2].prev;
    ir->prev = nref;  /* Loword substitution. */
    nir->o = ir->o;
    nir->t.irt = ir->t.irt & ~(IRT_MARK|IRT_ISPHI);
    hisubst[ref] = 0;

    /* Split 64 bit instructions. */
#if LJ_SOFTFP
    if (irt_isnum(ir->t)) {
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      /* Note: hi ref = lo ref + 1! Required for SNAP_SOFTFPNUM logic. */
      switch (ir->o) {
      case IR_ADD:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_add);
        break;
      case IR_SUB:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_sub);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_mul);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_softfp_div);
        break;
      case IR_POW:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_lj_vm_powi);
        break;
      case IR_FPMATH:
        /* Try to rejoin pow from EXP2, MUL and LOG2. */
        if (nir->op2 == IRFPM_EXP2 && nir->op1 > J->loopref) {
          IRIns *irp = IR(nir->op1);
          if (irp->o == IR_CALLN && irp->op2 == IRCALL_softfp_mul) {
            IRIns *irm4 = IR(irp->op1);
            IRIns *irm3 = IR(irm4->op1);
            IRIns *irm12 = IR(irm3->op1);
            IRIns *irl1 = IR(irm12->op1);
            if (irm12->op1 > J->loopref && irl1->o == IR_CALLN &&
                irl1->op2 == IRCALL_lj_vm_log2) {
              IRRef tmp = irl1->op1;  /* Recycle first two args from LOG2. */
              IRRef arg3 = irm3->op2, arg4 = irm4->op2;
              J->cur.nins--;
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg3);
              tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), tmp, arg4);
              ir->prev = tmp = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_pow);
              hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), tmp, tmp);
              break;
            }
          }
        }
        hi = split_call_l(J, hisubst, oir, ir, IRCALL_lj_vm_floor + ir->op2);
        break;
      case IR_ATAN2:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_atan2);
        break;
      case IR_LDEXP:
        hi = split_call_li(J, hisubst, oir, ir, IRCALL_ldexp);
        break;
      case IR_NEG: case IR_ABS:
        /* Sign-bit manipulation only: loword passes through unchanged,
        ** hiword flips (NEG) or clears (ABS) the sign bit.
        */
        nir->o = IR_CONV;  /* Pass through loword. */
        nir->op2 = (IRT_INT << 5) | IRT_INT;
        hi = split_emit(J, IRT(ir->o == IR_NEG ? IR_BXOR : IR_BAND, IRT_SOFTFP),
               hisubst[ir->op1],
               lj_ir_kint(J, (int32_t)(0x7fffffffu + (ir->o == IR_NEG))));
        break;
      case IR_SLOAD:
        if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from int to number. */
          nir->op2 &= ~IRSLOAD_CONVERT;
          ir->prev = nref = split_emit(J, IRTI(IR_CALLN), nref,
                                       IRCALL_softfp_i2d);
          hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
          break;
        }
        /* fallthrough */
      case IR_ALOAD: case IR_HLOAD: case IR_ULOAD: case IR_VLOAD:
      case IR_STRTO:
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
      case IR_FLOAD:
        lua_assert(ir->op1 == REF_NIL);
        /* Load from GG state: hiword is read at record time as a constant.
        ** NOTE(review): assumes the field is immutable — verify at callers.
        */
        hi = lj_ir_kint(J, *(int32_t*)((char*)J2GG(J) + ir->op2 + LJ_LE*4));
        nir->op2 += LJ_BE*4;
        break;
      case IR_XLOAD: {
        IRIns inslo = *nir;  /* Save/undo the emit of the lo XLOAD. */
        J->cur.nins--;
        hi = split_ptr(J, oir, ir->op1);  /* Insert the hiref ADD. */
#if LJ_BE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_INT), hi, ir->op2);
        inslo.t.irt = IRT_SOFTFP | (inslo.t.irt & IRT_GUARD);
#endif
        nref = lj_ir_nextins(J);
        nir = IR(nref);
        *nir = inslo;  /* Re-emit lo XLOAD. */
#if LJ_LE
        hi = split_emit(J, IRT(IR_XLOAD, IRT_SOFTFP), hi, ir->op2);
        ir->prev = nref;
#else
        ir->prev = hi; hi = nref;
#endif
        break;
        }
      case IR_ASTORE: case IR_HSTORE: case IR_USTORE: case IR_XSTORE:
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to number. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
        UNUSED(st);
#if LJ_32 && LJ_HASFFI
        if (st == IRT_I64 || st == IRT_U64) {
          hi = split_call_l(J, hisubst, oir, ir,
                 st == IRT_I64 ? IRCALL_fp64_l2d : IRCALL_fp64_ul2d);
          break;
        }
#endif
        lua_assert(st == IRT_INT ||
                   (LJ_32 && LJ_HASFFI && (st == IRT_U32 || st == IRT_FLOAT)));
        nir->o = IR_CALLN;
#if LJ_32 && LJ_HASFFI
        nir->op2 = st == IRT_INT ? IRCALL_softfp_i2d :
                   st == IRT_FLOAT ? IRCALL_softfp_f2d :
                   IRCALL_softfp_ui2d;
#else
        nir->op2 = IRCALL_softfp_i2d;
#endif
        hi = split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        break;
        }
      case IR_CALLN:
      case IR_CALLL:
      case IR_CALLS:
      case IR_CALLXS:
        goto split_call;
      case IR_PHI:
        if (nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        if (hisubst[ir->op1] != hisubst[ir->op2])
          split_emit(J, IRT(IR_PHI, IRT_SOFTFP),
                     hisubst[ir->op1], hisubst[ir->op2]);
        break;
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE || ir->o == IR_MIN || ir->o == IR_MAX);
        hi = split_emit(J, IRTG(IR_HIOP, IRT_SOFTFP),
                        hisubst[ir->op1], hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_32 && LJ_HASFFI
    if (irt_isint64(ir->t)) {
      IRRef hiref = hisubst[ir->op1];
      nir->t.irt = IRT_INT | (nir->t.irt & IRT_GUARD);  /* Turn into INT op. */
      switch (ir->o) {
      case IR_ADD:
      case IR_SUB:
        /* Use plain op for hiword if loword cannot produce a carry/borrow. */
        if (irref_isk(nir->op2) && IR(nir->op2)->i == 0) {
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->op1 = hiref; nir->op2 = hisubst[ir->op2];
          hi = nref;
          break;
        }
        /* fallthrough */
      case IR_NEG:
        hi = split_emit(J, IRTI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      case IR_MUL:
        hi = split_call_ll(J, hisubst, oir, ir, IRCALL_lj_carith_mul64);
        break;
      case IR_DIV:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_divi64 :
                                              IRCALL_lj_carith_divu64);
        break;
      case IR_MOD:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_modi64 :
                                              IRCALL_lj_carith_modu64);
        break;
      case IR_POW:
        hi = split_call_ll(J, hisubst, oir, ir,
                           irt_isi64(ir->t) ? IRCALL_lj_carith_powi64 :
                                              IRCALL_lj_carith_powu64);
        break;
      case IR_BNOT:
        hi = split_emit(J, IRTI(IR_BNOT), hiref, 0);
        break;
      case IR_BSWAP:
        /* 64 bit byte swap also swaps the two words. */
        ir->prev = split_emit(J, IRTI(IR_BSWAP), hiref, 0);
        hi = nref;
        break;
      case IR_BAND: case IR_BOR: case IR_BXOR:
        hi = split_bitop(J, hisubst, nir, ir);
        break;
      case IR_BSHL: case IR_BSHR: case IR_BSAR: case IR_BROL: case IR_BROR:
        hi = split_bitshift(J, hisubst, oir, nir, ir);
        break;
      case IR_FLOAD:
        lua_assert(ir->op2 == IRFL_CDATA_INT64);
        hi = split_emit(J, IRTI(IR_FLOAD), nir->op1, IRFL_CDATA_INT64_4);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XLOAD:
        hi = split_emit(J, IRTI(IR_XLOAD), split_ptr(J, oir, ir->op1), ir->op2);
#if LJ_BE
        ir->prev = hi; hi = nref;
#endif
        break;
      case IR_XSTORE:
        split_emit(J, IRTI(IR_HIOP), nir->op1, hisubst[ir->op2]);
        break;
      case IR_CONV: {  /* Conversion to 64 bit integer. Others handled below. */
        IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_SOFTFP
        if (st == IRT_NUM) {  /* NUM to 64 bit int conv. */
          hi = split_call_l(J, hisubst, oir, ir,
                 irt_isi64(ir->t) ? IRCALL_fp64_d2l : IRCALL_fp64_d2ul);
        } else if (st == IRT_FLOAT) {  /* FLOAT to 64 bit int conv. */
          nir->o = IR_CALLN;
          nir->op2 = irt_isi64(ir->t) ? IRCALL_fp64_f2l : IRCALL_fp64_f2ul;
          hi = split_emit(J, IRTI(IR_HIOP), nref, nref);
        }
#else
        if (st == IRT_NUM || st == IRT_FLOAT) {  /* FP to 64 bit int conv. */
          hi = split_emit(J, IRTI(IR_HIOP), nir->op1, nref);
        }
#endif
        else if (st == IRT_I64 || st == IRT_U64) {  /* 64/64 bit cast. */
          /* Drop cast, since assembler doesn't care. But fwd both parts. */
          hi = hiref;
          goto fwdlo;
        } else if ((ir->op2 & IRCONV_SEXT)) {  /* Sign-extend to 64 bit. */
          IRRef k31 = lj_ir_kint(J, 31);
          nir = IR(nref);  /* May have been reallocated. */
          ir->prev = nir->op1;  /* Pass through loword. */
          nir->o = IR_BSAR;  /* hi = bsar(lo, 31). */
          nir->op2 = k31;
          hi = nref;
        } else {  /* Zero-extend to 64 bit. */
          hi = lj_ir_kint(J, 0);
          goto fwdlo;
        }
        break;
        }
      case IR_CALLXS:
        goto split_call;
      case IR_PHI: {
        IRRef hiref2;
        if ((irref_isk(nir->op1) && irref_isk(nir->op2)) ||
            nir->op1 == nir->op2)
          J->cur.nins--;  /* Drop useless PHIs. */
        hiref2 = hisubst[ir->op2];
        if (!((irref_isk(hiref) && irref_isk(hiref2)) || hiref == hiref2))
          split_emit(J, IRTI(IR_PHI), hiref, hiref2);
        break;
        }
      case IR_HIOP:
        J->cur.nins--;  /* Drop joining HIOP. */
        ir->prev = nir->op1;
        hi = nir->op2;
        break;
      default:
        lua_assert(ir->o <= IR_NE);  /* Comparisons. */
        split_emit(J, IRTGI(IR_HIOP), hiref, hisubst[ir->op2]);
        break;
      }
    } else
#endif
#if LJ_SOFTFP
    if (ir->o == IR_SLOAD) {
      if ((nir->op2 & IRSLOAD_CONVERT)) {  /* Convert from number to int. */
        nir->op2 &= ~IRSLOAD_CONVERT;
        if (!(nir->op2 & IRSLOAD_TYPECHECK))
          nir->t.irt = IRT_INT;  /* Drop guard. */
        split_emit(J, IRT(IR_HIOP, IRT_SOFTFP), nref, nref);
        ir->prev = split_num2int(J, nref, nref+1, irt_isguard(ir->t));
      }
    } else if (ir->o == IR_TOBIT) {
      IRRef tmp, op1 = ir->op1;
      J->cur.nins--;
#if LJ_LE
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), oir[op1].prev, hisubst[op1]);
#else
      tmp = split_emit(J, IRT(IR_CARG, IRT_NIL), hisubst[op1], oir[op1].prev);
#endif
      ir->prev = split_emit(J, IRTI(IR_CALLN), tmp, IRCALL_lj_vm_tobit);
    } else if (ir->o == IR_TOSTR) {
      if (hisubst[ir->op1]) {
        /* Keep the original 64 bit constant ref; otherwise join words. */
        if (irref_isk(ir->op1))
          nir->op1 = ir->op1;
        else
          split_emit(J, IRT(IR_HIOP, IRT_NIL), hisubst[ir->op1], nref);
      }
    } else if (ir->o == IR_HREF || ir->o == IR_NEWREF) {
      /* Hash keys that are split 64 bit constants use the original ref. */
      if (irref_isk(ir->op2) && hisubst[ir->op2])
        nir->op2 = ir->op2;
    } else
#endif
    if (ir->o == IR_CONV) {  /* See above, too. */
      IRType st = (IRType)(ir->op2 & IRCONV_SRCMASK);
#if LJ_32 && LJ_HASFFI
      if (st == IRT_I64 || st == IRT_U64) {  /* Conversion from 64 bit int. */
#if LJ_SOFTFP
        if (irt_isfloat(ir->t)) {
          split_call_l(J, hisubst, oir, ir,
                       st == IRT_I64 ? IRCALL_fp64_l2f : IRCALL_fp64_ul2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        }
#else
        if (irt_isfp(ir->t)) {  /* 64 bit integer to FP conversion. */
          ir->prev = split_emit(J, IRT(IR_HIOP, irt_type(ir->t)),
                                hisubst[ir->op1], nref);
        }
#endif
        else {  /* Truncate to lower 32 bits. */
        fwdlo:
          ir->prev = nir->op1;  /* Forward loword. */
          /* Replace with NOP to avoid messing up the snapshot logic. */
          nir->ot = IRT(IR_NOP, IRT_NIL);
          nir->op1 = nir->op2 = 0;
        }
      }
#endif
#if LJ_SOFTFP && LJ_32 && LJ_HASFFI
      else if (irt_isfloat(ir->t)) {
        if (st == IRT_NUM) {
          split_call_l(J, hisubst, oir, ir, IRCALL_softfp_d2f);
          J->cur.nins--;  /* Drop unused HIOP. */
        } else {
          nir->o = IR_CALLN;
          nir->op2 = st == IRT_INT ? IRCALL_softfp_i2f : IRCALL_softfp_ui2f;
        }
      } else if (st == IRT_FLOAT) {
        nir->o = IR_CALLN;
        nir->op2 = irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui;
      } else
#endif
#if LJ_SOFTFP
      if (st == IRT_NUM || (LJ_32 && LJ_HASFFI && st == IRT_FLOAT)) {
        if (irt_isguard(ir->t)) {
          lua_assert(st == IRT_NUM && irt_isint(ir->t));
          J->cur.nins--;
          ir->prev = split_num2int(J, nir->op1, hisubst[ir->op1], 1);
        } else {
          split_call_l(J, hisubst, oir, ir,
#if LJ_32 && LJ_HASFFI
            st == IRT_NUM ?
              (irt_isint(ir->t) ? IRCALL_softfp_d2i : IRCALL_softfp_d2ui) :
              (irt_isint(ir->t) ? IRCALL_softfp_f2i : IRCALL_softfp_f2ui)
#else
            IRCALL_softfp_d2i
#endif
          );
          J->cur.nins--;  /* Drop unused HIOP. */
        }
      }
#endif
    } else if (ir->o == IR_CALLXS) {
      IRRef hiref;
    split_call:
      /* Split 64 bit call argument/result handling, shared by all calls. */
      hiref = hisubst[ir->op1];
      if (hiref) {
        IROpT ot = nir->ot;
        IRRef op2 = nir->op2;
        nir->ot = IRT(IR_CARG, IRT_NIL);
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, ot, nref, op2);
      }
      if (LJ_SOFTFP ? irt_is64(ir->t) : irt_isint64(ir->t))
        hi = split_emit(J,
          IRT(IR_HIOP, (LJ_SOFTFP && irt_isnum(ir->t)) ? IRT_SOFTFP : IRT_INT),
          nref, nref);
    } else if (ir->o == IR_CARG) {
      IRRef hiref = hisubst[ir->op1];
      if (hiref) {
        IRRef op2 = nir->op2;
#if LJ_LE
        nir->op2 = hiref;
#else
        nir->op2 = nir->op1; nir->op1 = hiref;
#endif
        ir->prev = nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
        nir = IR(nref);
      }
      hiref = hisubst[ir->op2];
      if (hiref) {
#if !LJ_TARGET_X86
        int carg = 0;
        IRIns *cir;
        for (cir = IR(nir->op1); cir->o == IR_CARG; cir = IR(cir->op1))
          carg++;
        if ((carg & 1) == 0) {  /* Align 64 bit arguments. */
          IRRef op2 = nir->op2;
          nir->op2 = REF_NIL;  /* Insert a dummy padding argument. */
          nref = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, op2);
          nir = IR(nref);
        }
#endif
#if LJ_BE
        { IRRef tmp = nir->op2; nir->op2 = hiref; hiref = tmp; }
#endif
        ir->prev = split_emit(J, IRT(IR_CARG, IRT_NIL), nref, hiref);
      }
    } else if (ir->o == IR_CNEWI) {
      if (hisubst[ir->op2])
        split_emit(J, IRT(IR_HIOP, IRT_NIL), nref, hisubst[ir->op2]);
    } else if (ir->o == IR_LOOP) {
      J->loopref = nref;  /* Needed by assembler. */
    }
    hisubst[ref] = hi;  /* Store hiword substitution. */
  }
  if (snref == nins) {  /* Substitution for last snapshot. */
    snap->ref = J->cur.nins;
    split_subst_snap(J, snap, oir);
  }

  /* Add PHI marks. Scan the trailing run of PHIs emitted above. */
  for (ref = J->cur.nins-1; ref >= REF_FIRST; ref--) {
    IRIns *ir = IR(ref);
    if (ir->o != IR_PHI) break;
    if (!irref_isk(ir->op1)) irt_setphi(IR(ir->op1)->t);
    if (ir->op2 > J->loopref) irt_setphi(IR(ir->op2)->t);
  }
}
 812 
 813 /* Protected callback for split pass. */
 814 static TValue *cpsplit(lua_State *L, lua_CFunction dummy, void *ud)
 815 {
 816   jit_State *J = (jit_State *)ud;
 817   split_ir(J);
 818   UNUSED(L); UNUSED(dummy);
 819   return NULL;
 820 }
 821 
 822 #if defined(LUA_USE_ASSERT) || LJ_SOFTFP
/* Slow, but sure way to check whether a SPLIT pass is needed.
** Scans the whole IR for anything the 32 bit backends cannot handle
** directly: 64 bit (or soft-float FP) results, converting SLOADs,
** TOBIT, and conversions from/to 64 bit or FP types. Returns 1 if the
** SPLIT pass is required, 0 otherwise.
*/
static int split_needsplit(jit_State *J)
{
  IRIns *ir, *irend;
  IRRef ref;
  for (ir = IR(REF_FIRST), irend = IR(J->cur.nins); ir < irend; ir++)
    if (LJ_SOFTFP ? irt_is64orfp(ir->t) : irt_isint64(ir->t))
      return 1;
  if (LJ_SOFTFP) {
    /* Converting SLOADs and TOBIT need splitting, too (soft-float only). */
    for (ref = J->chain[IR_SLOAD]; ref; ref = IR(ref)->prev)
      if ((IR(ref)->op2 & IRSLOAD_CONVERT))
        return 1;
    if (J->chain[IR_TOBIT])
      return 1;
  }
  for (ref = J->chain[IR_CONV]; ref; ref = IR(ref)->prev) {
    IRType st = (IR(ref)->op2 & IRCONV_SRCMASK);
    if ((LJ_SOFTFP && (st == IRT_NUM || st == IRT_FLOAT)) ||
        st == IRT_I64 || st == IRT_U64)
      return 1;
  }
  return 0;  /* Nope. */
}
 846 #endif
 847 
/* SPLIT pass entry point. Runs split_ir under a protected call, since
** IR emission may throw (e.g. on IR buffer overflow).
*/
void lj_opt_split(jit_State *J)
{
#if LJ_SOFTFP
  /* On soft-float targets the needsplit flag may not be set eagerly. */
  if (!J->needsplit)
    J->needsplit = split_needsplit(J);
#else
  lua_assert(J->needsplit >= split_needsplit(J));  /* Verify flag. */
#endif
  if (J->needsplit) {
    int errcode = lj_vm_cpcall(J->L, NULL, J, cpsplit);
    if (errcode) {
      /* Completely reset the trace to avoid inconsistent dump on abort. */
      J->cur.nins = J->cur.nk = REF_BASE;
      J->cur.nsnap = 0;
      lj_err_throw(J->L, errcode);  /* Propagate errors. */
    }
  }
}
 867 
 868 #undef IR
 869 
 870 #endif

/* [<][>][^][v][top][bottom][index][help] */