root/lj_emit_arm64.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes the following definitions.
  1. get_k64val
  2. emit_isk12
  3. emit_isk13
  4. emit_isfpk64
  5. emit_dnma
  6. emit_dnm
  7. emit_dm
  8. emit_dn
  9. emit_nm
  10. emit_d
  11. emit_n
  12. emit_checkofs
  13. emit_lso
  14. emit_kdelta
  15. emit_loadk
  16. emit_lsptr
  17. emit_loadk64
  18. emit_cond_branch
  19. emit_branch
  20. emit_tnb
  21. emit_cnb
  22. emit_call
  23. emit_movrr
  24. emit_loadofs
  25. emit_storeofs
  26. emit_opk
  27. emit_addptr

   1 /*
   2 ** ARM64 instruction emitter.
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 **
   5 ** Contributed by Djordje Kovacevic and Stefan Pejic from RT-RK.com.
   6 ** Sponsored by Cisco Systems, Inc.
   7 */
   8 
   9 /* -- Constant encoding --------------------------------------------------- */
  10 
  11 static uint64_t get_k64val(IRIns *ir)
  12 {
  13   if (ir->o == IR_KINT64) {
  14     return ir_kint64(ir)->u64;
  15   } else if (ir->o == IR_KGC) {
  16     return (uint64_t)ir_kgc(ir);
  17   } else if (ir->o == IR_KPTR || ir->o == IR_KKPTR) {
  18     return (uint64_t)ir_kptr(ir);
  19   } else {
  20     lua_assert(ir->o == IR_KINT || ir->o == IR_KNULL);
  21     return ir->i;  /* Sign-extended. */
  22   }
  23 }
  24 
  25 /* Encode constant in K12 format for data processing instructions. */
  26 static uint32_t emit_isk12(int64_t n)
  27 {
  28   uint64_t k = (n < 0) ? -n : n;
  29   uint32_t m = (n < 0) ? 0x40000000 : 0;
  30   if (k < 0x1000) {
  31     return A64I_K12|m|A64F_U12(k);
  32   } else if ((k & 0xfff000) == k) {
  33     return A64I_K12|m|0x400000|A64F_U12(k>>12);
  34   }
  35   return 0;
  36 }
  37 
/* Count leading/trailing zero bits (result undefined for n == 0). */
#define emit_clz64(n)   __builtin_clzll(n)
#define emit_ctz64(n)   __builtin_ctzll(n)
  40 
  41 /* Encode constant in K13 format for logical data processing instructions. */
  42 static uint32_t emit_isk13(uint64_t n, int is64)
  43 {
  44   int inv = 0, w = 128, lz, tz;
  45   if (n & 1) { n = ~n; w = 64; inv = 1; }  /* Avoid wrap-around of ones. */
  46   if (!n) return 0;  /* Neither all-zero nor all-ones are allowed. */
  47   do {  /* Find the repeat width. */
  48     if (is64 && (uint32_t)(n^(n>>32))) break;
  49     n = (uint32_t)n;
  50     if (!n) return 0;  /* Ditto when passing n=0xffffffff and is64=0. */
  51     w = 32; if ((n^(n>>16)) & 0xffff) break;
  52     n = n & 0xffff; w = 16; if ((n^(n>>8)) & 0xff) break;
  53     n = n & 0xff; w = 8; if ((n^(n>>4)) & 0xf) break;
  54     n = n & 0xf; w = 4; if ((n^(n>>2)) & 0x3) break;
  55     n = n & 0x3; w = 2;
  56   } while (0);
  57   lz = emit_clz64(n);
  58   tz = emit_ctz64(n);
  59   if ((int64_t)(n << lz) >> (lz+tz) != -1ll) return 0; /* Non-contiguous? */
  60   if (inv)
  61     return A64I_K13 | (((lz-w) & 127) << 16) | (((lz+tz-w-1) & 63) << 10);
  62   else
  63     return A64I_K13 | ((w-tz) << 16) | (((63-lz-tz-w-w) & 63) << 10);
  64 }
  65 
  66 static uint32_t emit_isfpk64(uint64_t n)
  67 {
  68   uint64_t etop9 = ((n >> 54) & 0x1ff);
  69   if ((n << 16) == 0 && (etop9 == 0x100 || etop9 == 0x0ff)) {
  70     return (uint32_t)(((n >> 48) & 0x7f) | ((n >> 56) & 0x80));
  71   }
  72   return ~0u;
  73 }
  74 
  75 /* -- Emit basic instructions --------------------------------------------- */
  76 
  77 static void emit_dnma(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm, Reg ra)
  78 {
  79   *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm) | A64F_A(ra);
  80 }
  81 
  82 static void emit_dnm(ASMState *as, A64Ins ai, Reg rd, Reg rn, Reg rm)
  83 {
  84   *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_M(rm);
  85 }
  86 
  87 static void emit_dm(ASMState *as, A64Ins ai, Reg rd, Reg rm)
  88 {
  89   *--as->mcp = ai | A64F_D(rd) | A64F_M(rm);
  90 }
  91 
  92 static void emit_dn(ASMState *as, A64Ins ai, Reg rd, Reg rn)
  93 {
  94   *--as->mcp = ai | A64F_D(rd) | A64F_N(rn);
  95 }
  96 
  97 static void emit_nm(ASMState *as, A64Ins ai, Reg rn, Reg rm)
  98 {
  99   *--as->mcp = ai | A64F_N(rn) | A64F_M(rm);
 100 }
 101 
 102 static void emit_d(ASMState *as, A64Ins ai, Reg rd)
 103 {
 104   *--as->mcp = ai | A64F_D(rd);
 105 }
 106 
 107 static void emit_n(ASMState *as, A64Ins ai, Reg rn)
 108 {
 109   *--as->mcp = ai | A64F_N(rn);
 110 }
 111 
 112 static int emit_checkofs(A64Ins ai, int64_t ofs)
 113 {
 114   int scale = (ai >> 30) & 3;
 115   if (ofs < 0 || (ofs & ((1<<scale)-1))) {
 116     return (ofs >= -256 && ofs <= 255) ? -1 : 0;
 117   } else {
 118     return (ofs < (4096<<scale)) ? 1 : 0;
 119   }
 120 }
 121 
/* Emit a load/store with base register and offset, opportunistically
** fusing it with the previously emitted (i.e. next to execute)
** load/store into a single LDP/STP.
*/
static void emit_lso(ASMState *as, A64Ins ai, Reg rd, Reg rn, int64_t ofs)
{
  int ot = emit_checkofs(ai, ofs), sc = (ai >> 30) & 3;
  lua_assert(ot);  /* Offset must fit one of the two encodings. */
  /* Combine LDR/STR pairs to LDP/STP. */
  if ((sc == 2 || sc == 3) &&  /* Only 32/64 bit accesses can be paired. */
      (!(ai & 0x400000) || rd != rn) &&  /* Skip loads that clobber base rn. */
      as->mcp != as->mcloop) {  /* Don't fuse across the loop entry. */
    uint32_t prev = *as->mcp & ~A64F_D(31);  /* Previous ins, D field masked. */
    int ofsm = ofs - (1<<sc), ofsp = ofs + (1<<sc);
    A64Ins aip;
    if (prev == (ai | A64F_N(rn) | A64F_U12(ofsm>>sc)) ||
        prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsm&0x1ff))) {
      /* Previous accesses the adjacent lower slot: rd becomes reg 2. */
      aip = (A64F_A(rd) | A64F_D(*as->mcp & 31));
    } else if (prev == (ai | A64F_N(rn) | A64F_U12(ofsp>>sc)) ||
               prev == ((ai^A64I_LS_U) | A64F_N(rn) | A64F_S9(ofsp&0x1ff))) {
      /* Previous accesses the adjacent higher slot: rd becomes reg 1. */
      aip = (A64F_D(rd) | A64F_A(*as->mcp & 31));
      ofsm = ofs;  /* Pair offset is the lower of the two offsets. */
    } else {
      goto nopair;
    }
    /* LDP/STP takes a scaled, signed 7 bit offset: -64<<sc .. 63<<sc. */
    if (ofsm >= (int)((unsigned int)-64<<sc) && ofsm <= (63<<sc)) {
      /* Rewrite the previous instruction in place as an LDP/STP. */
      *as->mcp = aip | A64F_N(rn) | ((ofsm >> sc) << 15) |
        (ai ^ ((ai == A64I_LDRx || ai == A64I_STRx) ? 0x50000000 : 0x90000000));
      return;
    }
  }
nopair:
  if (ot == 1)  /* Scaled unsigned 12 bit offset. */
    *--as->mcp = ai | A64F_D(rd) | A64F_N(rn) | A64F_U12(ofs >> sc);
  else  /* Unscaled signed 9 bit offset. */
    *--as->mcp = (ai^A64I_LS_U) | A64F_D(rd) | A64F_N(rn) | A64F_S9(ofs & 0x1ff);
}
 155 
 156 /* -- Emit loads/stores --------------------------------------------------- */
 157 
 158 /* Prefer rematerialization of BASE/L from global_State over spills. */
 159 #define emit_canremat(ref)      ((ref) <= ASMREF_L)
 160 
/* Try to find an N-step delta relative to other consts with N < lim.
** Scans the currently allocated GPRs holding constants and, if the wanted
** constant k equals (or is a K12 immediate away from) one of them, emits
** a single MOV or ADD/SUB instead of a full load sequence. Returns 1 on
** success, 0 if no single-instruction derivation was found.
*/
static int emit_kdelta(ASMState *as, Reg rd, uint64_t k, int lim)
{
  RegSet work = ~as->freeset & RSET_GPR;  /* Allocated GPRs only. */
  if (lim <= 1) return 0;  /* Can't beat that. */
  while (work) {
    Reg r = rset_picktop(work);
    IRRef ref = regcost_ref(as->cost[r]);
    lua_assert(r != rd);
    if (ref < REF_TRUE) {  /* Register holds a constant reference. */
      /* Constant value currently held in register r. */
      uint64_t kx = ra_iskref(ref) ? (uint64_t)ra_krefk(as, ref) :
                                     get_k64val(IR(ref));
      int64_t delta = (int64_t)(k - kx);
      if (delta == 0) {  /* Same constant: plain register move. */
        emit_dm(as, A64I_MOVx, rd, r);
        return 1;
      } else {
        uint32_t k12 = emit_isk12(delta < 0 ? -delta : delta);
        if (k12) {  /* One ADD/SUB with 12 bit immediate does it. */
          emit_dn(as, (delta < 0 ? A64I_SUBx : A64I_ADDx)^k12, rd, r);
          return 1;
        }
        /* Do other ops or multi-step deltas pay off? Probably not.
        ** E.g. XOR rarely helps with pointer consts.
        */
      }
    }
    rset_clear(work, r);
  }
  return 0;  /* Failed. */
}
 192 
/* Load a 32/64 bit constant into a GPR with a short sequence: a single
** ORR with a bitmask immediate, a delta from an already-allocated
** constant register, or MOVZ/MOVN followed by up to three MOVK.
*/
static void emit_loadk(ASMState *as, Reg rd, uint64_t u64, int is64)
{
  uint32_t k13 = emit_isk13(u64, is64);
  if (k13) {  /* Can the constant be represented as a bitmask immediate? */
    emit_dn(as, (is64|A64I_ORRw)^k13, rd, RID_ZERO);
  } else {
    int i, zeros = 0, ones = 0, neg;
    if (!is64) u64 = (int64_t)(int32_t)u64;  /* Sign-extend. */
    /* Count homogeneous 16 bit fragments. */
    for (i = 0; i < 4; i++) {
      uint64_t frag = (u64 >> i*16) & 0xffff;
      zeros += (frag == 0);
      ones += (frag == 0xffff);
    }
    neg = ones > zeros;  /* Use MOVN if it pays off. */
    /* A kdelta shorter than the MOVZ/MOVN+MOVK sequence wins. */
    if (!emit_kdelta(as, rd, u64, 4 - (neg ? ones : zeros))) {
      int shift = 0, lshift = 0;
      uint64_t n64 = neg ? ~u64 : u64;
      if (n64 != 0) {
        /* Find first/last fragment to be filled. */
        shift = (63-emit_clz64(n64)) & ~15;
        lshift = emit_ctz64(n64) & ~15;
      }
      /* MOVK requires the original value (u64). */
      while (shift > lshift) {
        uint32_t u16 = (u64 >> shift) & 0xffff;
        /* Skip fragments that are correctly filled by MOVN/MOVZ. */
        if (u16 != (neg ? 0xffff : 0))
          emit_d(as, is64 | A64I_MOVKw | A64F_U16(u16) | A64F_LSL16(shift), rd);
        shift -= 16;
      }
      /* But MOVN needs an inverted value (n64). */
      emit_d(as, (neg ? A64I_MOVNx : A64I_MOVZx) |
                 A64F_U16((n64 >> lshift) & 0xffff) | A64F_LSL16(lshift), rd);
    }
  }
}
 230 
/* Load a 32 bit constant into a GPR. */
#define emit_loadi(as, rd, i)   emit_loadk(as, rd, i, 0)

/* Load a 64 bit constant into a GPR. */
#define emit_loadu64(as, rd, i) emit_loadk(as, rd, i, A64I_X)

/* Load an address into a GPR. */
#define emit_loada(as, r, addr) emit_loadu64(as, (r), (uintptr_t)(addr))

/* Offset of k relative to the global_State (GL-relative addressing). */
#define glofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)&J2GG(as->J)->g))
/* Offset of k relative to the next instruction to be emitted. */
#define mcpofs(as, k) \
  ((intptr_t)((uintptr_t)(k) - (uintptr_t)(as->mcp - 1)))
/* Does k fit the signed 19 bit word offset of a PC-relative LDR literal? */
#define checkmcpofs(as, k) \
  ((((mcpofs(as, k)>>2) + 0x00040000) >> 19) == 0)
 245 
 246 static Reg ra_allock(ASMState *as, intptr_t k, RegSet allow);
 247 
/* Get/set from constant pointer.
** Picks the cheapest addressing mode: PC-relative literal load,
** GL-register-relative offset, or an allocated base register + offset.
*/
static void emit_lsptr(ASMState *as, A64Ins ai, Reg r, void *p)
{
  /* First, check if ip + offset is in range (loads only: bit 22 set). */
  if ((ai & 0x00400000) && checkmcpofs(as, p)) {
    emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, p)>>2), r);
  } else {
    Reg base = RID_GL;  /* Next, try GL + offset. */
    int64_t ofs = glofs(as, p);
    if (!emit_checkofs(ai, ofs)) {  /* Else split up into base reg + offset. */
      /* Allocate the 32KB-aligned part of the address as the base and
      ** keep the low 15 bits as the load/store offset.
      */
      int64_t i64 = i64ptr(p);
      base = ra_allock(as, (i64 & ~0x7fffull), rset_exclude(RSET_GPR, r));
      ofs = i64 & 0x7fffull;
    }
    emit_lso(as, ai, r, base, ofs);
  }
}
 265 
/* Load 64 bit IR constant into register.
** Order of preference: FMOV immediate (FPRs only), GL-relative load,
** PC-relative literal load, inline constant construction.
*/
static void emit_loadk64(ASMState *as, Reg r, IRIns *ir)
{
  const uint64_t *k = &ir_k64(ir)->u64;
  int64_t ofs;
  if (r >= RID_MAX_GPR) {  /* FPR: try the FMOV 8 bit immediate first. */
    uint32_t fpk = emit_isfpk64(*k);
    if (fpk != ~0u) {
      emit_d(as, A64I_FMOV_DI | A64F_FP8(fpk), (r & 31));
      return;
    }
  }
  ofs = glofs(as, k);
  if (emit_checkofs(A64I_LDRx, ofs)) {  /* Load relative to GL register. */
    emit_lso(as, r >= RID_MAX_GPR ? A64I_LDRd : A64I_LDRx,
             (r & 31), RID_GL, ofs);
  } else {
    if (r >= RID_MAX_GPR) {  /* Materialize in RID_TMP, then move to FPR. */
      emit_dn(as, A64I_FMOV_D_R, (r & 31), RID_TMP);
      r = RID_TMP;
    }
    if (checkmcpofs(as, k))  /* PC-relative literal load, if in range. */
      emit_d(as, A64I_LDRLx | A64F_S19(mcpofs(as, k)>>2), r);
    else  /* Worst case: build the constant inline. */
      emit_loadu64(as, r, *k);
  }
}
 293 
 294 /* Get/set global_State fields. */
 295 #define emit_getgl(as, r, field) \
 296   emit_lsptr(as, A64I_LDRx, (r), (void *)&J2G(as->J)->field)
 297 #define emit_setgl(as, r, field) \
 298   emit_lsptr(as, A64I_STRx, (r), (void *)&J2G(as->J)->field)
 299 
 300 /* Trace number is determined from pc of exit instruction. */
 301 #define emit_setvmstate(as, i)  UNUSED(i)
 302 
 303 /* -- Emit control-flow instructions -------------------------------------- */
 304 
 305 /* Label for internal jumps. */
 306 typedef MCode *MCLabel;
 307 
 308 /* Return label pointing to current PC. */
 309 #define emit_label(as)          ((as)->mcp)
 310 
 311 static void emit_cond_branch(ASMState *as, A64CC cond, MCode *target)
 312 {
 313   MCode *p = --as->mcp;
 314   ptrdiff_t delta = target - p;
 315   lua_assert(((delta + 0x40000) >> 19) == 0);
 316   *p = A64I_BCC | A64F_S19(delta) | cond;
 317 }
 318 
 319 static void emit_branch(ASMState *as, A64Ins ai, MCode *target)
 320 {
 321   MCode *p = --as->mcp;
 322   ptrdiff_t delta = target - p;
 323   lua_assert(((delta + 0x02000000) >> 26) == 0);
 324   *p = ai | ((uint32_t)delta & 0x03ffffffu);
 325 }
 326 
 327 static void emit_tnb(ASMState *as, A64Ins ai, Reg r, uint32_t bit, MCode *target)
 328 {
 329   MCode *p = --as->mcp;
 330   ptrdiff_t delta = target - p;
 331   lua_assert(bit < 63 && ((delta + 0x2000) >> 14) == 0);
 332   if (bit > 31) ai |= A64I_X;
 333   *p = ai | A64F_BIT(bit & 31) | A64F_S14((uint32_t)delta & 0x3fffu) | r;
 334 }
 335 
 336 static void emit_cnb(ASMState *as, A64Ins ai, Reg r, MCode *target)
 337 {
 338   MCode *p = --as->mcp;
 339   ptrdiff_t delta = target - p;
 340   lua_assert(((delta + 0x40000) >> 19) == 0);
 341   *p = ai | A64F_S19(delta) | r;
 342 }
 343 
 344 #define emit_jmp(as, target)    emit_branch(as, A64I_B, (target))
 345 
 346 static void emit_call(ASMState *as, void *target)
 347 {
 348   MCode *p = --as->mcp;
 349   ptrdiff_t delta = (char *)target - (char *)p;
 350   if ((((delta>>2) + 0x02000000) >> 26) == 0) {
 351     *p = A64I_BL | ((uint32_t)(delta>>2) & 0x03ffffffu);
 352   } else {  /* Target out of range: need indirect call. But don't use R0-R7. */
 353     Reg r = ra_allock(as, i64ptr(target),
 354                       RSET_RANGE(RID_X8, RID_MAX_GPR)-RSET_FIXED);
 355     *p = A64I_BLR | A64F_N(r);
 356   }
 357 }
 358 
 359 /* -- Emit generic operations --------------------------------------------- */
 360 
/* Generic move between two regs. */
static void emit_movrr(ASMState *as, IRIns *ir, Reg dst, Reg src)
{
  if (dst >= RID_MAX_GPR) {  /* FPR move, width chosen from the IR type. */
    emit_dn(as, irt_isnum(ir->t) ? A64I_FMOV_D : A64I_FMOV_S,
            (dst & 31), (src & 31));
    return;
  }
  if (as->mcp != as->mcloop) {  /* Swap early registers for loads/stores. */
    /* If the previously emitted instruction is a load/store that uses dst
    ** as its base (N) or store-data (D) register, patch it to use src
    ** instead, so the move and the memory access can overlap.
    ** NOTE(review): masks 0xbf800000/0xb9000000 and the 0x00400000
    ** load-bit test presumably match the 32/64 bit LDR/STR
    ** unsigned-offset encodings -- verify against the A64I_* defines.
    */
    MCode ins = *as->mcp, swp = (src^dst);
    if ((ins & 0xbf800000) == 0xb9000000) {
      if (!((ins ^ (dst << 5)) & 0x000003e0))
        *as->mcp = ins ^ (swp << 5);  /* Swap N in load/store. */
      if (!(ins & 0x00400000) && !((ins ^ dst) & 0x0000001f))
        *as->mcp = ins ^ swp;  /* Swap D in store. */
    }
  }
  emit_dm(as, A64I_MOVx, dst, src);
}
 380 
 381 /* Generic load of register with base and (small) offset address. */
 382 static void emit_loadofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 383 {
 384   if (r >= RID_MAX_GPR)
 385     emit_lso(as, irt_isnum(ir->t) ? A64I_LDRd : A64I_LDRs, (r & 31), base, ofs);
 386   else
 387     emit_lso(as, irt_is64(ir->t) ? A64I_LDRx : A64I_LDRw, r, base, ofs);
 388 }
 389 
 390 /* Generic store of register with base and (small) offset address. */
 391 static void emit_storeofs(ASMState *as, IRIns *ir, Reg r, Reg base, int32_t ofs)
 392 {
 393   if (r >= RID_MAX_GPR)
 394     emit_lso(as, irt_isnum(ir->t) ? A64I_STRd : A64I_STRs, (r & 31), base, ofs);
 395   else
 396     emit_lso(as, irt_is64(ir->t) ? A64I_STRx : A64I_STRw, r, base, ofs);
 397 }
 398 
 399 /* Emit an arithmetic operation with a constant operand. */
 400 static void emit_opk(ASMState *as, A64Ins ai, Reg dest, Reg src,
 401                      int32_t i, RegSet allow)
 402 {
 403   uint32_t k = emit_isk12(i);
 404   if (k)
 405     emit_dn(as, ai^k, dest, src);
 406   else
 407     emit_dnm(as, ai, dest, src, ra_allock(as, i, allow));
 408 }
 409 
 410 /* Add offset to pointer. */
 411 static void emit_addptr(ASMState *as, Reg r, int32_t ofs)
 412 {
 413   if (ofs)
 414     emit_opk(as, ofs < 0 ? A64I_SUBx : A64I_ADDx, r, r,
 415                  ofs < 0 ? -ofs : ofs, rset_exclude(RSET_GPR, r));
 416 }
 417 
 418 #define emit_spsub(as, ofs)     emit_addptr(as, RID_SP, -(ofs))
 419 

/* [<][>][^][v][top][bottom][index][help] */