root/lj_jit.h

/* [<][>][^][v][top][bottom][index][help] */

INCLUDED FROM


DEFINITIONS

This source file includes following definitions.
  1. JIT_PARAMDEF
  2. snap_nextofs
  3. LJ_PRNG_BITS

   1 /*
   2 ** Common definitions for the JIT compiler.
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5 
   6 #ifndef _LJ_JIT_H
   7 #define _LJ_JIT_H
   8 
   9 #include "lj_obj.h"
  10 #include "lj_ir.h"
  11 
/* JIT engine flags. */
#define JIT_F_ON                0x00000001

/* CPU-specific JIT engine flags. Bits 4..15 are reserved for these. */
#if LJ_TARGET_X86ORX64
#define JIT_F_CMOV              0x00000010
#define JIT_F_SSE2              0x00000020
#define JIT_F_SSE3              0x00000040
#define JIT_F_SSE4_1            0x00000080
#define JIT_F_P4                0x00000100
#define JIT_F_PREFER_IMUL       0x00000200
#define JIT_F_SPLIT_XMM         0x00000400
#define JIT_F_LEA_AGU           0x00000800

/* Names for the CPU-specific flags. Must match the order above. */
/* Each name is prefixed with its length as an octal character escape,
** e.g. "\6SSE4.1" is the 6-character name for JIT_F_SSE4_1.
*/
#define JIT_F_CPU_FIRST         JIT_F_CMOV
#define JIT_F_CPUSTRING         "\4CMOV\4SSE2\4SSE3\6SSE4.1\2P4\3AMD\2K8\4ATOM"
#elif LJ_TARGET_ARM
#define JIT_F_ARMV6_            0x00000010
#define JIT_F_ARMV6T2_          0x00000020
#define JIT_F_ARMV7             0x00000040
#define JIT_F_VFPV2             0x00000080
#define JIT_F_VFPV3             0x00000100

/* Derived masks: each "at least version X" test covers X and above. */
#define JIT_F_ARMV6             (JIT_F_ARMV6_|JIT_F_ARMV6T2_|JIT_F_ARMV7)
#define JIT_F_ARMV6T2           (JIT_F_ARMV6T2_|JIT_F_ARMV7)
#define JIT_F_VFP               (JIT_F_VFPV2|JIT_F_VFPV3)

/* Names for the CPU-specific flags. Must match the order above. */
#define JIT_F_CPU_FIRST         JIT_F_ARMV6_
#define JIT_F_CPUSTRING         "\5ARMv6\7ARMv6T2\5ARMv7\5VFPv2\5VFPv3"
#elif LJ_TARGET_PPC
#define JIT_F_SQRT              0x00000010
#define JIT_F_ROUND             0x00000020

/* Names for the CPU-specific flags. Must match the order above. */
#define JIT_F_CPU_FIRST         JIT_F_SQRT
#define JIT_F_CPUSTRING         "\4SQRT\5ROUND"
#elif LJ_TARGET_MIPS
#define JIT_F_MIPS32R2          0x00000010

/* Names for the CPU-specific flags. Must match the order above. */
#define JIT_F_CPU_FIRST         JIT_F_MIPS32R2
#define JIT_F_CPUSTRING         "\010MIPS32R2"
#else
#define JIT_F_CPU_FIRST         0
#define JIT_F_CPUSTRING         ""
#endif
  60 
/* Optimization flags. Occupy bits 16..27 (covered by JIT_F_OPT_MASK). */
#define JIT_F_OPT_MASK          0x0fff0000

#define JIT_F_OPT_FOLD          0x00010000
#define JIT_F_OPT_CSE           0x00020000
#define JIT_F_OPT_DCE           0x00040000
#define JIT_F_OPT_FWD           0x00080000
#define JIT_F_OPT_DSE           0x00100000
#define JIT_F_OPT_NARROW        0x00200000
#define JIT_F_OPT_LOOP          0x00400000
#define JIT_F_OPT_ABC           0x00800000
#define JIT_F_OPT_SINK          0x01000000
#define JIT_F_OPT_FUSE          0x02000000

/* Optimizations names for -O. Must match the order above. */
/* Same length-prefixed format as JIT_F_CPUSTRING (octal length byte). */
#define JIT_F_OPT_FIRST         JIT_F_OPT_FOLD
#define JIT_F_OPTSTRING \
  "\4fold\3cse\3dce\3fwd\3dse\6narrow\4loop\3abc\4sink\4fuse"

/* Optimization levels set a fixed combination of flags. */
#define JIT_F_OPT_0     0
#define JIT_F_OPT_1     (JIT_F_OPT_FOLD|JIT_F_OPT_CSE|JIT_F_OPT_DCE)
#define JIT_F_OPT_2     (JIT_F_OPT_1|JIT_F_OPT_NARROW|JIT_F_OPT_LOOP)
#define JIT_F_OPT_3     (JIT_F_OPT_2|\
  JIT_F_OPT_FWD|JIT_F_OPT_DSE|JIT_F_OPT_ABC|JIT_F_OPT_SINK|JIT_F_OPT_FUSE)
#define JIT_F_OPT_DEFAULT       JIT_F_OPT_3
  87 
#if LJ_TARGET_WINDOWS || LJ_64
/* See: http://blogs.msdn.com/oldnewthing/archive/2003/10/08/55239.aspx */
#define JIT_P_sizemcode_DEFAULT         64
#else
/* Could go as low as 4K, but the mmap() overhead would be rather high. */
#define JIT_P_sizemcode_DEFAULT         32
#endif

/* Optimization parameters and their defaults. Length is a char in octal! */
/* X-macro list: _(length-of-name as octal char escape, name, default).
** Expanded below to build the JIT_P_* enum and the JIT_P_STRING name table.
*/
#define JIT_PARAMDEF(_) \
  _(\010, maxtrace,     1000)   /* Max. # of traces in cache. */ \
  _(\011, maxrecord,    4000)   /* Max. # of recorded IR instructions. */ \
  _(\012, maxirconst,   500)    /* Max. # of IR constants of a trace. */ \
  _(\007, maxside,      100)    /* Max. # of side traces of a root trace. */ \
  _(\007, maxsnap,      500)    /* Max. # of snapshots for a trace. */ \
  \
  _(\007, hotloop,      56)     /* # of iter. to detect a hot loop/call. */ \
  _(\007, hotexit,      10)     /* # of taken exits to start a side trace. */ \
  _(\007, tryside,      4)      /* # of attempts to compile a side trace. */ \
  \
  _(\012, instunroll,   4)      /* Max. unroll for unstable loops. */ \
  _(\012, loopunroll,   15)     /* Max. unroll for loop ops in side traces. */ \
  _(\012, callunroll,   3)      /* Max. unroll for recursive calls. */ \
  _(\011, recunroll,    2)      /* Min. unroll for true recursion. */ \
  \
  /* Size of each machine code area (in KBytes). */ \
  _(\011, sizemcode,    JIT_P_sizemcode_DEFAULT) \
  /* Max. total size of all machine code areas (in KBytes). */ \
  _(\010, maxmcode,     512) \
  /* End of list. */
 118 
/* Parameter indices JIT_P_maxtrace, JIT_P_maxrecord, ... generated from
** the JIT_PARAMDEF list above; JIT_P__MAX is the parameter count.
*/
enum {
#define JIT_PARAMENUM(len, name, value) JIT_P_##name,
JIT_PARAMDEF(JIT_PARAMENUM)
#undef JIT_PARAMENUM
  JIT_P__MAX
};

/* Concatenated, length-prefixed string of all parameter names. */
#define JIT_PARAMSTR(len, name, value)  #len #name
#define JIT_P_STRING    JIT_PARAMDEF(JIT_PARAMSTR)
 128 
/* Trace compiler state. */
typedef enum {
  LJ_TRACE_IDLE,        /* Trace compiler idle. */
  /* NOTE(review): 0x10 appears to act as a flag bit marking the "active"
  ** states below -- confirm against the users of TraceState.
  */
  LJ_TRACE_ACTIVE = 0x10,
  LJ_TRACE_RECORD,      /* Bytecode recording active. */
  LJ_TRACE_START,       /* New trace started. */
  LJ_TRACE_END,         /* End of trace. */
  LJ_TRACE_ASM,         /* Assemble trace. */
  LJ_TRACE_ERR          /* Trace aborted with error. */
} TraceState;

/* Post-processing action. */
typedef enum {
  LJ_POST_NONE,         /* No action. */
  LJ_POST_FIXCOMP,      /* Fixup comparison and emit pending guard. */
  LJ_POST_FIXGUARD,     /* Fixup and emit pending guard. */
  LJ_POST_FIXGUARDSNAP, /* Fixup and emit pending guard and snapshot. */
  LJ_POST_FIXBOOL,      /* Fixup boolean result. */
  LJ_POST_FIXCONST,     /* Fixup constant results. */
  LJ_POST_FFRETRY       /* Suppress recording of retried fast functions. */
} PostProc;
 150 
/* Machine code type. Byte-granular on x86/x64, 32 bit units elsewhere. */
#if LJ_TARGET_X86ORX64
typedef uint8_t MCode;
#else
typedef uint32_t MCode;
#endif

/* Linked list of MCode areas. */
typedef struct MCLink {
  MCode *next;          /* Next area. */
  size_t size;          /* Size of current area. */
} MCLink;
 163 
/* Stack snapshot header. */
typedef struct SnapShot {
  uint16_t mapofs;      /* Offset into snapshot map. */
  IRRef1 ref;           /* First IR ref for this snapshot. */
  uint8_t nslots;       /* Number of valid slots. */
  uint8_t topslot;      /* Maximum frame extent. */
  uint8_t nent;         /* Number of compressed entries. */
  uint8_t count;        /* Count of taken exits for this snapshot. */
} SnapShot;

#define SNAPCOUNT_DONE  255     /* Already compiled and linked a side trace. */

/* Compressed snapshot entry.
** Layout (see macros below): slot number in bits 24..31, flags in
** bits 16..19, IR reference in bits 0..15.
*/
typedef uint32_t SnapEntry;

#define SNAP_FRAME              0x010000        /* Frame slot. */
#define SNAP_CONT               0x020000        /* Continuation slot. */
#define SNAP_NORESTORE          0x040000        /* No need to restore slot. */
#define SNAP_SOFTFPNUM          0x080000        /* Soft-float number. */
LJ_STATIC_ASSERT(SNAP_FRAME == TREF_FRAME);
LJ_STATIC_ASSERT(SNAP_CONT == TREF_CONT);

/* Compose an entry from slot number, flags and IR reference. */
#define SNAP(slot, flags, ref)  (((SnapEntry)(slot) << 24) + (flags) + (ref))
/* Compose an entry from a TRef, keeping only CONT/FRAME flags and the ref. */
#define SNAP_TR(slot, tr) \
  (((SnapEntry)(slot) << 24) + ((tr) & (TREF_CONT|TREF_FRAME|TREF_REFMASK)))
#define SNAP_MKPC(pc)           ((SnapEntry)u32ptr(pc))
#define SNAP_MKFTSZ(ftsz)       ((SnapEntry)(ftsz))
/* Accessors for the fields packed into a SnapEntry. */
#define snap_ref(sn)            ((sn) & 0xffff)
#define snap_slot(sn)           ((BCReg)((sn) >> 24))
#define snap_isframe(sn)        ((sn) & SNAP_FRAME)
#define snap_pc(sn)             ((const BCIns *)(uintptr_t)(sn))
/* Replace the ref; keeps slot/flags, drops NORESTORE. */
#define snap_setref(sn, ref)    (((sn) & (0xffff0000&~SNAP_NORESTORE)) | (ref))
 196 
/* Snapshot and exit numbers. */
typedef uint32_t SnapNo;
typedef uint32_t ExitNo;

/* Trace number. */
typedef uint32_t TraceNo;       /* Used to pass around trace numbers. */
typedef uint16_t TraceNo1;      /* Stored trace number. */

/* Type of link. ORDER LJ_TRLINK (do not reorder without updating users). */
typedef enum {
  LJ_TRLINK_NONE,               /* Incomplete trace. No link, yet. */
  LJ_TRLINK_ROOT,               /* Link to other root trace. */
  LJ_TRLINK_LOOP,               /* Loop to same trace. */
  LJ_TRLINK_TAILREC,            /* Tail-recursion. */
  LJ_TRLINK_UPREC,              /* Up-recursion. */
  LJ_TRLINK_DOWNREC,            /* Down-recursion. */
  LJ_TRLINK_INTERP,             /* Fallback to interpreter. */
  LJ_TRLINK_RETURN              /* Return to interpreter. */
} TraceLink;
 216 
/* Trace object. A garbage-collected object holding one compiled trace. */
typedef struct GCtrace {
  GCHeader;
  uint8_t topslot;      /* Top stack slot already checked to be allocated. */
  uint8_t linktype;     /* Type of link. */
  IRRef nins;           /* Next IR instruction. Biased with REF_BIAS. */
  GCRef gclist;         /* GC list link; offset must match GChead (asserted below). */
  IRIns *ir;            /* IR instructions/constants. Biased with REF_BIAS. */
  IRRef nk;             /* Lowest IR constant. Biased with REF_BIAS. */
  uint16_t nsnap;       /* Number of snapshots. */
  uint16_t nsnapmap;    /* Number of snapshot map elements. */
  SnapShot *snap;       /* Snapshot array. */
  SnapEntry *snapmap;   /* Snapshot map. */
  GCRef startpt;        /* Starting prototype. */
  MRef startpc;         /* Bytecode PC of starting instruction. */
  BCIns startins;       /* Original bytecode of starting instruction. */
  MSize szmcode;        /* Size of machine code. */
  MCode *mcode;         /* Start of machine code. */
  MSize mcloop;         /* Offset of loop start in machine code. */
  uint16_t nchild;      /* Number of child traces (root trace only). */
  uint16_t spadjust;    /* Stack pointer adjustment (offset in bytes). */
  TraceNo1 traceno;     /* Trace number. */
  TraceNo1 link;        /* Linked trace (or self for loops). */
  TraceNo1 root;        /* Root trace of side trace (or 0 for root traces). */
  TraceNo1 nextroot;    /* Next root trace for same prototype. */
  TraceNo1 nextside;    /* Next side trace of same root trace. */
  uint8_t sinktags;     /* Trace has SINK tags. */
  uint8_t unused1;
#ifdef LUAJIT_USE_GDBJIT
  void *gdbjit_entry;   /* GDB JIT entry. */
#endif
} GCtrace;

/* Downcast a GCobj to a trace (type tag verified via check_exp). */
#define gco2trace(o)    check_exp((o)->gch.gct == ~LJ_TTRACE, (GCtrace *)(o))
/* Look up trace number n; bounds-checked against J->sizetrace. */
#define traceref(J, n) \
  check_exp((n)>0 && (MSize)(n)<J->sizetrace, (GCtrace *)gcref(J->trace[(n)]))

LJ_STATIC_ASSERT(offsetof(GChead, gclist) == offsetof(GCtrace, gclist));
 255 
 256 static LJ_AINLINE MSize snap_nextofs(GCtrace *T, SnapShot *snap)
 257 {
 258   if (snap+1 == &T->snap[T->nsnap])
 259     return T->nsnapmap;
 260   else
 261     return (snap+1)->mapofs;
 262 }
 263 
/* Round-robin penalty cache for bytecodes leading to aborted traces. */
typedef struct HotPenalty {
  MRef pc;              /* Starting bytecode PC. */
  uint16_t val;         /* Penalty value, i.e. hotcount start. */
  uint16_t reason;      /* Abort reason (really TraceErr). */
} HotPenalty;

#define PENALTY_SLOTS   64      /* Penalty cache slots. Must be a power of 2. */
#define PENALTY_MIN     (36*2)  /* Minimum penalty value. */
#define PENALTY_MAX     60000   /* Maximum penalty value. */
#define PENALTY_RNDBITS 4       /* # of random bits to add to penalty value. */

/* Round-robin backpropagation cache for narrowing conversions. */
typedef struct BPropEntry {
  IRRef1 key;           /* Key: original reference. */
  IRRef1 val;           /* Value: reference after conversion. */
  IRRef mode;           /* Mode for this entry (currently IRCONV_*). */
} BPropEntry;

/* Number of slots for the backpropagation cache. Must be a power of 2. */
#define BPROP_SLOTS     16
 285 
/* Scalar evolution analysis cache. */
typedef struct ScEvEntry {
  MRef pc;              /* Bytecode PC of FORI. */
  IRRef1 idx;           /* Index reference. */
  IRRef1 start;         /* Constant start reference. */
  IRRef1 stop;          /* Constant stop reference. */
  IRRef1 step;          /* Constant step reference. */
  IRType1 t;            /* Scalar type. */
  uint8_t dir;          /* Direction. 1: +, 0: -. */
} ScEvEntry;

/* 128 bit SIMD constants. */
enum {
  LJ_KSIMD_ABS,
  LJ_KSIMD_NEG,
  LJ_KSIMD__MAX
};

/* Get 16 byte aligned pointer to SIMD constant.
** Rounds &J->ksimd[2*n] up to the next 16 byte boundary; the +1 in the
** J->ksimd array declaration provides the slack for this round-up.
*/
#define LJ_KSIMD(J, n) \
  ((TValue *)(((intptr_t)&J->ksimd[2*(n)] + 15) & ~(intptr_t)15))
 307 
/* Set/reset flag to activate the SPLIT pass for the current trace. */
/* Only soft-float and 32 bit FFI builds have the needsplit field; the
** macros compile to no-ops elsewhere.
*/
#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
#define lj_needsplit(J)         (J->needsplit = 1)
#define lj_resetsplit(J)        (J->needsplit = 0)
#else
#define lj_needsplit(J)         UNUSED(J)
#define lj_resetsplit(J)        UNUSED(J)
#endif

/* Fold state is used to fold instructions on-the-fly. */
typedef struct FoldState {
  IRIns ins;            /* Currently emitted instruction. */
  IRIns left;           /* Instruction referenced by left operand. */
  IRIns right;          /* Instruction referenced by right operand. */
} FoldState;
 323 
/* JIT compiler state. */
typedef struct jit_State {
  GCtrace cur;          /* Current trace. */

  lua_State *L;         /* Current Lua state. */
  const BCIns *pc;      /* Current PC. */
  GCfunc *fn;           /* Current function. */
  GCproto *pt;          /* Current prototype. */
  TRef *base;           /* Current frame base, points into J->slots. */

  uint32_t flags;       /* JIT engine flags. */
  BCReg maxslot;        /* Relative to baseslot. */
  BCReg baseslot;       /* Current frame base, offset into J->slots. */

  uint8_t mergesnap;    /* Allowed to merge with next snapshot. */
  uint8_t needsnap;     /* Need snapshot before recording next bytecode. */
  IRType1 guardemit;    /* Accumulated IRT_GUARD for emitted instructions. */
  uint8_t bcskip;       /* Number of bytecode instructions to skip. */

  FoldState fold;       /* Fold state. */

  const BCIns *bc_min;  /* Start of allowed bytecode range for root trace. */
  MSize bc_extent;      /* Extent of the range. */

  TraceState state;     /* Trace compiler state. */

  int32_t instunroll;   /* Unroll counter for unstable loops. */
  int32_t loopunroll;   /* Unroll counter for loop ops in side traces. */
  int32_t tailcalled;   /* Number of successive tailcalls. */
  int32_t framedepth;   /* Current frame depth. */
  int32_t retdepth;     /* Return frame depth (count of RETF). */

  MRef k64;             /* Pointer to chained array of 64 bit constants. */
  TValue ksimd[LJ_KSIMD__MAX*2+1];  /* SIMD constants; 16 byte aligned via LJ_KSIMD (array itself over-allocated by one). */

  IRIns *irbuf;         /* Temp. IR instruction buffer. Biased with REF_BIAS. */
  IRRef irtoplim;       /* Upper limit of instruction buffer (biased). */
  IRRef irbotlim;       /* Lower limit of instruction buffer (biased). */
  IRRef loopref;        /* Last loop reference or ref of final LOOP (or 0). */

  MSize sizesnap;       /* Size of temp. snapshot buffer. */
  SnapShot *snapbuf;    /* Temp. snapshot buffer. */
  SnapEntry *snapmapbuf;  /* Temp. snapshot map buffer. */
  MSize sizesnapmap;    /* Size of temp. snapshot map buffer. */

  PostProc postproc;    /* Required post-processing after execution. */
#if LJ_SOFTFP || (LJ_32 && LJ_HASFFI)
  int needsplit;        /* Need SPLIT pass. */
#endif

  GCRef *trace;         /* Array of traces. */
  TraceNo freetrace;    /* Start of scan for next free trace. */
  MSize sizetrace;      /* Size of trace array. */

  IRRef1 chain[IR__MAX];  /* IR instruction skip-list chain anchors. */
  TRef slot[LJ_MAX_JSLOTS+LJ_STACK_EXTRA];  /* Stack slot map. */

  int32_t param[JIT_P__MAX];  /* JIT engine parameters. */

  MCode *exitstubgroup[LJ_MAX_EXITSTUBGR];  /* Exit stub group addresses. */

  HotPenalty penalty[PENALTY_SLOTS];  /* Penalty slots. */
  uint32_t penaltyslot; /* Round-robin index into penalty slots. */
  uint32_t prngstate;   /* PRNG state. */

  BPropEntry bpropcache[BPROP_SLOTS];  /* Backpropagation cache slots. */
  uint32_t bpropslot;   /* Round-robin index into bpropcache slots. */

  ScEvEntry scev;       /* Scalar evolution analysis cache slots. */

  const BCIns *startpc; /* Bytecode PC of starting instruction. */
  TraceNo parent;       /* Parent of current side trace (0 for root traces). */
  ExitNo exitno;        /* Exit number in parent of current side trace. */

  BCIns *patchpc;       /* PC for pending re-patch. */
  BCIns patchins;       /* Instruction for pending re-patch. */

  int mcprot;           /* Protection of current mcode area. */
  MCode *mcarea;        /* Base of current mcode area. */
  MCode *mctop;         /* Top of current mcode area. */
  MCode *mcbot;         /* Bottom of current mcode area. */
  size_t szmcarea;      /* Size of current mcode area. */
  size_t szallmcarea;   /* Total size of all allocated mcode areas. */

  TValue errinfo;       /* Additional info element for trace errors. */
}
#if LJ_TARGET_ARM
LJ_ALIGN(16)            /* For DISPATCH-relative addresses in assembler part. */
#endif
jit_State;
 414 
 415 /* Trivial PRNG e.g. used for penalty randomization. */
 416 static LJ_AINLINE uint32_t LJ_PRNG_BITS(jit_State *J, int bits)
 417 {
 418   /* Yes, this LCG is very weak, but that doesn't matter for our use case. */
 419   J->prngstate = J->prngstate * 1103515245 + 12345;
 420   return J->prngstate >> (32-bits);
 421 }
 422 
 423 #endif

/* [<][>][^][v][top][bottom][index][help] */