root/lj_str.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. lj_str_cmp
  2. str_fastcmp
  3. lj_str_resize
  4. lj_str_new
  5. lj_str_free
  6. lj_str_bufnum
  7. lj_str_bufint
  8. lj_str_fromnum
  9. lj_str_fromint
  10. lj_str_fromnumber
  11. addstr
  12. addchar
  13. lj_str_pushvf
  14. lj_str_pushf
  15. lj_str_needbuf

   1 /*
   2 ** String handling.
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 **
   5 ** Portions taken verbatim or adapted from the Lua interpreter.
   6 ** Copyright (C) 1994-2008 Lua.org, PUC-Rio. See Copyright Notice in lua.h
   7 */
   8 
   9 #include <stdio.h>
  10 
  11 #define lj_str_c
  12 #define LUA_CORE
  13 
  14 #include "lj_obj.h"
  15 #include "lj_gc.h"
  16 #include "lj_err.h"
  17 #include "lj_str.h"
  18 #include "lj_state.h"
  19 #include "lj_char.h"
  20 
  21 /* -- String interning ---------------------------------------------------- */
  22 
  23 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
  24 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
  25 {
  26   MSize i, n = a->len > b->len ? b->len : a->len;
  27   for (i = 0; i < n; i += 4) {
  28     /* Note: innocuous access up to end of string + 3. */
  29     uint32_t va = *(const uint32_t *)(strdata(a)+i);
  30     uint32_t vb = *(const uint32_t *)(strdata(b)+i);
  31     if (va != vb) {
  32 #if LJ_LE
  33       va = lj_bswap(va); vb = lj_bswap(vb);
  34 #endif
  35       i -= n;
  36       if ((int32_t)i >= -3) {
  37         va >>= 32+(i<<3); vb >>= 32+(i<<3);
  38         if (va == vb) break;
  39       }
  40       return va < vb ? -1 : 1;
  41     }
  42   }
  43   return (int32_t)(a->len - b->len);
  44 }
  45 
  46 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
  47 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
  48 {
  49   MSize i = 0;
  50   lua_assert(len > 0);
  51   lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
  52   do {  /* Note: innocuous access up to end of string + 3. */
  53     uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
  54     if (v) {
  55       i -= len;
  56 #if LJ_LE
  57       return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
  58 #else
  59       return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
  60 #endif
  61     }
  62     i += 4;
  63   } while (i < len);
  64   return 0;
  65 }
  66 
  67 /* Resize the string hash table (grow and shrink). */
  68 void lj_str_resize(lua_State *L, MSize newmask)
  69 {
  70   global_State *g = G(L);
  71   GCRef *newhash;
  72   MSize i;
  73   if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
  74     return;  /* No resizing during GC traversal or if already too big. */
  75   newhash = lj_mem_newvec(L, newmask+1, GCRef);
  76   memset(newhash, 0, (newmask+1)*sizeof(GCRef));
  77   for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
  78     GCobj *p = gcref(g->strhash[i]);
  79     while (p) {  /* Follow each hash chain and reinsert all strings. */
  80       MSize h = gco2str(p)->hash & newmask;
  81       GCobj *next = gcnext(p);
  82       /* NOBARRIER: The string table is a GC root. */
  83       setgcrefr(p->gch.nextgc, newhash[h]);
  84       setgcref(newhash[h], p);
  85       p = next;
  86     }
  87   }
  88   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
  89   g->strmask = newmask;
  90   g->strhash = newhash;
  91 }
  92 
  93 /* Intern a string and return string object. */
  94 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
  95 {
  96   global_State *g;
  97   GCstr *s;
  98   GCobj *o;
  99   MSize len = (MSize)lenx;
 100   MSize a, b, h = len;
 101   if (lenx >= LJ_MAX_STR)
 102     lj_err_msg(L, LJ_ERR_STROV);
 103   g = G(L);
 104   /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
 105   if (len >= 4) {  /* Caveat: unaligned access! */
 106     a = lj_getu32(str);
 107     h ^= lj_getu32(str+len-4);
 108     b = lj_getu32(str+(len>>1)-2);
 109     h ^= b; h -= lj_rol(b, 14);
 110     b += lj_getu32(str+(len>>2)-1);
 111   } else if (len > 0) {
 112     a = *(const uint8_t *)str;
 113     h ^= *(const uint8_t *)(str+len-1);
 114     b = *(const uint8_t *)(str+(len>>1));
 115     h ^= b; h -= lj_rol(b, 14);
 116   } else {
 117     return &g->strempty;
 118   }
 119   a ^= h; a -= lj_rol(h, 11);
 120   b ^= a; b -= lj_rol(a, 25);
 121   h ^= b; h -= lj_rol(b, 16);
 122   /* Check if the string has already been interned. */
 123   o = gcref(g->strhash[h & g->strmask]);
 124   if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
 125     while (o != NULL) {
 126       GCstr *sx = gco2str(o);
 127       if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
 128         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 129         if (isdead(g, o)) flipwhite(o);
 130         return sx;  /* Return existing string. */
 131       }
 132       o = gcnext(o);
 133     }
 134   } else {  /* Slow path: end of string is too close to a page boundary. */
 135     while (o != NULL) {
 136       GCstr *sx = gco2str(o);
 137       if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
 138         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 139         if (isdead(g, o)) flipwhite(o);
 140         return sx;  /* Return existing string. */
 141       }
 142       o = gcnext(o);
 143     }
 144   }
 145   /* Nope, create a new string. */
 146   s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
 147   newwhite(g, s);
 148   s->gct = ~LJ_TSTR;
 149   s->len = len;
 150   s->hash = h;
 151   s->reserved = 0;
 152   memcpy(strdatawr(s), str, len);
 153   strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
 154   /* Add it to string hash table. */
 155   h &= g->strmask;
 156   s->nextgc = g->strhash[h];
 157   /* NOBARRIER: The string table is a GC root. */
 158   setgcref(g->strhash[h], obj2gco(s));
 159   if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
 160     lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
 161   return s;  /* Return newly interned string. */
 162 }
 163 
 164 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
 165 {
 166   g->strnum--;
 167   lj_mem_free(g, s, sizestring(s));
 168 }
 169 
 170 /* -- Type conversions ---------------------------------------------------- */
 171 
 172 /* Print number to buffer. Canonicalizes non-finite values. */
 173 size_t LJ_FASTCALL lj_str_bufnum(char *s, cTValue *o)
 174 {
 175   if (LJ_LIKELY((o->u32.hi << 1) < 0xffe00000)) {  /* Finite? */
 176     lua_Number n = o->n;
 177 #if __BIONIC__
 178     if (tvismzero(o)) { s[0] = '-'; s[1] = '0'; return 2; }
 179 #endif
 180     return (size_t)lua_number2str(s, n);
 181   } else if (((o->u32.hi & 0x000fffff) | o->u32.lo) != 0) {
 182     s[0] = 'n'; s[1] = 'a'; s[2] = 'n'; return 3;
 183   } else if ((o->u32.hi & 0x80000000) == 0) {
 184     s[0] = 'i'; s[1] = 'n'; s[2] = 'f'; return 3;
 185   } else {
 186     s[0] = '-'; s[1] = 'i'; s[2] = 'n'; s[3] = 'f'; return 4;
 187   }
 188 }
 189 
 190 /* Print integer to buffer. Returns pointer to start. */
 191 char * LJ_FASTCALL lj_str_bufint(char *p, int32_t k)
 192 {
 193   uint32_t u = (uint32_t)(k < 0 ? -k : k);
 194   p += 1+10;
 195   do { *--p = (char)('0' + u % 10); } while (u /= 10);
 196   if (k < 0) *--p = '-';
 197   return p;
 198 }
 199 
 200 /* Convert number to string. */
 201 GCstr * LJ_FASTCALL lj_str_fromnum(lua_State *L, const lua_Number *np)
 202 {
 203   char buf[LJ_STR_NUMBUF];
 204   size_t len = lj_str_bufnum(buf, (TValue *)np);
 205   return lj_str_new(L, buf, len);
 206 }
 207 
 208 /* Convert integer to string. */
 209 GCstr * LJ_FASTCALL lj_str_fromint(lua_State *L, int32_t k)
 210 {
 211   char s[1+10];
 212   char *p = lj_str_bufint(s, k);
 213   return lj_str_new(L, p, (size_t)(s+sizeof(s)-p));
 214 }
 215 
 216 GCstr * LJ_FASTCALL lj_str_fromnumber(lua_State *L, cTValue *o)
 217 {
 218   return tvisint(o) ? lj_str_fromint(L, intV(o)) : lj_str_fromnum(L, &o->n);
 219 }
 220 
 221 /* -- String formatting --------------------------------------------------- */
 222 
 223 static void addstr(lua_State *L, SBuf *sb, const char *str, MSize len)
 224 {
 225   char *p;
 226   MSize i;
 227   if (sb->n + len > sb->sz) {
 228     MSize sz = sb->sz * 2;
 229     while (sb->n + len > sz) sz = sz * 2;
 230     lj_str_resizebuf(L, sb, sz);
 231   }
 232   p = sb->buf + sb->n;
 233   sb->n += len;
 234   for (i = 0; i < len; i++) p[i] = str[i];
 235 }
 236 
 237 static void addchar(lua_State *L, SBuf *sb, int c)
 238 {
 239   if (sb->n + 1 > sb->sz) {
 240     MSize sz = sb->sz * 2;
 241     lj_str_resizebuf(L, sb, sz);
 242   }
 243   sb->buf[sb->n++] = (char)c;
 244 }
 245 
 246 /* Push formatted message as a string object to Lua stack. va_list variant. */
 247 const char *lj_str_pushvf(lua_State *L, const char *fmt, va_list argp)
 248 {
 249   SBuf *sb = &G(L)->tmpbuf;
 250   lj_str_needbuf(L, sb, (MSize)strlen(fmt));
 251   lj_str_resetbuf(sb);
 252   for (;;) {
 253     const char *e = strchr(fmt, '%');
 254     if (e == NULL) break;
 255     addstr(L, sb, fmt, (MSize)(e-fmt));
 256     /* This function only handles %s, %c, %d, %f and %p formats. */
 257     switch (e[1]) {
 258     case 's': {
 259       const char *s = va_arg(argp, char *);
 260       if (s == NULL) s = "(null)";
 261       addstr(L, sb, s, (MSize)strlen(s));
 262       break;
 263       }
 264     case 'c':
 265       addchar(L, sb, va_arg(argp, int));
 266       break;
 267     case 'd': {
 268       char buf[LJ_STR_INTBUF];
 269       char *p = lj_str_bufint(buf, va_arg(argp, int32_t));
 270       addstr(L, sb, p, (MSize)(buf+LJ_STR_INTBUF-p));
 271       break;
 272       }
 273     case 'f': {
 274       char buf[LJ_STR_NUMBUF];
 275       TValue tv;
 276       MSize len;
 277       tv.n = (lua_Number)(va_arg(argp, LUAI_UACNUMBER));
 278       len = (MSize)lj_str_bufnum(buf, &tv);
 279       addstr(L, sb, buf, len);
 280       break;
 281       }
 282     case 'p': {
 283 #define FMTP_CHARS      (2*sizeof(ptrdiff_t))
 284       char buf[2+FMTP_CHARS];
 285       ptrdiff_t p = (ptrdiff_t)(va_arg(argp, void *));
 286       ptrdiff_t i, lasti = 2+FMTP_CHARS;
 287       if (p == 0) {
 288         addstr(L, sb, "NULL", 4);
 289         break;
 290       }
 291 #if LJ_64
 292       /* Shorten output for 64 bit pointers. */
 293       lasti = 2+2*4+((p >> 32) ? 2+2*(lj_fls((uint32_t)(p >> 32))>>3) : 0);
 294 #endif
 295       buf[0] = '0';
 296       buf[1] = 'x';
 297       for (i = lasti-1; i >= 2; i--, p >>= 4)
 298         buf[i] = "0123456789abcdef"[(p & 15)];
 299       addstr(L, sb, buf, (MSize)lasti);
 300       break;
 301       }
 302     case '%':
 303       addchar(L, sb, '%');
 304       break;
 305     default:
 306       addchar(L, sb, '%');
 307       addchar(L, sb, e[1]);
 308       break;
 309     }
 310     fmt = e+2;
 311   }
 312   addstr(L, sb, fmt, (MSize)strlen(fmt));
 313   setstrV(L, L->top, lj_str_new(L, sb->buf, sb->n));
 314   incr_top(L);
 315   return strVdata(L->top - 1);
 316 }
 317 
 318 /* Push formatted message as a string object to Lua stack. Vararg variant. */
 319 const char *lj_str_pushf(lua_State *L, const char *fmt, ...)
 320 {
 321   const char *msg;
 322   va_list argp;
 323   va_start(argp, fmt);
 324   msg = lj_str_pushvf(L, fmt, argp);
 325   va_end(argp);
 326   return msg;
 327 }
 328 
 329 /* -- Buffer handling ----------------------------------------------------- */
 330 
 331 char *lj_str_needbuf(lua_State *L, SBuf *sb, MSize sz)
 332 {
 333   if (sz > sb->sz) {
 334     if (sz < LJ_MIN_SBUF) sz = LJ_MIN_SBUF;
 335     lj_str_resizebuf(L, sb, sz);
 336   }
 337   return sb->buf;
 338 }
 339 

/* [<][>][^][v][top][bottom][index][help] */