root/lj_str.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. lj_str_cmp
  2. str_fastcmp
  3. lj_str_find
  4. lj_str_haspattern
  5. lj_str_resize
  6. lj_str_new
  7. lj_str_free

   1 /*
   2 ** String handling.
   3 ** Copyright (C) 2005-2017 Mike Pall. See Copyright Notice in luajit.h
   4 */
   5 
   6 #define lj_str_c
   7 #define LUA_CORE
   8 
   9 #include "lj_obj.h"
  10 #include "lj_gc.h"
  11 #include "lj_err.h"
  12 #include "lj_str.h"
  13 #include "lj_char.h"
  14 
  15 /* -- String helpers ------------------------------------------------------ */
  16 
  17 /* Ordered compare of strings. Assumes string data is 4-byte aligned. */
  18 int32_t LJ_FASTCALL lj_str_cmp(GCstr *a, GCstr *b)
  19 {
  20   MSize i, n = a->len > b->len ? b->len : a->len;
  21   for (i = 0; i < n; i += 4) {
  22     /* Note: innocuous access up to end of string + 3. */
  23     uint32_t va = *(const uint32_t *)(strdata(a)+i);
  24     uint32_t vb = *(const uint32_t *)(strdata(b)+i);
  25     if (va != vb) {
  26 #if LJ_LE
  27       va = lj_bswap(va); vb = lj_bswap(vb);
  28 #endif
  29       i -= n;
  30       if ((int32_t)i >= -3) {
  31         va >>= 32+(i<<3); vb >>= 32+(i<<3);
  32         if (va == vb) break;
  33       }
  34       return va < vb ? -1 : 1;
  35     }
  36   }
  37   return (int32_t)(a->len - b->len);
  38 }
  39 
  40 /* Fast string data comparison. Caveat: unaligned access to 1st string! */
  41 static LJ_AINLINE int str_fastcmp(const char *a, const char *b, MSize len)
  42 {
  43   MSize i = 0;
  44   lua_assert(len > 0);
  45   lua_assert((((uintptr_t)a+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4);
  46   do {  /* Note: innocuous access up to end of string + 3. */
  47     uint32_t v = lj_getu32(a+i) ^ *(const uint32_t *)(b+i);
  48     if (v) {
  49       i -= len;
  50 #if LJ_LE
  51       return (int32_t)i >= -3 ? (v << (32+(i<<3))) : 1;
  52 #else
  53       return (int32_t)i >= -3 ? (v >> (32+(i<<3))) : 1;
  54 #endif
  55     }
  56     i += 4;
  57   } while (i < len);
  58   return 0;
  59 }
  60 
  61 /* Find fixed string p inside string s. */
  62 const char *lj_str_find(const char *s, const char *p, MSize slen, MSize plen)
  63 {
  64   if (plen <= slen) {
  65     if (plen == 0) {
  66       return s;
  67     } else {
  68       int c = *(const uint8_t *)p++;
  69       plen--; slen -= plen;
  70       while (slen) {
  71         const char *q = (const char *)memchr(s, c, slen);
  72         if (!q) break;
  73         if (memcmp(q+1, p, plen) == 0) return q;
  74         q++; slen -= (MSize)(q-s); s = q;
  75       }
  76     }
  77   }
  78   return NULL;
  79 }
  80 
  81 /* Check whether a string has a pattern matching character. */
  82 int lj_str_haspattern(GCstr *s)
  83 {
  84   const char *p = strdata(s), *q = p + s->len;
  85   while (p < q) {
  86     int c = *(const uint8_t *)p++;
  87     if (lj_char_ispunct(c) && strchr("^$*+?.([%-", c))
  88       return 1;  /* Found a pattern matching char. */
  89   }
  90   return 0;  /* No pattern matching chars found. */
  91 }
  92 
  93 /* -- String interning ---------------------------------------------------- */
  94 
  95 /* Resize the string hash table (grow and shrink). */
  96 void lj_str_resize(lua_State *L, MSize newmask)
  97 {
  98   global_State *g = G(L);
  99   GCRef *newhash;
 100   MSize i;
 101   if (g->gc.state == GCSsweepstring || newmask >= LJ_MAX_STRTAB-1)
 102     return;  /* No resizing during GC traversal or if already too big. */
 103   newhash = lj_mem_newvec(L, newmask+1, GCRef);
 104   memset(newhash, 0, (newmask+1)*sizeof(GCRef));
 105   for (i = g->strmask; i != ~(MSize)0; i--) {  /* Rehash old table. */
 106     GCobj *p = gcref(g->strhash[i]);
 107     while (p) {  /* Follow each hash chain and reinsert all strings. */
 108       MSize h = gco2str(p)->hash & newmask;
 109       GCobj *next = gcnext(p);
 110       /* NOBARRIER: The string table is a GC root. */
 111       setgcrefr(p->gch.nextgc, newhash[h]);
 112       setgcref(newhash[h], p);
 113       p = next;
 114     }
 115   }
 116   lj_mem_freevec(g, g->strhash, g->strmask+1, GCRef);
 117   g->strmask = newmask;
 118   g->strhash = newhash;
 119 }
 120 
 121 /* Intern a string and return string object. */
 122 GCstr *lj_str_new(lua_State *L, const char *str, size_t lenx)
 123 {
 124   global_State *g;
 125   GCstr *s;
 126   GCobj *o;
 127   MSize len = (MSize)lenx;
 128   MSize a, b, h = len;
 129   if (lenx >= LJ_MAX_STR)
 130     lj_err_msg(L, LJ_ERR_STROV);
 131   g = G(L);
 132   /* Compute string hash. Constants taken from lookup3 hash by Bob Jenkins. */
 133   if (len >= 4) {  /* Caveat: unaligned access! */
 134     a = lj_getu32(str);
 135     h ^= lj_getu32(str+len-4);
 136     b = lj_getu32(str+(len>>1)-2);
 137     h ^= b; h -= lj_rol(b, 14);
 138     b += lj_getu32(str+(len>>2)-1);
 139   } else if (len > 0) {
 140     a = *(const uint8_t *)str;
 141     h ^= *(const uint8_t *)(str+len-1);
 142     b = *(const uint8_t *)(str+(len>>1));
 143     h ^= b; h -= lj_rol(b, 14);
 144   } else {
 145     return &g->strempty;
 146   }
 147   a ^= h; a -= lj_rol(h, 11);
 148   b ^= a; b -= lj_rol(a, 25);
 149   h ^= b; h -= lj_rol(b, 16);
 150   /* Check if the string has already been interned. */
 151   o = gcref(g->strhash[h & g->strmask]);
 152   if (LJ_LIKELY((((uintptr_t)str+len-1) & (LJ_PAGESIZE-1)) <= LJ_PAGESIZE-4)) {
 153     while (o != NULL) {
 154       GCstr *sx = gco2str(o);
 155       if (sx->len == len && str_fastcmp(str, strdata(sx), len) == 0) {
 156         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 157         if (isdead(g, o)) flipwhite(o);
 158         return sx;  /* Return existing string. */
 159       }
 160       o = gcnext(o);
 161     }
 162   } else {  /* Slow path: end of string is too close to a page boundary. */
 163     while (o != NULL) {
 164       GCstr *sx = gco2str(o);
 165       if (sx->len == len && memcmp(str, strdata(sx), len) == 0) {
 166         /* Resurrect if dead. Can only happen with fixstring() (keywords). */
 167         if (isdead(g, o)) flipwhite(o);
 168         return sx;  /* Return existing string. */
 169       }
 170       o = gcnext(o);
 171     }
 172   }
 173   /* Nope, create a new string. */
 174   s = lj_mem_newt(L, sizeof(GCstr)+len+1, GCstr);
 175   newwhite(g, s);
 176   s->gct = ~LJ_TSTR;
 177   s->len = len;
 178   s->hash = h;
 179   s->reserved = 0;
 180   memcpy(strdatawr(s), str, len);
 181   strdatawr(s)[len] = '\0';  /* Zero-terminate string. */
 182   /* Add it to string hash table. */
 183   h &= g->strmask;
 184   s->nextgc = g->strhash[h];
 185   /* NOBARRIER: The string table is a GC root. */
 186   setgcref(g->strhash[h], obj2gco(s));
 187   if (g->strnum++ > g->strmask)  /* Allow a 100% load factor. */
 188     lj_str_resize(L, (g->strmask<<1)+1);  /* Grow string table. */
 189   return s;  /* Return newly interned string. */
 190 }
 191 
 192 void LJ_FASTCALL lj_str_free(global_State *g, GCstr *s)
 193 {
 194   g->strnum--;
 195   lj_mem_free(g, s, sizestring(s));
 196 }
 197 

/* [<][>][^][v][top][bottom][index][help] */