Logo Search packages:      
Sourcecode: nmap version File versions  Download package

nse_pcrelib.cc

/* lrexlib.c - PCRE regular expression library */
/* Reuben Thomas   nov00-18dec04 */
/* Shmuel Zeigerman   may04-18dec04 */


#include <stdio.h>
#include <stdlib.h>
#include <string.h>

extern "C" {
  #include "lua.h"
  #include "lauxlib.h"
}

#include <locale.h>

#include "nbase.h"
#include "nmap_error.h"

#ifdef HAVE_PCRE_PCRE_H
# include <pcre/pcre.h>
#else
# include <pcre.h>
#endif

#include "nse_pcrelib.h"

static int get_startoffset(lua_State *L, int stackpos, size_t len)
{
      int startoffset = luaL_optint(L, stackpos, 1);
      if(startoffset > 0)
            startoffset--;
      else if(startoffset < 0) {
            startoffset += (int) len;
            if(startoffset < 0)
                  startoffset = 0;
      }
      return startoffset;
}

static int udata_tostring (lua_State *L, const char* type_handle,
            const char* type_name)
{
      char buf[256];
      void *udata = luaL_checkudata(L, 1, type_handle);

      if(udata) {
            (void)Snprintf(buf, 255, "%s (%p)", type_name, udata);
            lua_pushstring(L, buf);
      }
      else {
            (void)Snprintf(buf, 255, "must be userdata of type '%s'", type_name);
            (void)luaL_argerror(L, 1, buf);
      }

      free(udata);
      return 1;
}

00060 typedef struct { const char* key; lua_Number val; } flags_pair;

static int get_flags (lua_State *L, const flags_pair *arr)
{
      const flags_pair *p;
      lua_newtable(L);
      for(p=arr; p->key != NULL; p++) {
            lua_pushstring(L, p->key);
            lua_pushnumber(L, p->val);
            lua_rawset(L, -3);
      }
      return 1;
}

const char pcre_handle[] = "pcre_regex_handle";
const char pcre_typename[] = "pcre_regex";

00077 typedef struct {
      pcre *pr;
      pcre_extra *extra;
      int *match;
      int ncapt;
      const unsigned char *tables;
} pcre2;      /* a better name is needed */

static const unsigned char *Lpcre_maketables(lua_State *L, int stackpos)
{
      const unsigned char *tables;
      char old_locale[256];
      char *locale = strdup(luaL_checkstring(L, stackpos));

      if(locale == NULL)
            luaL_error(L, "cannot set locale");

      strncpy(old_locale, setlocale(LC_CTYPE, NULL), 255); /* store the locale */

      if(setlocale(LC_CTYPE, locale) == NULL)        /* set new locale */
            luaL_error(L, "cannot set locale");

      tables = pcre_maketables();              /* make tables with new locale */
      (void)setlocale(LC_CTYPE, old_locale);         /* restore the old locale */

      free(locale);
      return tables;
}

static int Lpcre_comp(lua_State *L)
{
      char buf[256];
      const char *error;
      int erroffset;
      pcre2 *ud;
      char *pattern = strdup(luaL_checkstring(L, 1));
      int cflags = luaL_optint(L, 2, 0);
      const unsigned char *tables = NULL;

      if(lua_gettop(L) > 2 && !lua_isnil(L, 3))
            tables = Lpcre_maketables(L, 3);
      if(tables == NULL)
            luaL_error(L, "PCRE compilation failed");

      ud = (pcre2*)lua_newuserdata(L, sizeof(pcre2));
      luaL_getmetatable(L, pcre_handle);
      (void)lua_setmetatable(L, -2);
      ud->match = NULL;
      ud->extra = NULL;
      ud->tables = tables; /* keep this for eventual freeing */

      ud->pr = pcre_compile(pattern, cflags, &error, &erroffset, tables);
      if(!ud->pr) {
            (void)Snprintf(buf, 255, "%s (pattern offset: %d)", error, erroffset+1);
            /* show offset 1-based as it's common in Lua */
            luaL_error(L, buf);
      }

      ud->extra = pcre_study(ud->pr, 0, &error);
      if(error) luaL_error(L, error);

      pcre_fullinfo(ud->pr, ud->extra, PCRE_INFO_CAPTURECOUNT, &ud->ncapt);
      /* need (2 ints per capture, plus one for substring match) * 3/2 */
      ud->match = (int *) safe_malloc((ud->ncapt + 1) * 3 * sizeof(int));

      return 1;
}

static void Lpcre_getargs(lua_State *L, pcre2 **pud, const char **text,
            size_t *text_len)
{
      *pud = (pcre2 *)luaL_checkudata(L, 1, pcre_handle);
      if(*pud == NULL)
            (void)luaL_argerror(L, 1, ("compiled regexp expected"));
      *text = luaL_checklstring(L, 2, text_len);
}

typedef void (*Lpcre_push_matches) (lua_State *L, const char *text, pcre2 *ud);

static void Lpcre_push_substrings (lua_State *L, const char *text, pcre2 *ud)
{
      unsigned int i, j;
      unsigned int namecount;
      unsigned char *name_table;
      int name_entry_size;
      unsigned char *tabptr;
      const int *match = ud->match;

      lua_newtable(L);
      for (i = 1; i <= (unsigned) ud->ncapt; i++) {
            j = i * 2;
            if (match[j] >= 0)
                  lua_pushlstring(L, text + match[j], (size_t)(match[j + 1] - match[j]));
            else
                  lua_pushboolean(L, 0);
            lua_rawseti(L, -2, i);
      }

      /* now do named subpatterns - NJG */
      (void)pcre_fullinfo(ud->pr, ud->extra, PCRE_INFO_NAMECOUNT, &namecount);
      if (namecount <= 0)
            return;
      (void)pcre_fullinfo(ud->pr, ud->extra, PCRE_INFO_NAMETABLE, &name_table);
      (void)pcre_fullinfo(ud->pr, ud->extra, PCRE_INFO_NAMEENTRYSIZE, &name_entry_size);
      tabptr = name_table;
      for (i = 0; i < namecount; i++) {
            unsigned int n = (tabptr[0] << 8) | tabptr[1]; /* number of the capturing parenthesis */
            if (n > 0 && n <= (unsigned) ud->ncapt) {   /* check range */
                  unsigned int j = n * 2;
                  lua_pushstring(L, (char*)tabptr + 2); /* name of the capture, zero terminated */
                  if (match[j] >= 0)
                        lua_pushlstring(L, text + match[j], match[j + 1] - match[j]);
                  else
                        lua_pushboolean(L, 0);
                  lua_rawset(L, -3);
            }
            tabptr += name_entry_size;
      }
}

static void Lpcre_push_offsets (lua_State *L, const char *text, pcre2 * ud)
{
      unsigned int i, j, k;
      if(text) {
            /* suppress compiler warning */
      }
      lua_newtable(L);
      for (i=1, j=1; i <= (unsigned) ud->ncapt; i++) {
            k = i * 2;
            if (ud->match[k] >= 0) {
                  lua_pushnumber(L, ud->match[k] + 1);
                  lua_rawseti(L, -2, j++);
                  lua_pushnumber(L, ud->match[k+1]);
                  lua_rawseti(L, -2, j++);
            }
            else {
                  lua_pushboolean(L, 0);
                  lua_rawseti(L, -2, j++);
                  lua_pushboolean(L, 0);
                  lua_rawseti(L, -2, j++);
            }
      }
}

static int Lpcre_match_generic(lua_State *L, Lpcre_push_matches push_matches)
{
      int res;
      const char *text;
      pcre2 *ud;
      size_t elen;
      int startoffset;
      int eflags = luaL_optint(L, 4, 0);

      Lpcre_getargs(L, &ud, &text, &elen);
      startoffset = get_startoffset(L, 3, elen);

      res = pcre_exec(ud->pr, ud->extra, text, (int)elen, startoffset, eflags,
                  ud->match, (ud->ncapt + 1) * 3);
      if (res >= 0) {
            lua_pushnumber(L, (lua_Number) ud->match[0] + 1);
            lua_pushnumber(L, (lua_Number) ud->match[1]);
            (*push_matches)(L, text, ud);
            return 3;
      }
      return 0;
}

static int Lpcre_match(lua_State *L)
{
      return Lpcre_match_generic(L, Lpcre_push_substrings);
}

static int Lpcre_exec(lua_State *L)
{
      return Lpcre_match_generic(L, Lpcre_push_offsets);
}

static int Lpcre_gmatch(lua_State *L)
{
      int res;
      size_t len;
      int nmatch = 0, limit = 0;
      const char *text;
      pcre2 *ud;
      int maxmatch = luaL_optint(L, 4, 0);
      int eflags = luaL_optint(L, 5, 0);
      int startoffset = 0;
      Lpcre_getargs(L, &ud, &text, &len);
      luaL_checktype(L, 3, LUA_TFUNCTION);

      if(maxmatch > 0) /* this must be stated in the docs */
            limit = 1;

      while (!limit || nmatch < maxmatch) {
            res = pcre_exec(ud->pr, ud->extra, text, (int)len, startoffset, eflags,
                        ud->match, (ud->ncapt + 1) * 3);
            if (res >= 0) {
                  nmatch++;
                  lua_pushvalue(L, 3);
                  lua_pushlstring(L, text + ud->match[0], ud->match[1] - ud->match[0]);
                  Lpcre_push_substrings(L, text, ud);
                  lua_call(L, 2, 1);
                  if(lua_toboolean(L, -1))
                        break;
                  lua_pop(L, 1);
                  startoffset = ud->match[1];
            } else
                  break;
      }
      lua_pushnumber(L, nmatch);
      return 1;
}

static int Lpcre_gc (lua_State *L)
{
      pcre2 *ud = (pcre2 *)luaL_checkudata(L, 1, pcre_handle);
      if (ud) {
            if(ud->pr)      pcre_free(ud->pr);
            if(ud->extra)   pcre_free(ud->extra);
            if(ud->tables)  pcre_free((void *)ud->tables);
            if(ud->match)   free(ud->match);
      }
      return 0;
}

static int Lpcre_tostring (lua_State *L) {
      return udata_tostring(L, pcre_handle, pcre_typename);
}

static int Lpcre_vers (lua_State *L)
{
      lua_pushstring(L, pcre_version());
      return 1;
}

static flags_pair pcre_flags[] =
{
      { "CASELESS",        PCRE_CASELESS },
      { "MULTILINE",       PCRE_MULTILINE },
      { "DOTALL",          PCRE_DOTALL },
      { "EXTENDED",        PCRE_EXTENDED },
      { "ANCHORED",        PCRE_ANCHORED },
      { "DOLLAR_ENDONLY",  PCRE_DOLLAR_ENDONLY },
      { "EXTRA",           PCRE_EXTRA },
      { "NOTBOL",          PCRE_NOTBOL },
      { "NOTEOL",          PCRE_NOTEOL },
      { "UNGREEDY",        PCRE_UNGREEDY },
      { "NOTEMPTY",        PCRE_NOTEMPTY },
      { "UTF8",            PCRE_UTF8 },
#if PCRE_MAJOR >= 4
      { "NO_AUTO_CAPTURE", PCRE_NO_AUTO_CAPTURE },
      { "NO_UTF8_CHECK",   PCRE_NO_UTF8_CHECK },
#endif
#ifdef PCRE_AUTO_CALLOUT
      { "AUTO_CALLOUT",    PCRE_AUTO_CALLOUT },
#endif
#ifdef PCRE_PARTIAL
      { "PARTIAL",         PCRE_PARTIAL },
#endif
#ifdef PCRE_DFA_SHORTEST
      { "DFA_SHORTEST",    PCRE_DFA_SHORTEST },
#endif
#ifdef PCRE_DFA_RESTART
      { "DFA_RESTART",     PCRE_DFA_RESTART },
#endif
#ifdef PCRE_FIRSTLINE
      { "FIRSTLINE",       PCRE_FIRSTLINE },
#endif
#ifdef PCRE_DUPNAMES
      { "DUPNAMES",        PCRE_DUPNAMES },
#endif
#ifdef PCRE_NEWLINE_CR
      { "NEWLINE_CR",      PCRE_NEWLINE_CR },
#endif
#ifdef PCRE_NEWLINE_LF
      { "NEWLINE_LF",      PCRE_NEWLINE_LF },
#endif
#ifdef PCRE_NEWLINE_CRLF
      { "NEWLINE_CRLF",    PCRE_NEWLINE_CRLF },
#endif
#ifdef PCRE_NEWLINE_ANY
      { "NEWLINE_ANY",     PCRE_NEWLINE_ANY },
#endif
#ifdef PCRE_NEWLINE_ANYCRLF
      { "NEWLINE_ANYCRLF", PCRE_NEWLINE_ANYCRLF },
#endif
#ifdef PCRE_BSR_ANYCRLF
      { "BSR_ANYCRLF",     PCRE_BSR_ANYCRLF },
#endif
#ifdef PCRE_BSR_UNICODE
      { "BSR_UNICODE",     PCRE_BSR_UNICODE },
#endif
      { NULL, 0 }
};

static int Lpcre_get_flags (lua_State *L) {
      return get_flags(L, pcre_flags);
}

static const luaL_reg pcremeta[] = {
      {"exec",       Lpcre_exec},
      {"match",      Lpcre_match},
      {"gmatch",     Lpcre_gmatch},
      {"__gc",       Lpcre_gc},
      {"__tostring", Lpcre_tostring},
      {NULL, NULL}
};

/* Open the library */
static const luaL_reg pcrelib[] = {
      {"new",     Lpcre_comp},
      {"flags", Lpcre_get_flags},
      {"version", Lpcre_vers},
      {NULL, NULL}
};

LUALIB_API int luaopen_pcrelib(lua_State *L)
{
      luaL_newmetatable(L, pcre_handle);
      lua_pushliteral(L, "__index");
      lua_pushvalue(L, -2); 
      lua_rawset(L, -3);
    luaL_register(L, NULL, pcremeta);
      lua_pop(L, 1);
    luaL_register(L, NSE_PCRELIBNAME, pcrelib);
      
      return 1;
}


Generated by  Doxygen 1.6.0   Back to index