tentative regex support

This commit is contained in:
Kyle Maxwell 2008-12-27 14:46:00 -08:00
parent b7e3814191
commit f724ec232d
5 changed files with 392 additions and 3 deletions

View File

@ -2,7 +2,7 @@ AM_YFLAGS = -d
BUILT_SOURCES=parser.h
lib_LTLIBRARIES = libdexter.la
libdexter_la_SOURCES = printbuf.c kstring.c obstack.c scanner.l parser.y dexter.c xml2json.c
libdexter_la_SOURCES = xml2json.c regexp.c printbuf.c kstring.c obstack.c scanner.l parser.y dexter.c
include_HEADERS = dexter.h obstack.h xml2json.h
bin_PROGRAMS = dexterc dexter

5
TODO
View File

@ -11,4 +11,7 @@
- dexter executable bus errors
- fix not()/set-difference
- runtime no-match errors
- CSS equations
- CSS equations
- saxon compatibility?!
- text functions inside magic groups?!?!
- XML input converter?!

View File

@ -28,6 +28,7 @@ fi
CPPFLAGS="$CPPFLAGS $XSLT_CFLAGS"
LIBS="$LIBS $XSLT_LIBS"
AC_CHECK_LIB(pcre, pcre_compile)
AC_CHECK_LIB(argp, argp_parse)
AC_CHECK_LIB(json, json_object_new_string, , AC_MSG_ERROR([could not find the json library]))

View File

@ -78,6 +78,7 @@ dexPtr dex_compile(char* dex_str, char* incl) {
if(!dex_exslt_registered) {
exsltRegisterAll();
exslt_org_regular_expressions_init();
dex_exslt_registered = true;
}
@ -102,7 +103,8 @@ dexPtr dex_compile(char* dex_str, char* incl) {
sprintbuf(buf, " xmlns:date=\"http://exslt.org/dates-and-times\"");
sprintbuf(buf, " xmlns:exsl=\"http://exslt.org/common\"");
sprintbuf(buf, " xmlns:saxon=\"http://icl.com/saxon\"");
sprintbuf(buf, " extension-element-prefixes=\"str math set func dyn exsl saxon user date\"");
sprintbuf(buf, " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
sprintbuf(buf, " extension-element-prefixes=\"str math set func dyn exsl saxon user date regexp\"");
sprintbuf(buf, ">\n");
sprintbuf(buf, "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
sprintbuf(buf, "<xsl:strip-space elements=\"*\"/>\n");
@ -239,6 +241,7 @@ char* __filter(char* key) {
}
void __dex_recurse(contextPtr context) {
printf("a\n");
char* tmp;
struct printbuf * buf;
keyPtr keys;
@ -256,11 +259,17 @@ void __dex_recurse(contextPtr context) {
}
} else { // if c->object !string
if(c->array) { // scoped
printf("d\n");
if(c->filter != NULL) {
printf("e\n");
sprintbuf(c->buf, "<dexter:groups><xsl:for-each select=\"%s\"><dexter:group>\n", c->filter);
__dex_recurse(c);
sprintbuf(c->buf, "</dexter:group></xsl:for-each></dexter:groups>\n");
} else { // magic
printf("f\n");
sprintbuf(c->buf, "<xsl:variable name=\"%s__context\" select=\".\"/>\n", c->name);
tmp = myparse(astrdup(inner_key_of(c->json)));
sprintbuf(c->buf, "<dexter:groups><xsl:for-each select=\"%s\">\n", filter_intersection(context->magic, tmp));
@ -295,6 +304,8 @@ void __dex_recurse(contextPtr context) {
sprintbuf(c->buf, "</dexter:group></xsl:for-each></xsl:for-each></dexter:groups>\n");
}
} else {
printf("c\n");
if(c->filter == NULL) {
__dex_recurse(c);
} else {

374
regexp.c Normal file
View File

@ -0,0 +1,374 @@
/*
* regexp.c: Implementation of the EXSLT -- Regular Expressions module
*
* References:
* http://exslt.org/regexp/index.html
*
* See Copyright for the status of this software.
*
* Authors:
* Joel W. Reed <joelwreed@gmail.com>
*
* TODO:
* functions:
* regexp:match
* regexp:replace
* regexp:test
*/
#include <libxml/tree.h>
#include <libxml/xpath.h>
#include <libxml/xpathInternals.h>
#include <libxslt/xsltconfig.h>
#include <libxslt/xsltutils.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/extensions.h>
#include <libexslt/exsltexports.h>
#include <pcre.h>
#include <string.h>
/* make sure init function is exported on win32 */
#if defined(_WIN32)
#define PLUGINPUBFUN __declspec(dllexport)
#else
#define PLUGINPUBFUN
#endif
/**
* EXSLT_REGEXP_NAMESPACE:
*
* Namespace for EXSLT regexp functions
*/
#define EXSLT_REGEXP_NAMESPACE ((const xmlChar *) "http://exslt.org/regular-expressions")
/**
 * exsltRegexpFlagsFromString:
 * @flagstr: NUL-terminated string of single-letter flags
 * @global:  receives 1 when 'g' occurs in @flagstr, 0 otherwise
 * @flags:   receives the PCRE option bits: PCRE_UTF8 always, plus
 *           PCRE_CASELESS when 'i' occurs in @flagstr
 *
 * Translates a flag string into PCRE options and a "global" switch.
 * Letters other than 'i' and 'g' are ignored.
 */
static void
exsltRegexpFlagsFromString(const xmlChar* flagstr,
                           int* global, int* flags)
{
    const xmlChar *cur;
    int options = PCRE_UTF8;   /* default option set */
    int match_all = 0;         /* default: first match only */

    for (cur = flagstr; *cur != '\0'; cur++) {
        switch (*cur) {
        case 'i':
            options |= PCRE_CASELESS;
            break;
        case 'g':
            match_all = 1;
            break;
        default:
            /* TODO: support other flags? */
            break;
        }
    }

    *flags = options;
    *global = match_all;
}
/**
 * exsltRegexpExecute:
 * @ctxt:        XPath parser context, used only for error reporting
 * @haystack:    NUL-terminated subject string
 * @regexp:      NUL-terminated pattern source
 * @flags:       option bits handed to pcre_compile()
 * @ovector:     output vector handed to pcre_exec()
 * @ovector_len: number of ints in @ovector (NOT its size in bytes)
 *
 * Compiles @regexp and runs it once against @haystack, starting at
 * offset 0. The compiled pattern is released before returning.
 *
 * Returns a value > 0 when the pattern matched (with @ovector_len >= 2,
 * ovector[0] and ovector[1] are the byte offsets of the whole match),
 * -1 when the pattern failed to compile or did not match, and 0 when
 * pcre_exec() reported any other error.
 */
static int
exsltRegexpExecute(xmlXPathParserContextPtr ctxt,
                   const xmlChar* haystack, const xmlChar* regexp,
                   int flags, int ovector[], int ovector_len)
{
    pcre *compiled_regexp = NULL;
    const char *error = NULL;
    int erroffset = 0;
    int haystack_len;
    int rc;

    compiled_regexp = pcre_compile((const char *) regexp, /* the pattern */
                                   flags,       /* compile options */
                                   &error,      /* for error message */
                                   &erroffset,  /* for error offset */
                                   NULL);       /* default character tables */
    if (compiled_regexp == NULL) {
        xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
            "exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
        return -1;
    }

    /*
     * pcre_exec() takes the subject length in bytes. xmlUTF8Strlen()
     * counts characters (and yields -1 on malformed input), which would
     * truncate any subject containing multi-byte sequences.
     */
    haystack_len = xmlStrlen (haystack);

    rc = pcre_exec(compiled_regexp,         /* result of pcre_compile() */
                   NULL,                    /* we didn't study the pattern */
                   (const char *) haystack, /* the subject string */
                   haystack_len,            /* subject length in bytes */
                   0,                       /* start at offset 0 in the subject */
                   0,                       /* default options */
                   ovector,                 /* substring offsets */
                   ovector_len);            /* number of elements in the vector */

    if (rc < -1) {
        xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
            "exslt:regexp failed to execute %s for %s", regexp, haystack);
        rc = 0;
    } else if (rc == 0) {
        /*
         * pcre_exec() returns 0 when the match succeeded but the vector
         * was too small to hold every captured substring. The overall
         * match offsets are still filled in, so report it as a match
         * instead of letting callers mistake it for a failure.
         */
        rc = 1;
    }

    pcre_free(compiled_regexp);
    return rc;
}
/**
 * exsltRegexpMatchFunction:
 * @ctxt:  an XPath parser context
 * @nargs: number of arguments on the XPath stack (2 or 3)
 *
 * Implements regexp:match(string, regexp, flags?).
 *
 * Pushes a node set of <match> elements whose text is the substring
 * matched by the whole pattern: the first match only, or every
 * successive match when the flags contain 'g'. An empty node set is
 * pushed when nothing matches or when an internal step fails.
 */
static void
exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
{
    xsltTransformContextPtr tctxt;
    xmlNodePtr node;
    xmlDocPtr container;
    xmlXPathObjectPtr ret = NULL;
    xmlChar *haystack, *regexp, *flagstr, *working, *match;
    int rc, flags, global, ovector[3];

    /* flags are optional; subject and pattern are not */
    if ((nargs < 2) || (nargs > 3)) {
        xmlXPathSetArityError(ctxt);
        return;
    }

    /* only pop what was actually pushed, otherwise the stack is corrupted */
    if (nargs == 3)
        flagstr = xmlXPathPopString(ctxt);
    else
        flagstr = xmlStrdup((const xmlChar *) "");
    if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
        return;
    }
    regexp = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
        xmlFree(flagstr);
        return;
    }
    haystack = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
        xmlFree(regexp);
        xmlFree(flagstr);
        return;
    }

    /* Return a result tree fragment */
    tctxt = xsltXPathGetTransformContext(ctxt);
    if (tctxt == NULL) {
        xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
            "exslt:regexp : internal error tctxt == NULL\n");
        goto fail;
    }

    container = xsltCreateRVT(tctxt);
    if (container != NULL) {
        xsltRegisterTmpRVT(tctxt, container);
        ret = xmlXPathNewNodeSet(NULL);
        if (ret != NULL) {
            ret->boolval = 0;
            exsltRegexpFlagsFromString(flagstr, &global, &flags);
            working = haystack;
            rc = exsltRegexpExecute(ctxt, working, regexp, flags,
                                    ovector, sizeof(ovector)/sizeof(int));
            while (rc > 0) {
                match = xmlStrsub(working, ovector[0], ovector[1]-ovector[0]);
                if (NULL == match) goto fail;
                node = xmlNewDocRawNode(container, NULL,
                                        (const xmlChar *) "match", match);
                xmlFree(match);
                if (NULL == node) goto fail;
                xmlAddChild((xmlNodePtr) container, node);
                xmlXPathNodeSetAddUnique(ret->nodesetval, node);
                if (!global) break;

                /* resume right after the match just reported */
                working = working + ovector[1];
                if (ovector[1] == ovector[0]) {
                    /*
                     * Zero-length match: without forced progress the
                     * same empty match would be found forever. Step
                     * over one UTF-8 character (lead byte plus any
                     * 10xxxxxx continuation bytes).
                     */
                    if (*working == '\0') break;
                    do {
                        working++;
                    } while ((*working & 0xC0) == 0x80);
                }
                rc = exsltRegexpExecute(ctxt, working, regexp, flags,
                                        ovector, sizeof(ovector)/sizeof(int));
            }
        }
    }

fail:
    if (flagstr != NULL)
        xmlFree(flagstr);
    if (regexp != NULL)
        xmlFree(regexp);
    if (haystack != NULL)
        xmlFree(haystack);
    if (ret != NULL)
        valuePush(ctxt, ret);
    else
        valuePush(ctxt, xmlXPathNewNodeSet(NULL));
}
/**
 * exsltRegexpReplaceFunction:
 * @ctxt:  an XPath parser context
 * @nargs: number of arguments on the XPath stack (must be 4)
 *
 * Implements regexp:replace(string, regexp, flags, replacement).
 *
 * Pushes a string in which the first match of the pattern (every match
 * when the flags contain 'g') is replaced by the replacement string,
 * copied literally. When nothing matches, the input string is returned
 * unchanged.
 */
static void
exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
{
    xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
    xmlChar *result = NULL, *working;
    int rc, flags, global, ovector[3];

    /* all four arguments are popped below, so all four are required */
    if (nargs != 4) {
        xmlXPathSetArityError(ctxt);
        return;
    }

    replace = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
        return;
    }
    flagstr = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
        xmlFree(replace);
        return;
    }
    regexp = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
        xmlFree(flagstr);
        xmlFree(replace);
        return;
    }
    haystack = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
        xmlFree(regexp);
        xmlFree(flagstr);
        xmlFree(replace);
        return;
    }

    exsltRegexpFlagsFromString(flagstr, &global, &flags);
    working = haystack;
    rc = exsltRegexpExecute(ctxt, working, regexp, flags,
                            ovector, sizeof(ovector)/sizeof(int));
    while (rc > 0) {
        /* copy the unmatched text in front of the match, if any */
        if (ovector[0] > 0) {
            tmp = xmlStrsub(working, 0, ovector[0]);
            if (tmp != NULL) {
                /* xmlStrcat(NULL, s) duplicates s, so result may start NULL */
                result = xmlStrcat(result, tmp);
                xmlFree(tmp);
            }
        }
        result = xmlStrcat(result, replace);

        working = working + ovector[1];
        if (!global) break;

        if (ovector[1] == ovector[0]) {
            /*
             * Zero-length match: force progress, otherwise the same
             * empty match is found forever. Carry one UTF-8 character
             * (lead byte plus 10xxxxxx continuation bytes) over to the
             * output unchanged and resume after it.
             */
            int step = 1;

            if (*working == '\0') break;
            while ((working[step] & 0xC0) == 0x80)
                step++;
            result = xmlStrncat(result, working, step);
            working += step;
        }
        rc = exsltRegexpExecute(ctxt, working, regexp, flags,
                                ovector, sizeof(ovector)/sizeof(int));
    }

    /*
     * Append whatever follows the last match. `working` always points
     * inside haystack, so testing for the terminator is sufficient and,
     * unlike a character count, is correct for multi-byte text.
     */
    if (*working != '\0')
        result = xmlStrcat(result, working);

    /* never hand a NULL string to the XPath stack (e.g. empty input) */
    if (result == NULL)
        result = xmlStrdup((const xmlChar *) "");

    xmlFree(replace);
    xmlFree(flagstr);
    xmlFree(regexp);
    xmlFree(haystack);
    xmlXPathReturnString(ctxt, result);
}
/**
 * exsltRegexpTestFunction:
 * @ctxt:  an XPath parser context
 * @nargs: number of arguments on the XPath stack (2 or 3)
 *
 * Implements regexp:test(string, regexp, flags?).
 *
 * Pushes true if the string given as the first argument matches the
 * regular expression given as the second argument. The pattern is
 * anchored with \A ... \Z before it is run, so the match has to start
 * at the beginning of the string and reach its end.
 */
static void
exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
{
    xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
    int rc = 0, flags, global, ovector[3];

    /* flags are optional; subject and pattern are not */
    if ((nargs < 2) || (nargs > 3)) {
        xmlXPathSetArityError(ctxt);
        return;
    }

    /* only pop what was actually pushed, otherwise the stack is corrupted */
    if (nargs == 3)
        flagstr = xmlXPathPopString(ctxt);
    else
        flagstr = xmlStrdup((const xmlChar *) "");
    if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
        return;
    }
    regexp_middle = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
        xmlFree(flagstr);
        return;
    }
    haystack = xmlXPathPopString(ctxt);
    if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
        xmlFree(regexp_middle);
        xmlFree(flagstr);
        return;
    }

    /*
     * Build the anchored regexp. The user pattern goes inside a
     * non-capturing group so both anchors apply to all of it: without
     * the group, "a|b" would become "\Aa|b\Z", i.e. "starts with a"
     * OR "ends with b".
     */
    regexp = xmlStrdup((const xmlChar *) "\\A(?:");
    regexp = xmlStrcat(regexp, regexp_middle);
    regexp = xmlStrcat(regexp, (const xmlChar *) ")\\Z");

    if (regexp != NULL) {
        exsltRegexpFlagsFromString(flagstr, &global, &flags);
        rc = exsltRegexpExecute(ctxt, haystack, regexp, flags,
                                ovector, sizeof(ovector)/sizeof(int));
        xmlFree(regexp);
    }

    xmlFree(flagstr);
    xmlFree(regexp_middle);
    xmlFree(haystack);
    xmlXPathReturnBoolean(ctxt, (rc > 0));
}
/**
 * exslt_org_regular_expressions_init:
 *
 * Registers the EXSLT - Regexp module: binds the extension functions
 * "match", "replace" and "test" in the EXSLT regular-expressions
 * namespace to their implementations in this file.
 */
void
PLUGINPUBFUN exslt_org_regular_expressions_init (void)
{
    const xmlChar *regexp_ns = (const xmlChar *) EXSLT_REGEXP_NAMESPACE;

    xsltRegisterExtModuleFunction ((const xmlChar *) "match", regexp_ns,
                                   exsltRegexpMatchFunction);
    xsltRegisterExtModuleFunction ((const xmlChar *) "replace", regexp_ns,
                                   exsltRegexpReplaceFunction);
    xsltRegisterExtModuleFunction ((const xmlChar *) "test", regexp_ns,
                                   exsltRegexpTestFunction);
}