tentative regex support
This commit is contained in:
parent
b7e3814191
commit
f724ec232d
|
@ -2,7 +2,7 @@ AM_YFLAGS = -d
|
|||
BUILT_SOURCES=parser.h
|
||||
|
||||
lib_LTLIBRARIES = libdexter.la
|
||||
libdexter_la_SOURCES = printbuf.c kstring.c obstack.c scanner.l parser.y dexter.c xml2json.c
|
||||
libdexter_la_SOURCES = xml2json.c regexp.c printbuf.c kstring.c obstack.c scanner.l parser.y dexter.c
|
||||
include_HEADERS = dexter.h obstack.h xml2json.h
|
||||
|
||||
bin_PROGRAMS = dexterc dexter
|
||||
|
|
5
TODO
5
TODO
|
@ -11,4 +11,7 @@
|
|||
- dexter executable bus errors
|
||||
- fix not()/set-difference
|
||||
- runtime no-match errors
|
||||
- CSS equations
|
||||
- CSS equations
|
||||
- saxon compatibility?!
|
||||
- text functions inside magic groups?!?!
|
||||
- XML input converter?!
|
|
@ -28,6 +28,7 @@ fi
|
|||
CPPFLAGS="$CPPFLAGS $XSLT_CFLAGS"
|
||||
LIBS="$LIBS $XSLT_LIBS"
|
||||
|
||||
AC_CHECK_LIB(pcre, pcre_compile)
|
||||
AC_CHECK_LIB(argp, argp_parse)
|
||||
AC_CHECK_LIB(json, json_object_new_string, , AC_MSG_ERROR([could not find the json library]))
|
||||
|
||||
|
|
13
dexter.c
13
dexter.c
|
@ -78,6 +78,7 @@ dexPtr dex_compile(char* dex_str, char* incl) {
|
|||
|
||||
if(!dex_exslt_registered) {
|
||||
exsltRegisterAll();
|
||||
exslt_org_regular_expressions_init();
|
||||
dex_exslt_registered = true;
|
||||
}
|
||||
|
||||
|
@ -102,7 +103,8 @@ dexPtr dex_compile(char* dex_str, char* incl) {
|
|||
sprintbuf(buf, " xmlns:date=\"http://exslt.org/dates-and-times\"");
|
||||
sprintbuf(buf, " xmlns:exsl=\"http://exslt.org/common\"");
|
||||
sprintbuf(buf, " xmlns:saxon=\"http://icl.com/saxon\"");
|
||||
sprintbuf(buf, " extension-element-prefixes=\"str math set func dyn exsl saxon user date\"");
|
||||
sprintbuf(buf, " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
|
||||
sprintbuf(buf, " extension-element-prefixes=\"str math set func dyn exsl saxon user date regexp\"");
|
||||
sprintbuf(buf, ">\n");
|
||||
sprintbuf(buf, "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
|
||||
sprintbuf(buf, "<xsl:strip-space elements=\"*\"/>\n");
|
||||
|
@ -239,6 +241,7 @@ char* __filter(char* key) {
|
|||
}
|
||||
|
||||
void __dex_recurse(contextPtr context) {
|
||||
printf("a\n");
|
||||
char* tmp;
|
||||
struct printbuf * buf;
|
||||
keyPtr keys;
|
||||
|
@ -256,11 +259,17 @@ void __dex_recurse(contextPtr context) {
|
|||
}
|
||||
} else { // if c->object !string
|
||||
if(c->array) { // scoped
|
||||
|
||||
printf("d\n");
|
||||
if(c->filter != NULL) {
|
||||
|
||||
printf("e\n");
|
||||
sprintbuf(c->buf, "<dexter:groups><xsl:for-each select=\"%s\"><dexter:group>\n", c->filter);
|
||||
__dex_recurse(c);
|
||||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></dexter:groups>\n");
|
||||
} else { // magic
|
||||
|
||||
printf("f\n");
|
||||
sprintbuf(c->buf, "<xsl:variable name=\"%s__context\" select=\".\"/>\n", c->name);
|
||||
tmp = myparse(astrdup(inner_key_of(c->json)));
|
||||
sprintbuf(c->buf, "<dexter:groups><xsl:for-each select=\"%s\">\n", filter_intersection(context->magic, tmp));
|
||||
|
@ -295,6 +304,8 @@ void __dex_recurse(contextPtr context) {
|
|||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></xsl:for-each></dexter:groups>\n");
|
||||
}
|
||||
} else {
|
||||
|
||||
printf("c\n");
|
||||
if(c->filter == NULL) {
|
||||
__dex_recurse(c);
|
||||
} else {
|
||||
|
|
|
@ -0,0 +1,374 @@
|
|||
/*
|
||||
* regexp.c: Implementation of the EXSLT -- Regular Expressions module
|
||||
*
|
||||
* References:
|
||||
* http://exslt.org/regexp/index.html
|
||||
*
|
||||
* See Copyright for the status of this software.
|
||||
*
|
||||
* Authors:
|
||||
* Joel W. Reed <joelwreed@gmail.com>
|
||||
*
|
||||
* TODO:
|
||||
* functions:
|
||||
* regexp:match
|
||||
* regexp:replace
|
||||
* regexp:test
|
||||
*/
|
||||
|
||||
#include <libxml/tree.h>
|
||||
#include <libxml/xpath.h>
|
||||
#include <libxml/xpathInternals.h>
|
||||
|
||||
#include <libxslt/xsltconfig.h>
|
||||
#include <libxslt/xsltutils.h>
|
||||
#include <libxslt/xsltInternals.h>
|
||||
#include <libxslt/extensions.h>
|
||||
#include <libexslt/exsltexports.h>
|
||||
|
||||
#include <pcre.h>
|
||||
#include <string.h>
|
||||
|
||||
/* make sure init function is exported on win32 */
|
||||
#if defined(_WIN32)
|
||||
#define PLUGINPUBFUN __declspec(dllexport)
|
||||
#else
|
||||
#define PLUGINPUBFUN
|
||||
#endif
|
||||
|
||||
/**
|
||||
* EXSLT_REGEXP_NAMESPACE:
|
||||
*
|
||||
* Namespace for EXSLT regexp functions
|
||||
*/
|
||||
#define EXSLT_REGEXP_NAMESPACE ((const xmlChar *) "http://exslt.org/regular-expressions")
|
||||
|
||||
static void
|
||||
exsltRegexpFlagsFromString(const xmlChar* flagstr,
|
||||
int* global, int* flags)
|
||||
{
|
||||
const xmlChar* i = flagstr;
|
||||
|
||||
/* defaults */
|
||||
(*flags) = PCRE_UTF8;
|
||||
(*global) = 0;
|
||||
|
||||
while (*i != '\0')
|
||||
{
|
||||
if (*i == 'i') (*flags) |= PCRE_CASELESS;
|
||||
else if (*i == 'g') (*global)= 1;
|
||||
/* TODO: support other flags? */
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
exsltRegexpExecute(xmlXPathParserContextPtr ctxt,
|
||||
const xmlChar* haystack, const xmlChar* regexp,
|
||||
int flags, int ovector[], int ovector_len)
|
||||
{
|
||||
int haystack_len = 0;
|
||||
pcre *compiled_regexp = NULL;
|
||||
int rc = 0, erroffset = 0;
|
||||
const char *error = 0;
|
||||
|
||||
compiled_regexp = pcre_compile(regexp, /* the pattern */
|
||||
flags, /* default options */
|
||||
&error, /* for error message */
|
||||
&erroffset, /* for error offset */
|
||||
NULL); /* use default character tables */
|
||||
|
||||
if (compiled_regexp == NULL) {
|
||||
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
||||
"exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
|
||||
return -1;
|
||||
}
|
||||
|
||||
haystack_len = xmlUTF8Strlen (haystack);
|
||||
|
||||
rc = pcre_exec(compiled_regexp, /* result of pcre_compile() */
|
||||
NULL, /* we didn't study the pattern */
|
||||
haystack, /* the subject string */
|
||||
haystack_len, /* the length of the subject string */
|
||||
0, /* start at offset 0 in the subject */
|
||||
0, /* default options */
|
||||
(int*)ovector, /* vector of integers for substring information */
|
||||
ovector_len); /* number of elements in the vector (NOT size in bytes) */
|
||||
|
||||
if (rc < -1) {
|
||||
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
||||
"exslt:regexp failed to execute %s for %s", regexp, haystack);
|
||||
rc = 0;
|
||||
}
|
||||
|
||||
if (compiled_regexp != NULL)
|
||||
pcre_free(compiled_regexp);
|
||||
|
||||
return rc;
|
||||
}
|
||||
|
||||
/**
|
||||
* exsltRegexpMatchFunction:
|
||||
* @ns:
|
||||
*
|
||||
* Returns a node set of string matches
|
||||
*/
|
||||
|
||||
static void
|
||||
exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
||||
{
|
||||
xsltTransformContextPtr tctxt;
|
||||
xmlNodePtr node;
|
||||
xmlDocPtr container;
|
||||
xmlXPathObjectPtr ret = NULL;
|
||||
xmlChar *haystack, *regexp, *flagstr, *working, *match;
|
||||
int rc, x, flags, global, ovector[3];
|
||||
|
||||
if ((nargs < 1) || (nargs > 3)) {
|
||||
xmlXPathSetArityError(ctxt);
|
||||
return;
|
||||
}
|
||||
|
||||
flagstr = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
regexp = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
||||
xmlFree(flagstr);
|
||||
return;
|
||||
}
|
||||
|
||||
haystack = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
||||
xmlFree(regexp);
|
||||
xmlFree(flagstr);
|
||||
return;
|
||||
}
|
||||
|
||||
/* Return a result tree fragment */
|
||||
tctxt = xsltXPathGetTransformContext(ctxt);
|
||||
if (tctxt == NULL) {
|
||||
xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
|
||||
"exslt:regexp : internal error tctxt == NULL\n");
|
||||
goto fail;
|
||||
}
|
||||
|
||||
container = xsltCreateRVT(tctxt);
|
||||
if (container != NULL) {
|
||||
xsltRegisterTmpRVT(tctxt, container);
|
||||
ret = xmlXPathNewNodeSet(NULL);
|
||||
if (ret != NULL) {
|
||||
ret->boolval = 0;
|
||||
|
||||
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
||||
working = haystack;
|
||||
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
||||
ovector, sizeof(ovector)/sizeof(int));
|
||||
|
||||
while (rc > 0) {
|
||||
match = xmlStrsub(working, ovector[0], ovector[1]-ovector[0]);
|
||||
if (NULL == match) goto fail;
|
||||
|
||||
node = xmlNewDocRawNode(container, NULL, "match", match);
|
||||
xmlFree(match);
|
||||
|
||||
xmlAddChild((xmlNodePtr) container, node);
|
||||
xmlXPathNodeSetAddUnique(ret->nodesetval, node);
|
||||
|
||||
if (!global) break;
|
||||
|
||||
working = working + ovector[1];
|
||||
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
||||
ovector, sizeof(ovector)/sizeof(int));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
if (flagstr != NULL)
|
||||
xmlFree(flagstr);
|
||||
if (regexp != NULL)
|
||||
xmlFree(regexp);
|
||||
if (haystack != NULL)
|
||||
xmlFree(haystack);
|
||||
|
||||
if (ret != NULL)
|
||||
valuePush(ctxt, ret);
|
||||
else
|
||||
valuePush(ctxt, xmlXPathNewNodeSet(NULL));
|
||||
}
|
||||
|
||||
/**
|
||||
* exsltRegexpReplaceFunction:
|
||||
* @ns:
|
||||
*
|
||||
* Returns a node set of string matches
|
||||
*/
|
||||
|
||||
static void
|
||||
exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
||||
{
|
||||
xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
|
||||
xmlChar *result = NULL, *working, *end;
|
||||
int rc, x, flags, global, ovector[3];
|
||||
|
||||
if ((nargs < 1) || (nargs > 4)) {
|
||||
xmlXPathSetArityError(ctxt);
|
||||
return;
|
||||
}
|
||||
|
||||
replace = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
flagstr = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
||||
xmlFree(replace);
|
||||
return;
|
||||
}
|
||||
|
||||
regexp = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
||||
xmlFree(flagstr);
|
||||
xmlFree(replace);
|
||||
return;
|
||||
}
|
||||
|
||||
haystack = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
||||
xmlFree(regexp);
|
||||
xmlFree(flagstr);
|
||||
xmlFree(replace);
|
||||
return;
|
||||
}
|
||||
|
||||
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
||||
|
||||
working = haystack;
|
||||
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
||||
ovector, sizeof(ovector)/sizeof(int));
|
||||
|
||||
while (rc > 0 ) {
|
||||
if (0==ovector[0]) {
|
||||
if (NULL==result) result = xmlStrdup(replace);
|
||||
else result = xmlStrcat(result, replace);
|
||||
}
|
||||
else {
|
||||
tmp = xmlStrsub(working, 0, ovector[0]);
|
||||
if (NULL==result) result = tmp;
|
||||
else {
|
||||
result = xmlStrcat(result, tmp);
|
||||
xmlFree(tmp);
|
||||
}
|
||||
result = xmlStrcat(result, replace);
|
||||
}
|
||||
|
||||
working = working + ovector[1];
|
||||
|
||||
if (!global) break;
|
||||
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
||||
ovector, sizeof(ovector)/sizeof(int));
|
||||
}
|
||||
|
||||
end = haystack + xmlUTF8Strlen(haystack);
|
||||
if (working < end ) {
|
||||
if (NULL==result) result = xmlStrdup(working);
|
||||
else {
|
||||
result = xmlStrcat(result, working);
|
||||
}
|
||||
}
|
||||
|
||||
fail:
|
||||
if (replace != NULL)
|
||||
xmlFree(replace);
|
||||
if (flagstr != NULL)
|
||||
xmlFree(flagstr);
|
||||
if (regexp != NULL)
|
||||
xmlFree(regexp);
|
||||
if (haystack != NULL)
|
||||
xmlFree(haystack);
|
||||
|
||||
xmlXPathReturnString(ctxt, result);
|
||||
}
|
||||
|
||||
/**
|
||||
* exsltRegexpTestFunction:
|
||||
* @ns:
|
||||
*
|
||||
* returns true if the string given as the first argument
|
||||
* matches the regular expression given as the second argument
|
||||
*
|
||||
*/
|
||||
|
||||
static void
|
||||
exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
||||
{
|
||||
xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
|
||||
int rc = 0, flags, global, ovector[3];
|
||||
|
||||
if ((nargs < 1) || (nargs > 3)) {
|
||||
xmlXPathSetArityError(ctxt);
|
||||
return;
|
||||
}
|
||||
|
||||
flagstr = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
||||
return;
|
||||
}
|
||||
|
||||
regexp_middle = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
|
||||
xmlFree(flagstr);
|
||||
return;
|
||||
}
|
||||
|
||||
haystack = xmlXPathPopString(ctxt);
|
||||
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
||||
xmlFree(regexp_middle);
|
||||
xmlFree(flagstr);
|
||||
return;
|
||||
}
|
||||
|
||||
/* build the regexp */
|
||||
regexp = xmlStrdup("\\A");
|
||||
regexp = xmlStrcat(regexp, regexp_middle);
|
||||
regexp = xmlStrcat(regexp, "\\Z");
|
||||
|
||||
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
||||
rc = exsltRegexpExecute(ctxt, haystack, regexp, flags,
|
||||
ovector, sizeof(ovector)/sizeof(int));
|
||||
|
||||
fail:
|
||||
if (flagstr != NULL)
|
||||
xmlFree(flagstr);
|
||||
if (regexp != NULL)
|
||||
xmlFree(regexp);
|
||||
if (regexp_middle != NULL)
|
||||
xmlFree(regexp_middle);
|
||||
if (haystack != NULL)
|
||||
xmlFree(haystack);
|
||||
|
||||
xmlXPathReturnBoolean(ctxt, (rc > 0));
|
||||
}
|
||||
|
||||
/**
|
||||
* exsltRegexpRegister:
|
||||
*
|
||||
* Registers the EXSLT - Regexp module
|
||||
*/
|
||||
void
|
||||
PLUGINPUBFUN exslt_org_regular_expressions_init (void)
|
||||
{
|
||||
xsltRegisterExtModuleFunction ((const xmlChar *) "match",
|
||||
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
||||
exsltRegexpMatchFunction);
|
||||
xsltRegisterExtModuleFunction ((const xmlChar *) "replace",
|
||||
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
||||
exsltRegexpReplaceFunction);
|
||||
xsltRegisterExtModuleFunction ((const xmlChar *) "test",
|
||||
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
||||
exsltRegexpTestFunction);
|
||||
}
|
Loading…
Reference in New Issue