/*
 * regexp.c: Implementation of the EXSLT -- Regular Expressions module
 *
 * References:
 *   http://exslt.org/regexp/index.html
 *
 * See Copyright for the status of this software.
 *
 * Authors:
 *   Joel W. Reed <joelwreed@gmail.com>
 *
 * TODO:
 * functions:
 *   regexp:match
 *   regexp:replace
 *   regexp:test
 */
|
|
|
|
#include <libxml/tree.h>
|
|
#include <libxml/xpath.h>
|
|
#include <libxml/xpathInternals.h>
|
|
|
|
#include <libxslt/xsltconfig.h>
|
|
#include <libxslt/xsltutils.h>
|
|
#include <libxslt/xsltInternals.h>
|
|
#include <libxslt/extensions.h>
|
|
#include <libexslt/exsltexports.h>
|
|
|
|
#include <pcre.h>
|
|
#include <string.h>
|
|
|
|
/* make sure init function is exported on win32 */
|
|
#if defined(_WIN32)
|
|
#define PLUGINPUBFUN __declspec(dllexport)
|
|
#else
|
|
#define PLUGINPUBFUN
|
|
#endif
|
|
|
|
/**
|
|
* EXSLT_REGEXP_NAMESPACE:
|
|
*
|
|
* Namespace for EXSLT regexp functions
|
|
*/
|
|
#define EXSLT_REGEXP_NAMESPACE ((const xmlChar *) "http://exslt.org/regular-expressions")
|
|
|
|
static void
|
|
exsltRegexpFlagsFromString(const xmlChar* flagstr,
|
|
int* global, int* flags)
|
|
{
|
|
const xmlChar* i = flagstr;
|
|
|
|
/* defaults */
|
|
(*flags) = PCRE_UTF8;
|
|
(*global) = 0;
|
|
|
|
while (*i != '\0')
|
|
{
|
|
if (*i == 'i') (*flags) |= PCRE_CASELESS;
|
|
else if (*i == 'g') (*global)= 1;
|
|
/* TODO: support other flags? */
|
|
i++;
|
|
}
|
|
}
|
|
|
|
static int
|
|
exsltRegexpExecute(xmlXPathParserContextPtr ctxt,
|
|
const xmlChar* haystack, const xmlChar* regexp,
|
|
int flags, int ovector[], int ovector_len)
|
|
{
|
|
int haystack_len = 0;
|
|
pcre *compiled_regexp = NULL;
|
|
int rc = 0, erroffset = 0;
|
|
const char *error = 0;
|
|
|
|
compiled_regexp = pcre_compile(regexp, /* the pattern */
|
|
flags, /* default options */
|
|
&error, /* for error message */
|
|
&erroffset, /* for error offset */
|
|
NULL); /* use default character tables */
|
|
|
|
if (compiled_regexp == NULL) {
|
|
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
|
"exslt:regexp failed to compile %s (char: %d). %s", regexp, erroffset, error);
|
|
return -1;
|
|
}
|
|
|
|
haystack_len = xmlUTF8Strlen (haystack);
|
|
|
|
rc = pcre_exec(compiled_regexp, /* result of pcre_compile() */
|
|
NULL, /* we didn't study the pattern */
|
|
haystack, /* the subject string */
|
|
haystack_len, /* the length of the subject string */
|
|
0, /* start at offset 0 in the subject */
|
|
0, /* default options */
|
|
(int*)ovector, /* vector of integers for substring information */
|
|
ovector_len); /* number of elements in the vector (NOT size in bytes) */
|
|
|
|
if (rc < -1) {
|
|
xsltTransformError (xsltXPathGetTransformContext (ctxt), NULL, NULL,
|
|
"exslt:regexp failed to execute %s for %s", regexp, haystack);
|
|
rc = 0;
|
|
}
|
|
|
|
if (compiled_regexp != NULL)
|
|
pcre_free(compiled_regexp);
|
|
|
|
return rc;
|
|
}
|
|
|
|
/**
|
|
* exsltRegexpMatchFunction:
|
|
* @ns:
|
|
*
|
|
* Returns a node set of string matches
|
|
*/
|
|
|
|
static void
|
|
exsltRegexpMatchFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
{
|
|
xsltTransformContextPtr tctxt;
|
|
xmlNodePtr node;
|
|
xmlDocPtr container;
|
|
xmlXPathObjectPtr ret = NULL;
|
|
xmlChar *haystack, *regexp, *flagstr, *working, *match;
|
|
int rc, x, flags, global, ovector[3];
|
|
|
|
if ((nargs < 1) || (nargs > 3)) {
|
|
xmlXPathSetArityError(ctxt);
|
|
return;
|
|
}
|
|
|
|
|
|
if (nargs > 2) {
|
|
flagstr = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
return;
|
|
}
|
|
} else {
|
|
flagstr = xmlStrdup("");
|
|
}
|
|
|
|
regexp = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
|
xmlFree(flagstr);
|
|
return;
|
|
}
|
|
|
|
haystack = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
xmlFree(regexp);
|
|
xmlFree(flagstr);
|
|
return;
|
|
}
|
|
|
|
/* Return a result tree fragment */
|
|
tctxt = xsltXPathGetTransformContext(ctxt);
|
|
if (tctxt == NULL) {
|
|
xsltTransformError(xsltXPathGetTransformContext(ctxt), NULL, NULL,
|
|
"exslt:regexp : internal error tctxt == NULL\n");
|
|
goto fail;
|
|
}
|
|
|
|
container = xsltCreateRVT(tctxt);
|
|
if (container != NULL) {
|
|
xsltRegisterTmpRVT(tctxt, container);
|
|
ret = xmlXPathNewNodeSet(NULL);
|
|
if (ret != NULL) {
|
|
ret->boolval = 0;
|
|
|
|
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
working = haystack;
|
|
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
ovector, sizeof(ovector)/sizeof(int));
|
|
|
|
while (rc > 0) {
|
|
match = xmlStrsub(working, ovector[0], ovector[1]-ovector[0]);
|
|
if (NULL == match) goto fail;
|
|
|
|
node = xmlNewDocRawNode(container, NULL, "match", match);
|
|
xmlFree(match);
|
|
|
|
xmlAddChild((xmlNodePtr) container, node);
|
|
xmlXPathNodeSetAddUnique(ret->nodesetval, node);
|
|
|
|
if (!global) break;
|
|
|
|
working = working + ovector[1];
|
|
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
ovector, sizeof(ovector)/sizeof(int));
|
|
}
|
|
}
|
|
}
|
|
|
|
fail:
|
|
if (flagstr != NULL)
|
|
xmlFree(flagstr);
|
|
if (regexp != NULL)
|
|
xmlFree(regexp);
|
|
if (haystack != NULL)
|
|
xmlFree(haystack);
|
|
|
|
if (ret != NULL)
|
|
valuePush(ctxt, ret);
|
|
else
|
|
valuePush(ctxt, xmlXPathNewNodeSet(NULL));
|
|
}
|
|
|
|
/**
|
|
* exsltRegexpReplaceFunction:
|
|
* @ns:
|
|
*
|
|
* Returns a node set of string matches
|
|
*/
|
|
|
|
static void
|
|
exsltRegexpReplaceFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
{
|
|
xmlChar *haystack, *regexp, *flagstr, *replace, *tmp;
|
|
xmlChar *result = NULL, *working, *end;
|
|
int rc, x, flags, global, ovector[3];
|
|
|
|
if ((nargs < 1) || (nargs > 4)) {
|
|
xmlXPathSetArityError(ctxt);
|
|
return;
|
|
}
|
|
|
|
replace = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (replace == NULL)) {
|
|
return;
|
|
}
|
|
|
|
flagstr = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
xmlFree(replace);
|
|
return;
|
|
}
|
|
|
|
regexp = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (regexp == NULL)) {
|
|
xmlFree(flagstr);
|
|
xmlFree(replace);
|
|
return;
|
|
}
|
|
|
|
haystack = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
xmlFree(regexp);
|
|
xmlFree(flagstr);
|
|
xmlFree(replace);
|
|
return;
|
|
}
|
|
|
|
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
|
|
working = haystack;
|
|
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
ovector, sizeof(ovector)/sizeof(int));
|
|
|
|
while (rc > 0 ) {
|
|
if (0==ovector[0]) {
|
|
if (NULL==result) result = xmlStrdup(replace);
|
|
else result = xmlStrcat(result, replace);
|
|
}
|
|
else {
|
|
tmp = xmlStrsub(working, 0, ovector[0]);
|
|
if (NULL==result) result = tmp;
|
|
else {
|
|
result = xmlStrcat(result, tmp);
|
|
xmlFree(tmp);
|
|
}
|
|
result = xmlStrcat(result, replace);
|
|
}
|
|
|
|
working = working + ovector[1];
|
|
|
|
if (!global) break;
|
|
rc = exsltRegexpExecute(ctxt, working, regexp, flags,
|
|
ovector, sizeof(ovector)/sizeof(int));
|
|
}
|
|
|
|
end = haystack + xmlUTF8Strlen(haystack);
|
|
if (working < end ) {
|
|
if (NULL==result) result = xmlStrdup(working);
|
|
else {
|
|
result = xmlStrcat(result, working);
|
|
}
|
|
}
|
|
|
|
fail:
|
|
if (replace != NULL)
|
|
xmlFree(replace);
|
|
if (flagstr != NULL)
|
|
xmlFree(flagstr);
|
|
if (regexp != NULL)
|
|
xmlFree(regexp);
|
|
if (haystack != NULL)
|
|
xmlFree(haystack);
|
|
|
|
xmlXPathReturnString(ctxt, result);
|
|
}
|
|
|
|
/**
|
|
* exsltRegexpTestFunction:
|
|
* @ns:
|
|
*
|
|
* returns true if the string given as the first argument
|
|
* matches the regular expression given as the second argument
|
|
*
|
|
*/
|
|
|
|
static void
|
|
exsltRegexpTestFunction (xmlXPathParserContextPtr ctxt, int nargs)
|
|
{
|
|
xmlChar *haystack, *regexp_middle, *regexp, *flagstr;
|
|
int rc = 0, flags, global, ovector[3];
|
|
|
|
if ((nargs < 1) || (nargs > 3)) {
|
|
xmlXPathSetArityError(ctxt);
|
|
return;
|
|
}
|
|
|
|
if(nargs > 2) {
|
|
flagstr = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (flagstr == NULL)) {
|
|
return;
|
|
}
|
|
} else {
|
|
flagstr = xmlStrdup("");
|
|
}
|
|
|
|
regexp_middle = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (regexp_middle == NULL)) {
|
|
xmlFree(flagstr);
|
|
return;
|
|
}
|
|
|
|
haystack = xmlXPathPopString(ctxt);
|
|
if (xmlXPathCheckError(ctxt) || (haystack == NULL)) {
|
|
xmlFree(regexp_middle);
|
|
xmlFree(flagstr);
|
|
return;
|
|
}
|
|
|
|
/* build the regexp */
|
|
regexp = xmlStrdup("\\A");
|
|
regexp = xmlStrcat(regexp, regexp_middle);
|
|
regexp = xmlStrcat(regexp, "\\Z");
|
|
|
|
exsltRegexpFlagsFromString(flagstr, &global, &flags);
|
|
rc = exsltRegexpExecute(ctxt, haystack, regexp, flags,
|
|
ovector, sizeof(ovector)/sizeof(int));
|
|
|
|
fail:
|
|
if (flagstr != NULL)
|
|
xmlFree(flagstr);
|
|
if (regexp != NULL)
|
|
xmlFree(regexp);
|
|
if (regexp_middle != NULL)
|
|
xmlFree(regexp_middle);
|
|
if (haystack != NULL)
|
|
xmlFree(haystack);
|
|
|
|
xmlXPathReturnBoolean(ctxt, (rc > 0));
|
|
}
|
|
|
|
/**
|
|
* exsltRegexpRegister:
|
|
*
|
|
* Registers the EXSLT - Regexp module
|
|
*/
|
|
void
|
|
PLUGINPUBFUN exslt_org_regular_expressions_init (void)
|
|
{
|
|
xsltRegisterExtModuleFunction ((const xmlChar *) "match",
|
|
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
exsltRegexpMatchFunction);
|
|
xsltRegisterExtModuleFunction ((const xmlChar *) "replace",
|
|
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
exsltRegexpReplaceFunction);
|
|
xsltRegisterExtModuleFunction ((const xmlChar *) "test",
|
|
(const xmlChar *) EXSLT_REGEXP_NAMESPACE,
|
|
exsltRegexpTestFunction);
|
|
}
|