282 lines
7.8 KiB
C
282 lines
7.8 KiB
C
#include <Python.h>
|
|
#include <parsley.h>
|
|
#include <libxslt/xslt.h>
|
|
#include <libexslt/exslt.h>
|
|
#include <libxslt/xsltInternals.h>
|
|
#include <libxslt/transform.h>
|
|
#include <libxml/parser.h>
|
|
#include <libxml/HTMLparser.h>
|
|
#include <libxml/HTMLtree.h>
|
|
#include <libxml/xmlwriter.h>
|
|
#include <string.h>
|
|
#include <stdio.h>
|
|
#include <json/json.h>
|
|
#include <xml2json.h>
|
|
|
|
typedef struct {
|
|
PyObject_HEAD
|
|
parsleyPtr parsley;
|
|
} PyParsley;
|
|
|
|
/* Forward (tentative) declaration: the type object is fully initialised later
 * in this file, but initpyparsley needs to reference it before that point. */
static PyTypeObject pyparsley_PyParsleyType;
|
|
|
|
static PyMethodDef pyparsley_methods[] = {
|
|
{NULL} /* Sentinel */
|
|
};
|
|
|
|
/* Fallback for Python headers that do not define PyMODINIT_FUNC
 * (declarations for DLL import/export): a plain void return type. */
#if !defined(PyMODINIT_FUNC)
#  define PyMODINIT_FUNC void
#endif
|
|
|
|
static PyObject *jsonmodule;
|
|
|
|
PyMODINIT_FUNC
|
|
initpyparsley(void)
|
|
{
|
|
jsonmodule = PyImport_ImportModule("json");
|
|
if(jsonmodule == NULL) {
|
|
PyErr_Clear();
|
|
jsonmodule = PyImport_ImportModule("simplejson");
|
|
}
|
|
if(jsonmodule == NULL) return;
|
|
|
|
PyObject* m;
|
|
|
|
pyparsley_PyParsleyType.tp_new = PyType_GenericNew;
|
|
if (PyType_Ready(&pyparsley_PyParsleyType) < 0)
|
|
return;
|
|
|
|
m = Py_InitModule3("pyparsley", pyparsley_methods,
|
|
"Python binding for parsley");
|
|
|
|
Py_INCREF(&pyparsley_PyParsleyType);
|
|
PyModule_AddObject(m, "PyParsley", (PyObject *)&pyparsley_PyParsleyType);
|
|
}
|
|
|
|
static void
|
|
PyParsley_dealloc(PyParsley* self)
|
|
{
|
|
if(self->parsley != NULL) parsley_free(self->parsley);
|
|
self->ob_type->tp_free((PyObject*)self);
|
|
}
|
|
|
|
static PyObject *
|
|
PyParsley_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
|
{
|
|
PyParsley *self;
|
|
self = (PyParsley *)type->tp_alloc(type, 0);
|
|
return (PyObject *)self;
|
|
}
|
|
|
|
static int
|
|
PyParsley_init(PyParsley *self, PyObject *args, PyObject *kwds)
|
|
{
|
|
PyObject *script;
|
|
char *string = "";
|
|
char *incl = "";
|
|
|
|
if (!PyArg_ParseTuple(args, "O|S", &script, &incl)) {
|
|
Py_DECREF(self);
|
|
return -1;
|
|
}
|
|
|
|
PyObject *dumps = PyObject_GetAttrString(jsonmodule, "dumps");
|
|
if(dumps == NULL) return -1;
|
|
|
|
if(PyDict_Check(script)){
|
|
script = PyObject_CallFunctionObjArgs(dumps, script, NULL);
|
|
if(script == NULL) return -1;
|
|
}
|
|
|
|
string = PyString_AsString(script);
|
|
if(string == NULL) return -1;
|
|
|
|
self->parsley = parsley_compile(string, incl);
|
|
|
|
if(self->parsley->error != NULL) {
|
|
PyErr_SetString(PyExc_RuntimeError, self->parsley->error);
|
|
Py_DECREF(self);
|
|
return -1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static PyObject *
|
|
pythonize_recurse(xmlNodePtr xml) {
|
|
if(xml == NULL) return NULL;
|
|
xmlNodePtr child;
|
|
PyObject * obj = NULL;
|
|
|
|
switch(xml->type) {
|
|
case XML_ELEMENT_NODE:
|
|
child = xml->children;
|
|
if(xml->ns == NULL) {
|
|
child = xml;
|
|
obj = PyDict_New();
|
|
while(child != NULL) {
|
|
PyDict_SetItemString(obj, child->name, pythonize_recurse(child->children));
|
|
child = child->next;
|
|
}
|
|
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
|
if(!strcmp(xml->name, "groups")) {
|
|
obj = PyList_New(0);
|
|
while(child != NULL) {
|
|
PyList_Append(obj, pythonize_recurse(child->children));
|
|
child = child->next;
|
|
}
|
|
} else if(!strcmp(xml->name, "group")) {
|
|
// Implicitly handled by parsley:groups handler
|
|
}
|
|
}
|
|
break;
|
|
case XML_TEXT_NODE:
|
|
obj = Py_BuildValue("s", xml->content);
|
|
break;
|
|
}
|
|
if(obj == NULL) {
|
|
Py_INCREF(Py_None);
|
|
return Py_None;
|
|
}
|
|
return obj;
|
|
}
|
|
|
|
static PyObject *
|
|
PyParsley_parse_doc(parsedParsleyPtr ptr, char *type) {
|
|
if(ptr->error != NULL || ptr->xml == NULL) {
|
|
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
|
PyErr_SetString(PyExc_RuntimeError, ptr->error);
|
|
parsed_parsley_free(ptr);
|
|
return NULL;
|
|
}
|
|
|
|
PyObject *output;
|
|
if(!strcmp(type, "json")) {
|
|
struct json_object *json = xml2json(ptr->xml->children->children);
|
|
char* str = json_object_to_json_string(json);
|
|
output = Py_BuildValue("s", str);
|
|
json_object_put(json);
|
|
} else if(!strcmp(type, "xml")) {
|
|
xmlChar* str;
|
|
int size;
|
|
xmlDocDumpMemory(ptr->xml, &str, &size);
|
|
output = Py_BuildValue("s", str);
|
|
} else {
|
|
output = pythonize_recurse(ptr->xml->children->children);
|
|
if(output == NULL){
|
|
Py_INCREF(Py_None);
|
|
return Py_None;
|
|
}
|
|
}
|
|
parsed_parsley_free(ptr);
|
|
return output;
|
|
}
|
|
|
|
/*
 * ORs the bit mask C into the local variable `flags` when B is non-zero.
 * Wrapped in do/while(0) so that it behaves as a single statement and can be
 * used safely inside an if/else without capturing the else branch.
 */
#define SET_FLAG(C, B) do { if (B) flags |= (C); } while (0)
|
|
|
|
|
|
static PyObject *
|
|
PyParsley_parse(PyParsley *self, PyObject *args, PyObject *keywords)
|
|
{
|
|
char *file = NULL;
|
|
char *string = NULL;
|
|
char *input = "html";
|
|
char *output = "python";
|
|
char *base = NULL;
|
|
int prune = 1;
|
|
int collate = 1;
|
|
int allow_net = 1;
|
|
int allow_local = 1;
|
|
int utf8 = 0;
|
|
int len;
|
|
int flags = 0;
|
|
parsedParsleyPtr ptr;
|
|
|
|
static char * list[] = { "file", "string", "input",
|
|
"output", "base", "prune", "collate",
|
|
"allow_net", "allow_local", "utf8", NULL };
|
|
|
|
if (!PyArg_ParseTupleAndKeywords(args, keywords,
|
|
"|ss#sssiiiii", list, &file, &string, &len, &input, &output, &base, &prune, &collate, &allow_net, &allow_local, &utf8)) {
|
|
return NULL;
|
|
}
|
|
|
|
if(!strcmp(input, "html")) flags |= PARSLEY_OPTIONS_HTML;
|
|
SET_FLAG(PARSLEY_OPTIONS_PRUNE, prune);
|
|
SET_FLAG(PARSLEY_OPTIONS_COLLATE, collate);
|
|
SET_FLAG(PARSLEY_OPTIONS_ALLOW_NET, allow_net);
|
|
SET_FLAG(PARSLEY_OPTIONS_ALLOW_LOCAL, allow_local);
|
|
SET_FLAG(PARSLEY_OPTIONS_FORCE_UTF8, utf8);
|
|
|
|
// printf("%d %d %d %d %d \n", prune, collate, allow_net, allow_local, flags);
|
|
|
|
if(self->parsley == NULL) {
|
|
PyErr_SetString(PyExc_RuntimeError, "parsley data is NULL");
|
|
return NULL;
|
|
}
|
|
|
|
if(file != NULL) {
|
|
ptr = parsley_parse_file(self->parsley, file, flags);
|
|
} else {
|
|
ptr = parsley_parse_string(self->parsley, string, len, base, flags);
|
|
}
|
|
|
|
return PyParsley_parse_doc(ptr, output);
|
|
}
|
|
|
|
|
|
static PyMethodDef PyParsley_methods[] = {
|
|
{"parse", (PyCFunction)PyParsley_parse, METH_VARARGS | METH_KEYWORDS,
|
|
"Parses with a variety of options"
|
|
},
|
|
// {"parse_string", (PyCFunction)PyParsley_parse_string, METH_VARARGS,
|
|
// "Parses an in-memory string with the current parslet"
|
|
// },
|
|
// {"parse_file", (PyCFunction)PyParsley_parse_file, METH_VARARGS,
|
|
// "Parses file or url with the current parslet"
|
|
// },
|
|
{NULL} /* Sentinel */
|
|
};
|
|
|
|
static PyTypeObject pyparsley_PyParsleyType = {
|
|
PyObject_HEAD_INIT(NULL)
|
|
0, /*ob_size*/
|
|
"pyparsley.PyParsley", /*tp_name*/
|
|
sizeof(PyParsley), /*tp_basicsize*/
|
|
0, /*tp_itemsize*/
|
|
(destructor) PyParsley_dealloc, /*tp_dealloc*/
|
|
0, /*tp_print*/
|
|
0, /*tp_getattr*/
|
|
0, /*tp_setattr*/
|
|
0, /*tp_compare*/
|
|
0, /*tp_repr*/
|
|
0, /*tp_as_number*/
|
|
0, /*tp_as_sequence*/
|
|
0, /*tp_as_mapping*/
|
|
0, /*tp_hash */
|
|
0, /*tp_call*/
|
|
0, /*tp_str*/
|
|
0, /*tp_getattro*/
|
|
0, /*tp_setattro*/
|
|
0, /*tp_as_buffer*/
|
|
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
|
"PyParsley objects", /* tp_doc */
|
|
0, /* tp_traverse */
|
|
0, /* tp_clear */
|
|
0, /* tp_richcompare */
|
|
0, /* tp_weaklistoffset */
|
|
0, /* tp_iter */
|
|
0, /* tp_iternext */
|
|
PyParsley_methods, /* tp_methods */
|
|
0, /* tp_members */
|
|
0, /* tp_getset */
|
|
0, /* tp_base */
|
|
0, /* tp_dict */
|
|
0, /* tp_descr_get */
|
|
0, /* tp_descr_set */
|
|
0, /* tp_dictoffset */
|
|
(initproc)PyParsley_init, /* tp_init */
|
|
0, /* tp_alloc */
|
|
PyParsley_new, /* tp_new */
|
|
}; |