pyparsley/pyparsleymodule.c

282 lines
7.8 KiB
C
Raw Permalink Normal View History

2009-01-04 22:00:17 +00:00
#include <Python.h>
2009-03-04 05:31:24 +00:00
#include <parsley.h>
2009-01-04 22:00:17 +00:00
#include <libxslt/xslt.h>
#include <libexslt/exslt.h>
#include <libxslt/xsltInternals.h>
#include <libxslt/transform.h>
#include <libxml/parser.h>
#include <libxml/HTMLparser.h>
#include <libxml/HTMLtree.h>
#include <libxml/xmlwriter.h>
#include <string.h>
2009-01-05 00:32:38 +00:00
#include <stdio.h>
2009-01-04 22:00:17 +00:00
#include <json/json.h>
#include <xml2json.h>
typedef struct {
PyObject_HEAD
2009-03-04 05:31:24 +00:00
parsleyPtr parsley;
} PyParsley;
2009-01-04 22:00:17 +00:00
2009-03-04 05:31:24 +00:00
/* Forward declaration of the PyParsley type object so that initpyparsley()
 * can reference it; the initialised definition appears later in this file. */
static PyTypeObject pyparsley_PyParsleyType;
2009-01-04 22:00:17 +00:00
2009-03-04 05:31:24 +00:00
static PyMethodDef pyparsley_methods[] = {
2009-01-04 22:00:17 +00:00
{NULL} /* Sentinel */
};
/* Fallback used only when the included headers have not already defined
 * PyMODINIT_FUNC (declarations for DLL import/export). */
#if !defined(PyMODINIT_FUNC)
#  define PyMODINIT_FUNC void
#endif
2009-01-05 00:32:38 +00:00
static PyObject *jsonmodule;
2009-01-04 22:00:17 +00:00
PyMODINIT_FUNC
2009-03-04 05:31:24 +00:00
initpyparsley(void)
2009-01-04 22:00:17 +00:00
{
2009-01-05 00:32:38 +00:00
jsonmodule = PyImport_ImportModule("json");
2009-03-06 23:05:18 +00:00
if(jsonmodule == NULL) {
PyErr_Clear();
jsonmodule = PyImport_ImportModule("simplejson");
}
if(jsonmodule == NULL) return;
2009-01-05 00:32:38 +00:00
2009-01-04 22:00:17 +00:00
PyObject* m;
2009-03-04 05:31:24 +00:00
pyparsley_PyParsleyType.tp_new = PyType_GenericNew;
if (PyType_Ready(&pyparsley_PyParsleyType) < 0)
2009-01-04 22:00:17 +00:00
return;
2009-03-04 05:31:24 +00:00
m = Py_InitModule3("pyparsley", pyparsley_methods,
"Python binding for parsley");
2009-01-04 22:00:17 +00:00
2009-03-04 05:31:24 +00:00
Py_INCREF(&pyparsley_PyParsleyType);
PyModule_AddObject(m, "PyParsley", (PyObject *)&pyparsley_PyParsleyType);
2009-01-04 22:00:17 +00:00
}
static void
2009-03-04 05:31:24 +00:00
PyParsley_dealloc(PyParsley* self)
2009-01-04 22:00:17 +00:00
{
2009-03-04 05:31:24 +00:00
if(self->parsley != NULL) parsley_free(self->parsley);
2009-01-04 22:00:17 +00:00
self->ob_type->tp_free((PyObject*)self);
}
static PyObject *
2009-03-04 05:31:24 +00:00
PyParsley_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2009-01-04 22:00:17 +00:00
{
2009-03-04 05:31:24 +00:00
PyParsley *self;
self = (PyParsley *)type->tp_alloc(type, 0);
2009-01-04 22:00:17 +00:00
return (PyObject *)self;
}
static int
2009-03-04 05:31:24 +00:00
PyParsley_init(PyParsley *self, PyObject *args, PyObject *kwds)
2009-01-04 22:00:17 +00:00
{
2009-01-05 00:32:38 +00:00
PyObject *script;
char *string = "";
2009-01-04 22:00:17 +00:00
char *incl = "";
2009-01-05 00:32:38 +00:00
if (!PyArg_ParseTuple(args, "O|S", &script, &incl)) {
2009-01-04 22:00:17 +00:00
Py_DECREF(self);
2009-01-05 00:32:38 +00:00
return -1;
2009-01-04 22:00:17 +00:00
}
2009-01-05 00:32:38 +00:00
PyObject *dumps = PyObject_GetAttrString(jsonmodule, "dumps");
if(dumps == NULL) return -1;
if(PyDict_Check(script)){
script = PyObject_CallFunctionObjArgs(dumps, script, NULL);
if(script == NULL) return -1;
}
string = PyString_AsString(script);
if(string == NULL) return -1;
2009-03-04 05:31:24 +00:00
self->parsley = parsley_compile(string, incl);
2009-01-04 22:00:17 +00:00
2009-03-04 05:31:24 +00:00
if(self->parsley->error != NULL) {
PyErr_SetString(PyExc_RuntimeError, self->parsley->error);
2009-01-04 22:00:17 +00:00
Py_DECREF(self);
2009-01-05 00:32:38 +00:00
return -1;
2009-01-04 22:00:17 +00:00
}
return 0;
}
static PyObject *pythonize_recurse(xmlNodePtr xml);

/* Converts a possibly absent subtree into a new reference, mapping a NULL
 * node to None. Returns NULL only when a Python error has been set. */
static PyObject *
pythonize_or_none(xmlNodePtr xml) {
  if(xml == NULL) {
    Py_INCREF(Py_None);
    return Py_None;
  }
  return pythonize_recurse(xml);
}

/*
 * Converts an XML subtree into Python objects.
 *
 *   - element without a namespace: a dict built from `xml` and all of its
 *     following siblings, keyed by node name, each value being the
 *     conversion of that node's children (None when it has none);
 *   - element "groups" whose namespace prefix is "parsley": a list with one
 *     entry per child, each entry being the conversion of that child's
 *     children (None when it has none);
 *   - text node: a Python string built from the node content;
 *   - anything else: None.
 *
 * Returns a new reference. Returns NULL without an exception when `xml` is
 * NULL, and NULL with an exception set when a Python API call fails.
 */
static PyObject *
pythonize_recurse(xmlNodePtr xml) {
  xmlNodePtr child;
  PyObject *obj = NULL;
  PyObject *value;
  int rc;

  if(xml == NULL) return NULL;

  switch(xml->type) {
    case XML_ELEMENT_NODE:
      if(xml->ns == NULL) {
        obj = PyDict_New();
        if(obj == NULL) return NULL;
        for(child = xml; child != NULL; child = child->next) {
          value = pythonize_or_none(child->children);
          if(value == NULL) goto fail;
          /* PyDict_SetItemString does not steal the value reference. */
          rc = PyDict_SetItemString(obj, (const char *)child->name, value);
          Py_DECREF(value);
          if(rc < 0) goto fail;
        }
      } else if(xml->ns->prefix != NULL &&
                !strcmp((const char *)xml->ns->prefix, "parsley")) {
        if(!strcmp((const char *)xml->name, "groups")) {
          obj = PyList_New(0);
          if(obj == NULL) return NULL;
          for(child = xml->children; child != NULL; child = child->next) {
            value = pythonize_or_none(child->children);
            if(value == NULL) goto fail;
            /* PyList_Append does not steal the value reference. */
            rc = PyList_Append(obj, value);
            Py_DECREF(value);
            if(rc < 0) goto fail;
          }
        }
        /* "group" elements are handled implicitly by the "groups" branch. */
      }
      break;
    case XML_TEXT_NODE:
      obj = Py_BuildValue("s", (const char *)xml->content);
      if(obj == NULL) return NULL;
      break;
    default:
      break;
  }

  if(obj == NULL) {
    Py_INCREF(Py_None);
    return Py_None;
  }
  return obj;

fail:
  Py_DECREF(obj);
  return NULL;
}
static PyObject *
2009-03-04 05:31:24 +00:00
PyParsley_parse_doc(parsedParsleyPtr ptr, char *type) {
2009-01-07 04:03:52 +00:00
if(ptr->error != NULL || ptr->xml == NULL) {
2009-03-04 05:31:24 +00:00
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
2009-01-07 04:03:52 +00:00
PyErr_SetString(PyExc_RuntimeError, ptr->error);
2009-03-04 05:31:24 +00:00
parsed_parsley_free(ptr);
2009-01-04 22:00:17 +00:00
return NULL;
}
PyObject *output;
if(!strcmp(type, "json")) {
2009-01-07 04:03:52 +00:00
struct json_object *json = xml2json(ptr->xml->children->children);
2009-01-04 22:00:17 +00:00
char* str = json_object_to_json_string(json);
2009-01-05 00:32:38 +00:00
output = Py_BuildValue("s", str);
2009-01-04 22:00:17 +00:00
json_object_put(json);
} else if(!strcmp(type, "xml")) {
xmlChar* str;
2009-01-04 22:00:17 +00:00
int size;
2009-01-07 04:03:52 +00:00
xmlDocDumpMemory(ptr->xml, &str, &size);
2009-01-05 00:32:38 +00:00
output = Py_BuildValue("s", str);
2009-01-04 22:00:17 +00:00
} else {
2009-01-07 04:03:52 +00:00
output = pythonize_recurse(ptr->xml->children->children);
2009-01-04 22:00:17 +00:00
if(output == NULL){
Py_INCREF(Py_None);
return Py_None;
}
}
2009-03-04 05:31:24 +00:00
parsed_parsley_free(ptr);
2009-01-04 22:00:17 +00:00
return output;
}
2009-04-02 01:29:20 +00:00
/* ORs C into the variable `flags` of the enclosing scope when B is non-zero.
 * Wrapped in do/while(0) so that it behaves as a single statement (safe
 * inside an unbraced if/else); C is parenthesised against precedence
 * surprises. */
#define SET_FLAG(C, B) do { if(B) flags |= (C); } while(0)
2009-04-02 01:22:03 +00:00
2009-01-04 22:00:17 +00:00
static PyObject *
2009-03-04 05:31:24 +00:00
PyParsley_parse(PyParsley *self, PyObject *args, PyObject *keywords)
2009-01-04 22:00:17 +00:00
{
char *file = NULL;
char *string = NULL;
2009-01-05 00:32:38 +00:00
char *input = "html";
2009-01-04 22:00:17 +00:00
char *output = "python";
2009-04-02 01:22:03 +00:00
char *base = NULL;
int prune = 1;
int collate = 1;
int allow_net = 1;
int allow_local = 1;
2009-12-24 21:49:23 +00:00
int utf8 = 0;
2009-01-04 22:00:17 +00:00
int len;
2009-04-02 01:22:03 +00:00
int flags = 0;
2009-03-04 05:31:24 +00:00
parsedParsleyPtr ptr;
2009-01-04 22:00:17 +00:00
2009-04-02 01:22:03 +00:00
static char * list[] = { "file", "string", "input",
"output", "base", "prune", "collate",
2009-12-24 21:49:23 +00:00
"allow_net", "allow_local", "utf8", NULL };
2009-01-04 22:00:17 +00:00
if (!PyArg_ParseTupleAndKeywords(args, keywords,
2009-12-24 21:49:23 +00:00
"|ss#sssiiiii", list, &file, &string, &len, &input, &output, &base, &prune, &collate, &allow_net, &allow_local, &utf8)) {
2009-01-04 22:00:17 +00:00
return NULL;
}
2009-04-02 01:22:03 +00:00
if(!strcmp(input, "html")) flags |= PARSLEY_OPTIONS_HTML;
SET_FLAG(PARSLEY_OPTIONS_PRUNE, prune);
SET_FLAG(PARSLEY_OPTIONS_COLLATE, collate);
SET_FLAG(PARSLEY_OPTIONS_ALLOW_NET, allow_net);
SET_FLAG(PARSLEY_OPTIONS_ALLOW_LOCAL, allow_local);
2009-12-24 21:49:23 +00:00
SET_FLAG(PARSLEY_OPTIONS_FORCE_UTF8, utf8);
2009-04-02 01:29:20 +00:00
// printf("%d %d %d %d %d \n", prune, collate, allow_net, allow_local, flags);
2009-04-02 01:22:03 +00:00
2009-03-04 05:31:24 +00:00
if(self->parsley == NULL) {
PyErr_SetString(PyExc_RuntimeError, "parsley data is NULL");
2009-01-05 00:32:38 +00:00
return NULL;
}
2009-01-04 22:00:17 +00:00
if(file != NULL) {
2009-04-02 01:22:03 +00:00
ptr = parsley_parse_file(self->parsley, file, flags);
2009-01-04 22:00:17 +00:00
} else {
2009-04-02 01:22:03 +00:00
ptr = parsley_parse_string(self->parsley, string, len, base, flags);
2009-01-05 00:32:38 +00:00
}
2009-03-04 05:31:24 +00:00
return PyParsley_parse_doc(ptr, output);
2009-01-04 22:00:17 +00:00
}
2009-03-04 05:31:24 +00:00
static PyMethodDef PyParsley_methods[] = {
{"parse", (PyCFunction)PyParsley_parse, METH_VARARGS | METH_KEYWORDS,
2009-01-04 22:00:17 +00:00
"Parses with a variety of options"
},
2009-03-04 05:31:24 +00:00
// {"parse_string", (PyCFunction)PyParsley_parse_string, METH_VARARGS,
// "Parses an in-memory string with the current parslet"
2009-01-04 22:00:17 +00:00
// },
2009-03-04 05:31:24 +00:00
// {"parse_file", (PyCFunction)PyParsley_parse_file, METH_VARARGS,
// "Parses file or url with the current parslet"
2009-01-04 22:00:17 +00:00
// },
{NULL} /* Sentinel */
};
2009-03-04 05:31:24 +00:00
static PyTypeObject pyparsley_PyParsleyType = {
2009-01-04 22:00:17 +00:00
PyObject_HEAD_INIT(NULL)
0, /*ob_size*/
2009-03-04 05:31:24 +00:00
"pyparsley.PyParsley", /*tp_name*/
sizeof(PyParsley), /*tp_basicsize*/
2009-01-04 22:00:17 +00:00
0, /*tp_itemsize*/
2009-03-04 05:31:24 +00:00
(destructor) PyParsley_dealloc, /*tp_dealloc*/
2009-01-04 22:00:17 +00:00
0, /*tp_print*/
0, /*tp_getattr*/
0, /*tp_setattr*/
0, /*tp_compare*/
0, /*tp_repr*/
0, /*tp_as_number*/
0, /*tp_as_sequence*/
0, /*tp_as_mapping*/
0, /*tp_hash */
0, /*tp_call*/
0, /*tp_str*/
0, /*tp_getattro*/
0, /*tp_setattro*/
0, /*tp_as_buffer*/
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
2009-03-04 05:31:24 +00:00
"PyParsley objects", /* tp_doc */
2009-01-04 22:00:17 +00:00
0, /* tp_traverse */
0, /* tp_clear */
0, /* tp_richcompare */
0, /* tp_weaklistoffset */
0, /* tp_iter */
0, /* tp_iternext */
2009-03-04 05:31:24 +00:00
PyParsley_methods, /* tp_methods */
2009-01-04 22:00:17 +00:00
0, /* tp_members */
0, /* tp_getset */
0, /* tp_base */
0, /* tp_dict */
0, /* tp_descr_get */
0, /* tp_descr_set */
0, /* tp_dictoffset */
2009-03-04 05:31:24 +00:00
(initproc)PyParsley_init, /* tp_init */
2009-01-04 22:00:17 +00:00
0, /* tp_alloc */
2009-03-04 05:31:24 +00:00
PyParsley_new, /* tp_new */
2009-01-04 22:00:17 +00:00
};