tentative rename, etc
This commit is contained in:
parent
70a7c52d92
commit
712f3a64e3
12
INSTALL
12
INSTALL
|
@ -1,4 +1,4 @@
|
|||
Dexter depends on
|
||||
Parsley depends on
|
||||
- the JSON C library from http://oss.metaparadigm.com/json-c/ (I used 0.7)
|
||||
- argp (standard with Linux, other platforms use argp-standalone package)
|
||||
- pcre (with dev headers)
|
||||
|
@ -32,17 +32,11 @@ sudo make install
|
|||
|
||||
Ruby Binding (via Gems)
|
||||
------------------------------------------------------------------------
|
||||
# install the C version first
|
||||
cd ruby
|
||||
gem build dexterous.gemspec
|
||||
sudo gem install dexterous
|
||||
http://github.com/fizx/parsley-ruby
|
||||
|
||||
Python Binding
|
||||
------------------------------------------------------------------------
|
||||
# install the C version first
|
||||
# Use Python 2.6, as this depends on the json support in Python's stdlib
|
||||
cd python
|
||||
python setup.py install
|
||||
http://github.com/fizx/pyparsley
|
||||
|
||||
Other OS/Configurations:
|
||||
------------------------------------------------------------------------
|
||||
|
|
8
INTRO
8
INTRO
|
@ -1,6 +1,6 @@
|
|||
<html><textarea style="width:100%;height:100%">
|
||||
Towards a universal scraping API
|
||||
or, an introduction to dexter
|
||||
or, an introduction to parsley
|
||||
|
||||
Web scraping is a chore. Scraper scripts are brittle and slow, and everyone writes their own custom implementation, resulting in countless hours of repeated work. Let's work together to make it easier. Let's do what regular expressions did for text processing, and what SQL did for databases. Let's create a universal domain-specific language for web scraping.
|
||||
|
||||
|
@ -47,8 +47,8 @@ Applying this to http://www.yelp.com/biz/amnesia-san-francisco yields:
|
|||
You'll note that the output structure mirrors the input structure. In the Ruby binding, you can get both input and output natively:
|
||||
|
||||
> require "open-uri"
|
||||
> require "dexter"
|
||||
> Dexterous.new({"title" => "h1", "links" => ["a"]}).parse(:url => "http://www.yelp.com/biz/amnesia-san-francisco")
|
||||
> require "parsley"
|
||||
> Parsley.new({"title" => "h1", "links" => ["a"]}).parse(:url => "http://www.yelp.com/biz/amnesia-san-francisco")
|
||||
#=> {"title"=>"Amnesia", "links"=>["Yelp", "Welcome", "About Me"]}
|
||||
|
||||
We'll also add both explicit and implicit grouping. Here's an extension of the previous example with explicit grouping:
|
||||
|
@ -81,6 +81,4 @@ If you instead wanted to group by date, you could use implicit grouping. It's i
|
|||
}]
|
||||
}
|
||||
|
||||
In the next blog article, I'll talk about variables, crawling with dex, dex validations, sharing, and automatic inference of dex scripts from web page structures. Hopefully, you have a taste of what dex scripts can do, and you like it. There's an alpha implementation under active development at []. I'd love to have more collaborators, bug reports, unit tests, docs, encouragement, etc.
|
||||
|
||||
</textarea></html>
|
68
Makefile.am
68
Makefile.am
|
@ -1,55 +1,41 @@
|
|||
AM_YFLAGS = -d
|
||||
BUILT_SOURCES=parser.h
|
||||
|
||||
lib_LTLIBRARIES = libdexter.la
|
||||
libdexter_la_SOURCES = dex_mem.c xml2json.c regexp.c printbuf.c functions.c util.c kstring.c obstack.c scanner.l parser.y dexter.c
|
||||
include_HEADERS = dexter.h obstack.h xml2json.h
|
||||
lib_LTLIBRARIES = libparsley.la
|
||||
libparsley_la_SOURCES = parsley_mem.c xml2json.c regexp.c printbuf.c functions.c util.c kstring.c obstack.c scanner.l parser.y parsley.c
|
||||
include_HEADERS = parsley.h obstack.h xml2json.h
|
||||
|
||||
bin_PROGRAMS = dexterc dexter
|
||||
bin_PROGRAMS = parsleyc parsley
|
||||
|
||||
dexterc_SOURCES = dexterc_main.c
|
||||
dexterc_LDADD = libdexter.la
|
||||
parsleyc_SOURCES = parsleyc_main.c
|
||||
parsleyc_LDADD = libparsley.la
|
||||
|
||||
dexter_SOURCES = dexter_main.c
|
||||
dexter_LDADD = libdexter.la
|
||||
parsley_SOURCES = parsley_main.c
|
||||
parsley_LDADD = libparsley.la
|
||||
|
||||
bisect:
|
||||
./bootstrap.sh && ./configure && make clean && make check
|
||||
|
||||
port:
|
||||
make clean
|
||||
rm -rf /tmp/dexter-`cat VERSION`
|
||||
cp -R . /tmp/dexter-`cat VERSION`
|
||||
tar -C /tmp/ --exclude release --exclude .git -zcf "/tmp/dexter-`cat VERSION`.tar.gz" dexter-`cat VERSION`
|
||||
rsync --progress "/tmp/dexter-`cat VERSION`.tar.gz" kylemaxwell.com:/var/www/kylemaxwell_com/dexter/
|
||||
cat Portfile.in | sed "s/<VERSION>/`cat VERSION`/" > Portfile
|
||||
echo "checksums \
|
||||
md5 `md5 /tmp/dexter-\`cat VERSION\`.tar.gz | sed "s/.*= //"` \
|
||||
sha1 `openssl sha1 /tmp/dexter-\`cat VERSION\`.tar.gz | sed "s/.*= //"` \
|
||||
rmd160 `openssl rmd160 /tmp/dexter-\`cat VERSION\`.tar.gz | sed "s/.*= //"`" \
|
||||
>> Portfile
|
||||
sudo port build
|
||||
|
||||
install-all:
|
||||
./bootstrap.sh && ./configure && make && make install && cd ruby && rake install && cd ../python && python setup.py install
|
||||
|
||||
check-am:
|
||||
@echo "fictional..."; ./dexter test/fictional.dex test/fictional.html | diff test/fictional.json - && echo " success."
|
||||
@echo "fictional-opt..."; ./dexter test/fictional-opt.dex test/fictional-opt.html | diff test/fictional-opt.json - && echo " success."
|
||||
@echo "function-magic..."; ./dexter test/function-magic.dex test/function-magic.html | diff test/function-magic.json - && echo " success."
|
||||
@echo "malformed-expr..."; ./dexter test/malformed-expr.dex test/malformed-expr.html | diff test/malformed-expr.json - && echo " success."
|
||||
@echo "malformed-json..."; ./dexter test/malformed-json.dex test/malformed-json.html | diff test/malformed-json.json - && echo " success."
|
||||
@echo "css_attr..."; ./dexter -x test/css_attr.dex test/css_attr.html | diff test/css_attr.json - && echo " success."
|
||||
@echo "match..."; ./dexter -x test/match.dex test/match.xml | diff test/match.json - && echo " success."
|
||||
@echo "position..."; ./dexter test/position.dex test/position.html | diff test/position.json - && echo " success."
|
||||
@echo "replace..."; ./dexter -x test/replace.dex test/replace.xml | diff test/replace.json - && echo " success."
|
||||
@echo "scope..."; ./dexter test/scope.dex test/scope.html | diff test/scope.json - && echo " success."
|
||||
@echo "test..."; ./dexter -x test/test.dex test/test.xml | diff test/test.json - && echo " success."
|
||||
@echo "yelp..."; ./dexter test/yelp.dex test/yelp.html | diff test/yelp.json - && echo " success."
|
||||
@echo "optional..."; ./dexter test/optional.dex test/optional.html | diff test/optional.json - && echo " success."
|
||||
@echo "malformed-function..."; ./dexter test/malformed-function.dex test/malformed-function.html | diff test/malformed-function.json - && echo " success."
|
||||
@echo "empty..."; ./dexter test/empty.dex test/empty.html | diff test/empty.json - && echo " success."
|
||||
@echo "trivial..."; ./dexter test/trivial.dex test/trivial.html | diff test/trivial.json - && echo " success."
|
||||
@echo "trivial2..."; ./dexter test/trivial2.dex test/trivial2.html | diff test/trivial2.json - && echo " success."
|
||||
@echo "craigs-simple..."; ./dexter test/craigs-simple.dex test/craigs-simple.html | diff test/craigs-simple.json - && echo " success."
|
||||
@echo "yelp-home..."; ./dexter test/yelp-home.dex test/yelp-home.html | diff test/yelp-home.json - && echo " success."
|
||||
@echo "fictional..."; ./parsley test/fictional.let test/fictional.html | diff test/fictional.json - && echo " success."
|
||||
@echo "fictional-opt..."; ./parsley test/fictional-opt.let test/fictional-opt.html | diff test/fictional-opt.json - && echo " success."
|
||||
@echo "function-magic..."; ./parsley test/function-magic.let test/function-magic.html | diff test/function-magic.json - && echo " success."
|
||||
@echo "malformed-expr..."; ./parsley test/malformed-expr.let test/malformed-expr.html | diff test/malformed-expr.json - && echo " success."
|
||||
@echo "malformed-json..."; ./parsley test/malformed-json.let test/malformed-json.html | diff test/malformed-json.json - && echo " success."
|
||||
@echo "css_attr..."; ./parsley -x test/css_attr.let test/css_attr.html | diff test/css_attr.json - && echo " success."
|
||||
@echo "match..."; ./parsley -x test/match.let test/match.xml | diff test/match.json - && echo " success."
|
||||
@echo "position..."; ./parsley test/position.let test/position.html | diff test/position.json - && echo " success."
|
||||
@echo "replace..."; ./parsley -x test/replace.let test/replace.xml | diff test/replace.json - && echo " success."
|
||||
@echo "scope..."; ./parsley test/scope.let test/scope.html | diff test/scope.json - && echo " success."
|
||||
@echo "test..."; ./parsley -x test/test.let test/test.xml | diff test/test.json - && echo " success."
|
||||
@echo "yelp..."; ./parsley test/yelp.let test/yelp.html | diff test/yelp.json - && echo " success."
|
||||
@echo "optional..."; ./parsley test/optional.let test/optional.html | diff test/optional.json - && echo " success."
|
||||
@echo "malformed-function..."; ./parsley test/malformed-function.let test/malformed-function.html | diff test/malformed-function.json - && echo " success."
|
||||
@echo "empty..."; ./parsley test/empty.let test/empty.html | diff test/empty.json - && echo " success."
|
||||
@echo "trivial..."; ./parsley test/trivial.let test/trivial.html | diff test/trivial.json - && echo " success."
|
||||
@echo "trivial2..."; ./parsley test/trivial2.let test/trivial2.html | diff test/trivial2.json - && echo " success."
|
||||
@echo "craigs-simple..."; ./parsley test/craigs-simple.let test/craigs-simple.html | diff test/craigs-simple.json - && echo " success."
|
||||
@echo "yelp-home..."; ./parsley test/yelp-home.let test/yelp-home.html | diff test/yelp-home.json - && echo " success."
|
|
@ -60,7 +60,7 @@ am__installdirs = "$(DESTDIR)$(libdir)" "$(DESTDIR)$(bindir)" \
|
|||
libLTLIBRARIES_INSTALL = $(INSTALL)
|
||||
LTLIBRARIES = $(lib_LTLIBRARIES)
|
||||
libdexter_la_LIBADD =
|
||||
am_libdexter_la_OBJECTS = dex_mem.lo xml2json.lo regexp.lo printbuf.lo \
|
||||
am_libdexter_la_OBJECTS = parsley_mem.lo xml2json.lo regexp.lo printbuf.lo \
|
||||
functions.lo util.lo kstring.lo obstack.lo scanner.lo \
|
||||
parser.lo dexter.lo
|
||||
libdexter_la_OBJECTS = $(am_libdexter_la_OBJECTS)
|
||||
|
@ -229,7 +229,7 @@ top_srcdir = @top_srcdir@
|
|||
AM_YFLAGS = -d
|
||||
BUILT_SOURCES = parser.h
|
||||
lib_LTLIBRARIES = libdexter.la
|
||||
libdexter_la_SOURCES = dex_mem.c xml2json.c regexp.c printbuf.c functions.c util.c kstring.c obstack.c scanner.l parser.y dexter.c
|
||||
libdexter_la_SOURCES = parsley_mem.c xml2json.c regexp.c printbuf.c functions.c util.c kstring.c obstack.c scanner.l parser.y dexter.c
|
||||
include_HEADERS = dexter.h obstack.h xml2json.h
|
||||
dexterc_SOURCES = dexterc_main.c
|
||||
dexterc_LDADD = libdexter.la
|
||||
|
@ -348,7 +348,7 @@ mostlyclean-compile:
|
|||
distclean-compile:
|
||||
-rm -f *.tab.c
|
||||
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dex_mem.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/parsley_mem.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dexter.Plo@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dexter_main.Po@am__quote@
|
||||
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/dexterc_main.Po@am__quote@
|
||||
|
|
15
OUTLINE
15
OUTLINE
|
@ -1,15 +0,0 @@
|
|||
- what is dex?
|
||||
- data extraction from xml/html
|
||||
- current options: hpricot/nokogiri, XSLT, beautiful soup
|
||||
- selectors + structure
|
||||
- selectors: xpath + css + functions (xpath+exsl+regex)
|
||||
|
||||
h1>a
|
||||
substring-after(h1, ':')
|
||||
regexp:match(span.rating, '\d+', '')
|
||||
//location[obj='some-id']/ancestor::group/@id
|
||||
html('http://google.com')//title
|
||||
html(//div/a/@href)//title
|
||||
|
||||
- structure: json-by-example
|
||||
- example: yelp.dex
|
2
PAPER
2
PAPER
|
@ -1,6 +1,6 @@
|
|||
Abstract
|
||||
================================================================
|
||||
A common programming task is data extraction from xml and html documents. I introduce dex, an embedded language (ala SQL, regular expressions) that improves the usability and/or speed of current extraction techniques.
|
||||
A common programming task is data extraction from XML and HTML documents. I introduce parsley, an embedded language (à la SQL, regular expressions) that improves the usability and/or speed of current extraction techniques.
|
||||
|
||||
Introduction
|
||||
================================================================
|
||||
|
|
8
Portfile
8
Portfile
|
@ -1,15 +1,15 @@
|
|||
# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
|
||||
# $Id$
|
||||
PortSystem 1.0
|
||||
name dexter
|
||||
name parsley
|
||||
version 0.1.5
|
||||
categories net
|
||||
maintainers kyle@kylemaxwell.com
|
||||
description Data extractor
|
||||
long_description Dexter is a system to extract data from HTML/XML documents
|
||||
homepage http://github.com/fizx/dexter
|
||||
long_description Parsley is a system to extract data from HTML/XML documents
|
||||
homepage http://github.com/fizx/parsley
|
||||
platforms darwin
|
||||
master_sites http://kylemaxwell.com/dexter/
|
||||
master_sites http://parslets.com
|
||||
depends_lib port:argp-standalone \
|
||||
port:json-c \
|
||||
port:libxslt \
|
||||
|
|
|
@ -1,15 +1,15 @@
|
|||
# -*- coding: utf-8; mode: tcl; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- vim:fenc=utf-8:ft=tcl:et:sw=4:ts=4:sts=4
|
||||
# $Id$
|
||||
PortSystem 1.0
|
||||
name dexter
|
||||
name parsley
|
||||
version <VERSION>
|
||||
categories net
|
||||
maintainers kyle@kylemaxwell.com
|
||||
description Data extractor
|
||||
long_description Dexter is a system to extract data from HTML/XML documents
|
||||
homepage http://github.com/fizx/dexter
|
||||
long_description Parsley is a system to extract data from HTML/XML documents
|
||||
homepage http://github.com/fizx/parsley
|
||||
platforms darwin
|
||||
master_sites http://kylemaxwell.com/dexter/
|
||||
master_sites http://parslets.com
|
||||
depends_lib port:argp-standalone \
|
||||
port:json-c \
|
||||
port:libxslt \
|
||||
|
|
|
@ -1,45 +1,45 @@
|
|||
To use dexter from C, the following functions are available from dexter.h. In
|
||||
To use parsley from C, the following functions are available from parsley.h. In
|
||||
addition, there is a function to convert xml documents of the type returned by
|
||||
dexter into json.
|
||||
parsley into json.
|
||||
|
||||
You will also need a passing familiarity with libxml2 and json-c to print, manipulate, and free some of the generated objects.
|
||||
|
||||
- http://svn.metaparadigm.com/svn/json-c/trunk
|
||||
- http://xmlsoft.org/
|
||||
|
||||
From dexter.h
|
||||
From parsley.h
|
||||
=============
|
||||
|
||||
parsedDexPtr -- a struct that contains the following elements:
|
||||
- xmlDocPtr xml -- the output of a dex document parse, as a libxml2 document
|
||||
parsedParsleyPtr -- a struct that contains the following elements:
|
||||
- xmlDocPtr xml -- the output of a parslet document parse, as a libxml2 document
|
||||
- char *error -- an error message, or NULL if no error
|
||||
- compiled_dex *dex -- reference to the dex that did the parsing
|
||||
- compiled_parsley *parsley -- reference to the parsley that did the parsing
|
||||
|
||||
dexPtr dex_compile(char* dex, char* incl)
|
||||
parsleyPtr parsley_compile(char* parsley, char* incl)
|
||||
|
||||
Arguments:
|
||||
- char* dex -- a string of dex to compile.
|
||||
- char* parsley -- a string of parsley to compile.
|
||||
- char* incl -- arbitrary XSLT to inject directly into the stylesheet,
|
||||
outside any templates.
|
||||
|
||||
Returns: A structure that you can pass to dex_parse_* to do the actual
|
||||
Returns: A structure that you can pass to parsley_parse_* to do the actual
|
||||
parsing. This structure contains the compiled XSLT.
|
||||
|
||||
Notes: This is *NOT* thread-safe. (Usage of the dex via dex_parse_* *IS*
|
||||
Notes: This is *NOT* thread-safe. (Usage of the parslet via parsley_parse_* *IS*
|
||||
thread-safe, however.)
|
||||
|
||||
void dex_free(dexPtr);
|
||||
void parsley_free(parsleyPtr);
|
||||
|
||||
Frees the dexPtr's memory.
|
||||
Frees the parsleyPtr's memory.
|
||||
|
||||
void parsed_dex_free(parsedDexPtr);
|
||||
void parsed_parsley_free(parsedParsleyPtr);
|
||||
|
||||
Frees the parsedDexPtr's memory.
|
||||
Frees the parsedParsleyPtr's memory.
|
||||
|
||||
parsedDexPtr dex_parse_file(dexPtr dex, char* file_name, boolean html);
|
||||
parsedParsleyPtr parsley_parse_file(parsleyPtr parsley, char* file_name, boolean html);
|
||||
|
||||
Arguments:
|
||||
- dexPtr dex -- Compiled dex struct
|
||||
- parsleyPtr parsley -- Compiled parsley struct
|
||||
- char* file_name -- file to parse
|
||||
- boolean html -- Use the html parser? (instead of xml)
|
||||
|
||||
|
@ -48,14 +48,14 @@ parsedDexPtr dex_parse_file(dexPtr dex, char* file_name, boolean html);
|
|||
like xmlSaveFormatFile(). If you want json output, look below for xml2json
|
||||
docs.
|
||||
|
||||
parsedDexPtr dex_parse_string(dexPtr dex, char* string, size_t len, boolean html);
|
||||
parsedParsleyPtr parsley_parse_string(parsleyPtr parsley, char* string, size_t len, boolean html);
|
||||
|
||||
Parses the in-memory string/length combination given. See dex_parse_file
|
||||
Parses the in-memory string/length combination given. See parsley_parse_file
|
||||
docs.
|
||||
|
||||
parsedDexPtr dex_parse_doc(dexPtr dex, xmlDocPtr doc);
|
||||
parsedParsleyPtr parsley_parse_doc(parsleyPtr parsley, xmlDocPtr doc);
|
||||
|
||||
Uses the dex parser to parse a libxml2 document.
|
||||
Uses the parsley parser to parse a libxml2 document.
|
||||
|
||||
From xml2json.h
|
||||
===============
|
||||
|
@ -63,13 +63,13 @@ From xml2json.h
|
|||
struct json_object * xml2json(xmlNodePtr);
|
||||
|
||||
Converts an xml subtree to json. The xml should be in the format returned
|
||||
by dexter. Basically, xml attributes get ignored, and if you want an array
|
||||
by parsley. Basically, xml attributes get ignored, and if you want an array
|
||||
like [a,b], use:
|
||||
|
||||
<dex:groups>
|
||||
<dex:group>a</dex:group>
|
||||
<dex:group>b</dex:group>
|
||||
</dex:groups>
|
||||
<parsley:groups>
|
||||
<parsley:group>a</parsley:group>
|
||||
<parsley:group>b</parsley:group>
|
||||
</parsley:groups>
|
||||
|
||||
To get a null-terminated string out, use:
|
||||
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
<h3>Overview<a name="start-readme"> </a></h3>
|
||||
|
||||
Dexter is a simple language for data-extraction from XML-like documents (including HTML). Dexter is:
|
||||
Parsley is a simple language for data-extraction from XML-like documents (including HTML). Parsley is:
|
||||
|
||||
1. Blazing fast -- Typical HTML parses are sub-50ms.
|
||||
2. Easy to write and understand -- Dexter uses your current knowledge of JSON, CSS, and XPath.
|
||||
3. Powerful. Dexter can understand full XPath, including standard and user-defined functions.
|
||||
2. Easy to write and understand -- Parsley uses your current knowledge of JSON, CSS, and XPath.
|
||||
3. Powerful. Parsley can understand full XPath, including standard and user-defined functions.
|
||||
|
||||
### Examples
|
||||
|
||||
A simple script, or "dex", looks like this:
|
||||
A simple script, or "parslet", looks like this:
|
||||
|
||||
{
|
||||
"title": "h1",
|
||||
|
@ -20,7 +20,7 @@ A simple script, or "dex", looks like this:
|
|||
]
|
||||
}
|
||||
|
||||
This returns JSON or XML output with the same structure. Applying this dex to http://www.yelp.com/biz/amnesia-san-francisco yields either:
|
||||
This returns JSON or XML output with the same structure. Applying this parslet to http://www.yelp.com/biz/amnesia-san-francisco yields either:
|
||||
|
||||
{
|
||||
"title": "Amnesia",
|
||||
|
@ -43,26 +43,26 @@ This returns JSON or XML output with the same structure. Applying this dex to h
|
|||
|
||||
or equivalently:
|
||||
|
||||
<dexter:root>
|
||||
<parsley:root>
|
||||
<title>Amnesia</title>
|
||||
<links>
|
||||
<dexter:group>
|
||||
<parsley:group>
|
||||
<href>/</href>
|
||||
<text>Yelp</text>
|
||||
</dexter:group>
|
||||
<dexter:group>
|
||||
</parsley:group>
|
||||
<parsley:group>
|
||||
<href>/</href>
|
||||
<text>Welcome</text>
|
||||
</dexter:group>
|
||||
<dexter:group>
|
||||
</parsley:group>
|
||||
<parsley:group>
|
||||
<href>/signup?return_url=%2Fuser_details</href>
|
||||
<text> About Me</text>
|
||||
</dexter:group>
|
||||
</parsley:group>
|
||||
.....
|
||||
</links>
|
||||
</dexter:root>
|
||||
</parsley:root>
|
||||
|
||||
This dex could also have been expressed as:
|
||||
This parslet could also have been expressed as:
|
||||
|
||||
{
|
||||
"title": "h1",
|
||||
|
@ -74,7 +74,7 @@ This dex could also have been expressed as:
|
|||
]
|
||||
}
|
||||
|
||||
The "a" in links(a) is a "key selector" -- an explicit grouping (with scope) for the array. You can use any XPath 1.0 or CSS3 expression as a value or a key selector. Dexter will try to be smart, and figure out which you are using. You can use CSS selectors inside XPath functions -- "substring-after(h1>a, ':')" is a valid expression.
|
||||
The "a" in links(a) is a "key selector" -- an explicit grouping (with scope) for the array. You can use any XPath 1.0 or CSS3 expression as a value or a key selector. Parsley will try to be smart, and figure out which you are using. You can use CSS selectors inside XPath functions -- "substring-after(h1>a, ':')" is a valid expression.
|
||||
|
||||
### Variables
|
||||
|
||||
|
|
4
TODO
4
TODO
|
@ -4,8 +4,8 @@
|
|||
# - define stable c api
|
||||
# - p/br support explicit
|
||||
# - p/br support needs div?!
|
||||
# - dex_parse_url support
|
||||
# - ruby binding for dex_parse_url
|
||||
# - parsley_parse_url support
|
||||
# - ruby binding for parsley_parse_url
|
||||
# - relative urls
|
||||
# - p/br support needs multicase handling
|
||||
# - reorganize project (at least tests, makefile.am src?!)
|
||||
|
|
|
@ -17,8 +17,8 @@
|
|||
#include "functions.h"
|
||||
|
||||
|
||||
void dex_register_all(){
|
||||
xsltRegisterExtModuleFunction ((const xmlChar *) "html-document", "http://kylemaxwell.com/dexter/library",
|
||||
void parsley_register_all(){
|
||||
xsltRegisterExtModuleFunction ((const xmlChar *) "html-document", "http://parslets.com/stdlib",
|
||||
xsltHtmlDocumentFunction);
|
||||
}
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#ifndef DEX_FUNCTIONS_H_INCLUDED
|
||||
#define DEX_FUNCTIONS_H_INCLUDED
|
||||
#ifndef PARSLEY_FUNCTIONS_H_INCLUDED
|
||||
#define PARSLEY_FUNCTIONS_H_INCLUDED
|
||||
|
||||
#include <libxml/xpath.h>
|
||||
#include <libxml/HTMLparser.h>
|
||||
|
@ -8,7 +8,7 @@
|
|||
#include <libxslt/transform.h>
|
||||
#include <libxslt/documents.h>
|
||||
|
||||
void dex_register_all();
|
||||
void parsley_register_all();
|
||||
|
||||
static void xsltHtmlDocumentFunction(xmlXPathParserContextPtr, int);
|
||||
static void xsltHtmlDocumentFunctionLoadDocument(xmlXPathParserContextPtr, xmlChar*);
|
||||
|
|
22
kstring.c
22
kstring.c
|
@ -4,8 +4,8 @@
|
|||
#include <stdarg.h>
|
||||
#include "kstring.h"
|
||||
#include "printbuf.h"
|
||||
#include "dex_mem.h"
|
||||
#include "dexter.h"
|
||||
#include "parsley_mem.h"
|
||||
#include "parsley.h"
|
||||
|
||||
char* arepl(char* orig, char* old, char* new) {
|
||||
// printf("y\n");
|
||||
|
@ -32,55 +32,55 @@ char* astrdup(char* c) {
|
|||
}
|
||||
|
||||
char* astrcat(char* a, char* b) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + 1));
|
||||
sprintf(output, "%s%s", a, b);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat3(char* a, char* b, char* c) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + 1));
|
||||
sprintf(output, "%s%s%s", a, b, c);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat4(char* a, char* b, char* c, char* d) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + 1));
|
||||
sprintf(output, "%s%s%s%s", a, b, c, d);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat5(char* a, char* b, char* c, char* d, char* e) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + 1));
|
||||
sprintf(output, "%s%s%s%s%s", a, b, c, d, e);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat6(char* a, char* b, char* c, char* d, char* e, char* f) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + 1));
|
||||
sprintf(output, "%s%s%s%s%s%s", a, b, c, d, e, f);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat7(char* a, char* b, char* c, char* d, char* e, char* f, char* g) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + 1));
|
||||
sprintf(output, "%s%s%s%s%s%s%s", a, b, c, d, e, f, g);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat8(char* a, char* b, char* c, char* d, char* e, char* f, char* g, char* h) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + 1));
|
||||
sprintf(output, "%s%s%s%s%s%s%s%s", a, b, c, d, e, f, g, h);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat9(char* a, char* b, char* c, char* d, char* e, char* f, char* g, char* h, char* i) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + strlen(i) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + strlen(i) + 1));
|
||||
sprintf(output, "%s%s%s%s%s%s%s%s%s", a, b, c, d, e, f, g, h, i);
|
||||
return output;
|
||||
}
|
||||
|
||||
char* astrcat10(char* a, char* b, char* c, char* d, char* e, char* f, char* g, char* h, char* i, char* j) {
|
||||
char* output = (char*) dex_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + strlen(i) + strlen(j) + 1));
|
||||
char* output = (char*) parsley_alloc(sizeof(char) * (strlen(a) + strlen(b) + strlen(c) + strlen(d) + strlen(e) + strlen(f) + strlen(g) + strlen(h) + strlen(i) + strlen(j) + 1));
|
||||
sprintf(output, "%s%s%s%s%s%s%s%s%s%s", a, b, c, d, e, f, g, h, i, j);
|
||||
return output;
|
||||
}
|
4
parser.y
4
parser.y
|
@ -564,11 +564,11 @@ char* xpath_alias(char* key) {
|
|||
|
||||
void init_xpath_alias() {
|
||||
alias_hash = xmlHashCreate(100);
|
||||
xmlHashAddEntry(alias_hash, "html", "dex:html-document");
|
||||
xmlHashAddEntry(alias_hash, "html", "lib:html-document");
|
||||
xmlHashAddEntry(alias_hash, "match", "regexp:match");
|
||||
xmlHashAddEntry(alias_hash, "replace", "regexp:replace");
|
||||
xmlHashAddEntry(alias_hash, "test", "regexp:test");
|
||||
xmlHashAddEntry(alias_hash, "with-newlines", "dex:nl");
|
||||
xmlHashAddEntry(alias_hash, "with-newlines", "lib:nl");
|
||||
|
||||
}
|
||||
|
||||
|
|
136
parsley.c
136
parsley.c
|
@ -4,7 +4,7 @@
|
|||
#include <stdarg.h>
|
||||
#include <json/json.h>
|
||||
#include "kstring.h"
|
||||
#include "dexter.h"
|
||||
#include "parsley.h"
|
||||
#include "y.tab.h"
|
||||
#include "printbuf.h"
|
||||
#include "functions.h"
|
||||
|
@ -26,14 +26,14 @@ int yywrap(void){
|
|||
return 1;
|
||||
}
|
||||
|
||||
void parsed_dex_free(parsedDexPtr ptr) {
|
||||
void parsed_parsley_free(parsedParsleyPtr ptr) {
|
||||
if(ptr->xml != NULL) xmlFree(ptr->xml);
|
||||
if(ptr->error != NULL) free(ptr->error);
|
||||
free(ptr);
|
||||
}
|
||||
|
||||
static parsedDexPtr parse_error(char* format, ...) {
|
||||
parsedDexPtr ptr = (parsedDexPtr) calloc(sizeof(parsed_dex), 1);
|
||||
static parsedParsleyPtr parse_error(char* format, ...) {
|
||||
parsedParsleyPtr ptr = (parsedParsleyPtr) calloc(sizeof(parsed_parsley), 1);
|
||||
ptr->xml = NULL;
|
||||
va_list args;
|
||||
va_start(args, format);
|
||||
|
@ -42,33 +42,33 @@ static parsedDexPtr parse_error(char* format, ...) {
|
|||
return ptr;
|
||||
}
|
||||
|
||||
parsedDexPtr dex_parse_file(dexPtr dex, char* file, bool html) {
|
||||
parsedParsleyPtr parsley_parse_file(parsleyPtr parsley, char* file, bool html) {
|
||||
if(html) {
|
||||
htmlParserCtxtPtr htmlCtxt = htmlNewParserCtxt();
|
||||
htmlDocPtr html = htmlCtxtReadFile(htmlCtxt, file, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
htmlFreeParserCtxt(htmlCtxt);
|
||||
if(html == NULL) return parse_error("Couldn't parse file: %s\n", file);
|
||||
return dex_parse_doc(dex, html);
|
||||
return parsley_parse_doc(parsley, html);
|
||||
} else {
|
||||
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
|
||||
xmlDocPtr xml = xmlCtxtReadFile(ctxt, file, NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
if(xml == NULL) return parse_error("Couldn't parse file: %s\n", file);
|
||||
return dex_parse_doc(dex, xml);
|
||||
return parsley_parse_doc(parsley, xml);
|
||||
}
|
||||
}
|
||||
|
||||
parsedDexPtr dex_parse_string(dexPtr dex, char* string, size_t size, bool html) {
|
||||
parsedParsleyPtr parsley_parse_string(parsleyPtr parsley, char* string, size_t size, bool html) {
|
||||
if(html) {
|
||||
htmlParserCtxtPtr htmlCtxt = htmlNewParserCtxt();
|
||||
htmlDocPtr html = htmlCtxtReadMemory(htmlCtxt, string, size, "http://kylemaxwell.com/dexter/memory", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
htmlDocPtr html = htmlCtxtReadMemory(htmlCtxt, string, size, "http://parslets.com/in-memory-string", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
if(html == NULL) return parse_error("Couldn't parse string");
|
||||
return dex_parse_doc(dex, html);
|
||||
return parsley_parse_doc(parsley, html);
|
||||
} else {
|
||||
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
|
||||
xmlDocPtr xml = xmlCtxtReadMemory(ctxt, string, size, "http://kylemaxwell.com/dexter/memory", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
xmlDocPtr xml = xmlCtxtReadMemory(ctxt, string, size, "http://parslets.com/in-memory-string", NULL, HTML_PARSE_RECOVER | HTML_PARSE_NOERROR |HTML_PARSE_NOWARNING);
|
||||
if(xml == NULL) return parse_error("Couldn't parse string");
|
||||
return dex_parse_doc(dex, xml);
|
||||
return parsley_parse_doc(parsley, xml);
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -101,11 +101,11 @@ unlink(xmlNodePtr xml) {
|
|||
|
||||
static bool
|
||||
is_root(xmlElementPtr xml) {
|
||||
return xml != NULL && xml->name != NULL && xml->prefix !=NULL && !strcmp(xml->name, "root") && !strcmp(xml->prefix, "dexter");
|
||||
return xml != NULL && xml->name != NULL && xml->prefix !=NULL && !strcmp(xml->name, "root") && !strcmp(xml->prefix, "parsley");
|
||||
}
|
||||
|
||||
static void
|
||||
prune(parsedDexPtr ptr, xmlNodePtr xml, char* err) {
|
||||
prune(parsedParsleyPtr ptr, xmlNodePtr xml, char* err) {
|
||||
if(xml == NULL) return;
|
||||
bool optional = ((xmlElementPtr )xml)->attributes != NULL;
|
||||
if(optional) {
|
||||
|
@ -123,7 +123,7 @@ prune(parsedDexPtr ptr, xmlNodePtr xml, char* err) {
|
|||
}
|
||||
|
||||
static void
|
||||
visit(parsedDexPtr ptr, xmlNodePtr xml, bool bubbling) {
|
||||
visit(parsedParsleyPtr ptr, xmlNodePtr xml, bool bubbling) {
|
||||
if(xml->type != XML_ELEMENT_NODE) return;
|
||||
xmlNodePtr child = xml->children;
|
||||
xmlNodePtr parent = xml->parent;
|
||||
|
@ -146,10 +146,10 @@ xml_empty(xmlNodePtr xml) {
|
|||
return true;
|
||||
}
|
||||
|
||||
parsedDexPtr dex_parse_doc(dexPtr dex, xmlDocPtr doc) {
|
||||
parsedDexPtr ptr = (parsedDexPtr) calloc(sizeof(parsed_dex), 1);
|
||||
ptr->dex = dex;
|
||||
ptr->xml = xsltApplyStylesheet(dex->stylesheet, doc, NULL);
|
||||
parsedParsleyPtr parsley_parse_doc(parsleyPtr parsley, xmlDocPtr doc) {
|
||||
parsedParsleyPtr ptr = (parsedParsleyPtr) calloc(sizeof(parsed_parsley), 1);
|
||||
ptr->parsley = parsley;
|
||||
ptr->xml = xsltApplyStylesheet(parsley->stylesheet, doc, NULL);
|
||||
if(ptr->xml != NULL && ptr->error == NULL) visit(ptr, ptr->xml->children, false);
|
||||
if(ptr->xml == NULL && ptr->error == NULL) { // == NULL
|
||||
ptr->error = strdup("Internal runtime error");
|
||||
|
@ -157,57 +157,57 @@ parsedDexPtr dex_parse_doc(dexPtr dex, xmlDocPtr doc) {
|
|||
return ptr;
|
||||
}
|
||||
|
||||
dexPtr dex_compile(char* dex_str, char* incl) {
|
||||
dexPtr dex = (dexPtr) calloc(sizeof(compiled_dex), 1);
|
||||
parsleyPtr parsley_compile(char* parsley_str, char* incl) {
|
||||
parsleyPtr parsley = (parsleyPtr) calloc(sizeof(compiled_parsley), 1);
|
||||
|
||||
if(last_dex_error != NULL) {
|
||||
free(last_dex_error);
|
||||
last_dex_error = NULL;
|
||||
if(last_parsley_error != NULL) {
|
||||
free(last_parsley_error);
|
||||
last_parsley_error = NULL;
|
||||
}
|
||||
|
||||
registerEXSLT();
|
||||
|
||||
struct json_object *json = json_tokener_parse(dex_str);
|
||||
struct json_object *json = json_tokener_parse(parsley_str);
|
||||
if(is_error(json)) {
|
||||
dex->error = strdup("Your dex is not valid json.");
|
||||
parsley->error = strdup("Your parslet is not valid json.");
|
||||
// json_object_put(json); // frees json
|
||||
return dex;
|
||||
return parsley;
|
||||
}
|
||||
|
||||
struct printbuf* buf = printbuf_new();
|
||||
|
||||
sprintbuf_dex_header(buf);
|
||||
sprintbuf_parsley_header(buf);
|
||||
sprintbuf(buf, "%s\n", incl);
|
||||
sprintbuf(buf, "<xsl:template match=\"/\">\n");
|
||||
sprintbuf(buf, "<dexter:root>\n");
|
||||
sprintbuf(buf, "<parsley:root>\n");
|
||||
|
||||
contextPtr context = new_context(json, buf);
|
||||
__dex_recurse(context);
|
||||
__parsley_recurse(context);
|
||||
|
||||
json_object_put(json); // frees json
|
||||
dex->error = last_dex_error;
|
||||
parsley->error = last_parsley_error;
|
||||
|
||||
sprintbuf(buf, "</dexter:root>\n");
|
||||
sprintbuf(buf, "</parsley:root>\n");
|
||||
sprintbuf(buf, "</xsl:template>\n");
|
||||
sprintbuf(buf, context->key_buf->buf);
|
||||
sprintbuf(buf, "</xsl:stylesheet>\n");
|
||||
|
||||
if(dex->error == NULL) {
|
||||
if(parsley->error == NULL) {
|
||||
xmlParserCtxtPtr ctxt = xmlNewParserCtxt();
|
||||
xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://kylemaxwell.com/dexter/compiled", NULL, 3);
|
||||
xmlDocPtr doc = xmlCtxtReadMemory(ctxt, buf->buf, buf->size, "http://kylemaxwell.com/parsley/compiled", NULL, 3);
|
||||
xmlFreeParserCtxt(ctxt);
|
||||
dex->raw_stylesheet = strdup(buf->buf);
|
||||
dex->stylesheet = xsltParseStylesheetDoc(doc);
|
||||
parsley->raw_stylesheet = strdup(buf->buf);
|
||||
parsley->stylesheet = xsltParseStylesheetDoc(doc);
|
||||
}
|
||||
|
||||
printbuf_free(buf);
|
||||
dex_collect();
|
||||
parsley_collect();
|
||||
|
||||
return dex;
|
||||
return parsley;
|
||||
}
|
||||
|
||||
static contextPtr new_context(struct json_object * json, struct printbuf *buf) {
|
||||
contextPtr c = dex_alloc(sizeof(dex_context));
|
||||
contextPtr c = parsley_alloc(sizeof(parsley_context));
|
||||
c->key_buf = printbuf_new();
|
||||
sprintbuf(c->key_buf, "");
|
||||
c->name = "root";
|
||||
|
@ -228,17 +228,17 @@ static contextPtr new_context(struct json_object * json, struct printbuf *buf) {
|
|||
}
|
||||
|
||||
contextPtr deeper_context(contextPtr context, char* key, struct json_object * val) {
|
||||
contextPtr c = dex_alloc(sizeof(dex_context));
|
||||
contextPtr c = parsley_alloc(sizeof(parsley_context));
|
||||
c->key_buf = context->key_buf;
|
||||
c->keys = context->keys;
|
||||
c->tag = dex_key_tag(key);
|
||||
c->flags = dex_key_flags(key);
|
||||
c->tag = parsley_key_tag(key);
|
||||
c->flags = parsley_key_flags(key);
|
||||
c->name = astrcat3(context->name, ".", c->tag);
|
||||
dex_parsing_context = c;
|
||||
parsley_parsing_context = c;
|
||||
c->array = val != NULL && json_object_is_type(val, json_type_array);
|
||||
c->json = c->array ? json_object_array_get_idx(val, 0) : val;
|
||||
c->string = val != NULL && json_object_is_type(c->json, json_type_string);
|
||||
c->filter = dex_key_filter(key);
|
||||
c->filter = parsley_key_filter(key);
|
||||
c->magic = ((c->filter == NULL) && c->array && !(c->string)) ? c->name : context->magic;
|
||||
if(context->filter != NULL && !c->array) c->magic = NULL;
|
||||
c->buf = context->buf;
|
||||
|
@ -259,7 +259,7 @@ static char* filter_intersection(char* key, char* expr) {
|
|||
}
|
||||
}
|
||||
|
||||
void dex_free(dexPtr ptr) {
|
||||
void parsley_free(parsleyPtr ptr) {
|
||||
if(ptr->error != NULL)
|
||||
free(ptr->error);
|
||||
if(ptr->raw_stylesheet != NULL)
|
||||
|
@ -271,14 +271,14 @@ void dex_free(dexPtr ptr) {
|
|||
|
||||
void yyerror(const char * s) {
|
||||
struct printbuf *buf = printbuf_new();
|
||||
if(last_dex_error !=NULL) sprintbuf(buf, "%s\n", last_dex_error);
|
||||
sprintbuf(buf, "%s in key: %s", s, dex_parsing_context->name);
|
||||
last_dex_error = strdup(buf->buf);
|
||||
if(last_parsley_error !=NULL) sprintbuf(buf, "%s\n", last_parsley_error);
|
||||
sprintbuf(buf, "%s in key: %s", s, parsley_parsing_context->name);
|
||||
last_parsley_error = strdup(buf->buf);
|
||||
printbuf_free(buf);
|
||||
}
|
||||
|
||||
static char* optional(contextPtr c) {
|
||||
return (c->flags & DEX_OPTIONAL) ? " optional=\"true\"" : "";
|
||||
return (c->flags & PARSLEY_OPTIONAL) ? " optional=\"true\"" : "";
|
||||
}
|
||||
|
||||
static bool
|
||||
|
@ -289,7 +289,7 @@ all_strings(struct json_object * json) {
|
|||
return true;
|
||||
}
|
||||
|
||||
void __dex_recurse(contextPtr context) {
|
||||
void __parsley_recurse(contextPtr context) {
|
||||
// printf("a\n");
|
||||
char* tmp;
|
||||
struct printbuf * buf;
|
||||
|
@ -303,14 +303,14 @@ void __dex_recurse(contextPtr context) {
|
|||
if(c->array || context->zipped) {
|
||||
if(c->filter){
|
||||
// printf("b\n");
|
||||
sprintbuf(c->buf, "<dexter:groups optional=\"true\"><xsl:for-each select=\"%s\"><dexter:group optional=\"true\">\n", c->filter);
|
||||
sprintbuf(c->buf, "<parsley:groups optional=\"true\"><xsl:for-each select=\"%s\"><parsley:group optional=\"true\">\n", c->filter);
|
||||
sprintbuf(c->buf, "<xsl:value-of select=\"%s\" />\n", c->raw_expr);
|
||||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></dexter:groups>\n");
|
||||
sprintbuf(c->buf, "</parsley:group></xsl:for-each></parsley:groups>\n");
|
||||
} else {
|
||||
// printf("c\n");
|
||||
sprintbuf(c->buf, "<dexter:groups optional=\"true\"><xsl:for-each select=\"%s\"><dexter:group optional=\"true\">\n", c->expr);
|
||||
sprintbuf(c->buf, "<parsley:groups optional=\"true\"><xsl:for-each select=\"%s\"><parsley:group optional=\"true\">\n", c->expr);
|
||||
sprintbuf(c->buf, "<xsl:value-of select=\".\" />\n");
|
||||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></dexter:groups>\n");
|
||||
sprintbuf(c->buf, "</parsley:group></xsl:for-each></parsley:groups>\n");
|
||||
}
|
||||
} else {
|
||||
if(c->filter){
|
||||
|
@ -327,28 +327,28 @@ void __dex_recurse(contextPtr context) {
|
|||
if(c->array) { // scoped
|
||||
if(c->filter != NULL) {
|
||||
// printf("f\n");
|
||||
sprintbuf(c->buf, "<dexter:groups optional=\"true\"><xsl:for-each select=\"%s\"><dexter:group optional=\"true\">\n", c->filter);
|
||||
__dex_recurse(c);
|
||||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></dexter:groups>\n");
|
||||
sprintbuf(c->buf, "<parsley:groups optional=\"true\"><xsl:for-each select=\"%s\"><parsley:group optional=\"true\">\n", c->filter);
|
||||
__parsley_recurse(c);
|
||||
sprintbuf(c->buf, "</parsley:group></xsl:for-each></parsley:groups>\n");
|
||||
} else { // magic
|
||||
if(all_strings(c->json)) {
|
||||
c->magic = NULL;
|
||||
c->zipped = 1;
|
||||
sprintbuf(c->buf, "<dexter:zipped>\n");
|
||||
__dex_recurse(c);
|
||||
sprintbuf(c->buf, "</dexter:zipped>\n");
|
||||
sprintbuf(c->buf, "<parsley:zipped>\n");
|
||||
__parsley_recurse(c);
|
||||
sprintbuf(c->buf, "</parsley:zipped>\n");
|
||||
} else {
|
||||
// printf("h\n");
|
||||
sprintbuf(c->buf, "<xsl:variable name=\"%s__context\" select=\".\"/>\n", c->name);
|
||||
dex_parsing_context = c;
|
||||
parsley_parsing_context = c;
|
||||
char * str = inner_key_of(c->json);
|
||||
if(str != NULL) {
|
||||
// printf("i\n");
|
||||
tmp = myparse(astrdup(str));
|
||||
sprintbuf(c->buf, "<dexter:groups optional=\"true\"><xsl:for-each select=\"%s\">\n", filter_intersection(context->magic, tmp));
|
||||
sprintbuf(c->buf, "<parsley:groups optional=\"true\"><xsl:for-each select=\"%s\">\n", filter_intersection(context->magic, tmp));
|
||||
|
||||
// keys
|
||||
keys = dex_alloc(sizeof(key_node));
|
||||
keys = parsley_alloc(sizeof(key_node));
|
||||
keys->name = c->name;
|
||||
keys->use = full_expr(c, tmp);
|
||||
keys->next = c->keys;
|
||||
|
@ -371,20 +371,20 @@ void __dex_recurse(contextPtr context) {
|
|||
);
|
||||
|
||||
sprintbuf(c->buf, "<xsl:variable name=\"%s__index\" select=\"%s\"/>\n", c->name, tmp);
|
||||
sprintbuf(c->buf, "<xsl:for-each select=\"$%s__context\"><dexter:group optional=\"true\">\n", c->name);
|
||||
__dex_recurse(c);
|
||||
sprintbuf(c->buf, "</dexter:group></xsl:for-each></xsl:for-each></dexter:groups>\n");
|
||||
sprintbuf(c->buf, "<xsl:for-each select=\"$%s__context\"><parsley:group optional=\"true\">\n", c->name);
|
||||
__parsley_recurse(c);
|
||||
sprintbuf(c->buf, "</parsley:group></xsl:for-each></xsl:for-each></parsley:groups>\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// printf("j\n");
|
||||
if(c->filter == NULL) {
|
||||
__dex_recurse(c);
|
||||
__parsley_recurse(c);
|
||||
} else {
|
||||
// printf("k\n");
|
||||
sprintbuf(c->buf, "<xsl:for-each select=\"%s\"><xsl:if test=\"position() = 1\">\n", c->filter);
|
||||
__dex_recurse(c);
|
||||
__parsley_recurse(c);
|
||||
sprintbuf(c->buf, "</xsl:if></xsl:for-each>\n");
|
||||
}
|
||||
}
|
||||
|
|
52
parsley.h
52
parsley.h
|
@ -1,7 +1,7 @@
|
|||
#ifndef DEXTER_H_INCLUDED
|
||||
#define DEXTER_H_INCLUDED
|
||||
#ifndef PARSLEY_H_INCLUDED
|
||||
#define PARSLEY_H_INCLUDED
|
||||
|
||||
#define DEX_BUF_SIZE 1024
|
||||
#define PARSLEY_BUF_SIZE 1024
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <libxslt/xslt.h>
|
||||
|
@ -9,25 +9,25 @@
|
|||
#include <libxslt/transform.h>
|
||||
|
||||
|
||||
static int dex_debug_mode = 0;
|
||||
static char* last_dex_error;
|
||||
static int parsley_debug_mode = 0;
|
||||
static char* last_parsley_error;
|
||||
|
||||
#include <json/json.h>
|
||||
|
||||
typedef struct __compiled_dex {
|
||||
typedef struct __compiled_parsley {
|
||||
char* raw_stylesheet;
|
||||
xsltStylesheetPtr stylesheet;
|
||||
char* error;
|
||||
} compiled_dex;
|
||||
} compiled_parsley;
|
||||
|
||||
typedef struct __parsed_dex {
|
||||
typedef struct __parsed_parsley {
|
||||
xmlDocPtr xml;
|
||||
char *error;
|
||||
compiled_dex *dex;
|
||||
} parsed_dex;
|
||||
compiled_parsley *parsley;
|
||||
} parsed_parsley;
|
||||
|
||||
typedef compiled_dex * dexPtr;
|
||||
typedef parsed_dex * parsedDexPtr;
|
||||
typedef compiled_parsley * parsleyPtr;
|
||||
typedef parsed_parsley * parsedParsleyPtr;
|
||||
|
||||
typedef struct __key_node {
|
||||
char* name;
|
||||
|
@ -37,12 +37,12 @@ typedef struct __key_node {
|
|||
|
||||
typedef key_node * keyPtr;
|
||||
|
||||
typedef struct __dex_context {
|
||||
typedef struct __parsley_context {
|
||||
struct printbuf * buf;
|
||||
struct printbuf * key_buf;
|
||||
keyPtr keys;
|
||||
struct json_object * json;
|
||||
struct __dex_context * parent;
|
||||
struct __parsley_context * parent;
|
||||
char* tag;
|
||||
char* filter;
|
||||
char* expr;
|
||||
|
@ -54,23 +54,23 @@ typedef struct __dex_context {
|
|||
int string;
|
||||
int flags;
|
||||
int zipped;
|
||||
} dex_context;
|
||||
} parsley_context;
|
||||
|
||||
typedef dex_context * contextPtr;
|
||||
typedef parsley_context * contextPtr;
|
||||
|
||||
void parsed_dex_free(parsedDexPtr);
|
||||
void parsed_parsley_free(parsedParsleyPtr);
|
||||
|
||||
void dex_free(dexPtr);
|
||||
dexPtr dex_compile(char* dex, char* incl);
|
||||
parsedDexPtr dex_parse_file(dexPtr, char*, bool);
|
||||
parsedDexPtr dex_parse_string(dexPtr, char*, size_t, bool);
|
||||
parsedDexPtr dex_parse_doc(dexPtr, xmlDocPtr);
|
||||
void parsley_free(parsleyPtr);
|
||||
parsleyPtr parsley_compile(char* parsley, char* incl);
|
||||
parsedParsleyPtr parsley_parse_file(parsleyPtr, char*, bool);
|
||||
parsedParsleyPtr parsley_parse_string(parsleyPtr, char*, size_t, bool);
|
||||
parsedParsleyPtr parsley_parse_doc(parsleyPtr, xmlDocPtr);
|
||||
|
||||
enum {
|
||||
DEX_OPTIONAL = 1,
|
||||
PARSLEY_OPTIONAL = 1,
|
||||
};
|
||||
|
||||
static contextPtr dex_parsing_context;
|
||||
static contextPtr parsley_parsing_context;
|
||||
|
||||
static char* full_expr(contextPtr, char*);
|
||||
static char* expr_join(char*, char*);
|
||||
|
@ -84,13 +84,13 @@ static contextPtr tagged_context(contextPtr, char*);
|
|||
static contextPtr new_context(struct json_object *, struct printbuf *);
|
||||
static contextPtr deeper_context(contextPtr, char*, struct json_object *);
|
||||
|
||||
static void __dex_recurse(contextPtr);
|
||||
static void __parsley_recurse(contextPtr);
|
||||
static char* filter_intersection(char*, char*);
|
||||
|
||||
static char* inner_key_of(struct json_object *);
|
||||
static char* inner_key_each(struct json_object *);
|
||||
|
||||
static void visit(parsedDexPtr ptr, xmlNodePtr xml, bool bubbling);
|
||||
static void visit(parsedParsleyPtr ptr, xmlNodePtr xml, bool bubbling);
|
||||
static bool xml_empty(xmlNodePtr xml);
|
||||
|
||||
#endif
|
|
@ -3,7 +3,7 @@
|
|||
#include <string.h>
|
||||
#include "kstring.h"
|
||||
#include "printbuf.h"
|
||||
#include "dexter.h"
|
||||
#include "parsley.h"
|
||||
#include "xml2json.h"
|
||||
#include <libxslt/xslt.h>
|
||||
#include <libxslt/xsltInternals.h>
|
||||
|
@ -21,7 +21,7 @@ struct arguments
|
|||
struct list_elem *include_files;
|
||||
int input_xml;
|
||||
int output_xml;
|
||||
char *dex;
|
||||
char *parsley;
|
||||
char *input_file;
|
||||
char *output_file;
|
||||
};
|
||||
|
@ -32,10 +32,10 @@ struct list_elem {
|
|||
char *string;
|
||||
};
|
||||
|
||||
const char *argp_program_version = "dexter 0.1";
|
||||
const char *argp_program_version = "parsley 0.1";
|
||||
const char *argp_program_bug_address = "<kyle@kylemaxwell.com>";
|
||||
static char args_doc[] = "DEX_FILE FILE_TO_PARSE";
|
||||
static char doc[] = "Dexter is a dex parser.";
|
||||
static char doc[] = "Parsley is a parslet parser.";
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{"input-xml", 'x', 0, 0, "Use the XML parser (not HTML)" },
|
||||
|
@ -72,7 +72,7 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
|||
case ARGP_KEY_ARG:
|
||||
switch(state->arg_num){
|
||||
case 0:
|
||||
arguments->dex = arg;
|
||||
arguments->parsley = arg;
|
||||
break;
|
||||
case 1:
|
||||
arguments->input_file = arg;
|
||||
|
@ -106,23 +106,23 @@ int main (int argc, char **argv) {
|
|||
struct printbuf *buf = printbuf_new();
|
||||
struct printbuf *incl = printbuf_new();
|
||||
|
||||
FILE * fd = dex_fopen(arguments.dex, "r");
|
||||
FILE * fd = parsley_fopen(arguments.parsley, "r");
|
||||
printbuf_file_read(fd, buf);
|
||||
|
||||
while(elemptr->has_next) {
|
||||
elemptr = elemptr->next;
|
||||
FILE* f = dex_fopen(elemptr->string, "r");
|
||||
FILE* f = parsley_fopen(elemptr->string, "r");
|
||||
printbuf_file_read(f, incl);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
dexPtr compiled = dex_compile(buf->buf, incl->buf);
|
||||
parsleyPtr compiled = parsley_compile(buf->buf, incl->buf);
|
||||
if(compiled->error != NULL) {
|
||||
fprintf(stderr, "%s\n", compiled->error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
parsedDexPtr ptr = dex_parse_file(compiled, arguments.input_file, !(arguments.input_xml));
|
||||
parsedParsleyPtr ptr = parsley_parse_file(compiled, arguments.input_file, !(arguments.input_xml));
|
||||
|
||||
if(ptr->error != NULL) {
|
||||
fprintf(stderr, "Parsing failed: %s\n", ptr->error);
|
||||
|
@ -133,7 +133,7 @@ int main (int argc, char **argv) {
|
|||
xmlSaveFormatFile(arguments.output_file, ptr->xml, 1);
|
||||
} else {
|
||||
struct json_object *json = xml2json(ptr->xml->children->children);
|
||||
FILE* f = dex_fopen(arguments.output_file, "w");
|
||||
FILE* f = parsley_fopen(arguments.output_file, "w");
|
||||
fprintf(f, "%s\n", json_object_to_json_string(json));
|
||||
fclose(f);
|
||||
}
|
||||
|
|
|
@ -1,22 +1,22 @@
|
|||
#include "dex_mem.h"
|
||||
#include "parsley_mem.h"
|
||||
#include <stdlib.h>
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
|
||||
static struct obstack dex_obstack;
|
||||
static bool dex_obstack_initialized = false;
|
||||
static struct obstack parsley_obstack;
|
||||
static bool parsley_obstack_initialized = false;
|
||||
|
||||
void dex_collect() {
|
||||
obstack_free(&dex_obstack, NULL);
|
||||
obstack_init(&dex_obstack);
|
||||
void parsley_collect() {
|
||||
obstack_free(&parsley_obstack, NULL);
|
||||
obstack_init(&parsley_obstack);
|
||||
}
|
||||
|
||||
void * dex_alloc(int size) {
|
||||
if(!dex_obstack_initialized) {
|
||||
obstack_init(&dex_obstack);
|
||||
dex_obstack_initialized = true;
|
||||
void * parsley_alloc(int size) {
|
||||
if(!parsley_obstack_initialized) {
|
||||
obstack_init(&parsley_obstack);
|
||||
parsley_obstack_initialized = true;
|
||||
}
|
||||
void * mem = obstack_alloc(&dex_obstack, size);
|
||||
void * mem = obstack_alloc(&parsley_obstack, size);
|
||||
void * ptr = mem;
|
||||
for(int i = 0; i < size; i++)
|
||||
{
|
||||
|
|
|
@ -1,13 +1,13 @@
|
|||
#ifndef DEX_MEM_H_INCLUDED
|
||||
#define DEX_MEM_H_INCLUDED
|
||||
#ifndef PARSLEY_MEM_H_INCLUDED
|
||||
#define PARSLEY_MEM_H_INCLUDED
|
||||
|
||||
#define obstack_chunk_alloc malloc
|
||||
#define obstack_chunk_free free
|
||||
|
||||
#include "obstack.h"
|
||||
|
||||
void dex_collect();
|
||||
void * dex_alloc(int size);
|
||||
void parsley_collect();
|
||||
void * parsley_alloc(int size);
|
||||
|
||||
|
||||
#endif
|
|
@ -4,7 +4,7 @@
|
|||
#include <string.h>
|
||||
#include "kstring.h"
|
||||
#include "printbuf.h"
|
||||
#include "dexter.h"
|
||||
#include "parsley.h"
|
||||
#include "util.h"
|
||||
|
||||
struct list_elem {
|
||||
|
@ -16,14 +16,14 @@ struct list_elem {
|
|||
struct arguments
|
||||
{
|
||||
struct list_elem *include_files;
|
||||
char *dex;
|
||||
char *parsley;
|
||||
char *output_file;
|
||||
};
|
||||
|
||||
const char *argp_program_version = "dexterc 0.1";
|
||||
const char *argp_program_version = "parsleyc 0.1";
|
||||
const char *argp_program_bug_address = "<kyle@kylemaxwell.com>";
|
||||
static char args_doc[] = "DEX_FILE";
|
||||
static char doc[] = "Dexter is a dex to XSLT compiler";
|
||||
static char doc[] = "Parsleyc is a parslet to XSLT compiler";
|
||||
|
||||
static struct argp_option options[] = {
|
||||
{"debug", 'd', 0, 0, "Turn on Bison parser debugging" },
|
||||
|
@ -48,14 +48,14 @@ static error_t parse_opt (int key, char *arg, struct argp_state *state)
|
|||
base->has_next = 1;
|
||||
break;
|
||||
case 'd':
|
||||
// dex_set_debug_mode(1);
|
||||
// parsley_set_debug_mode(1);
|
||||
break;
|
||||
case 'o':
|
||||
arguments->output_file = arg;
|
||||
break;
|
||||
case ARGP_KEY_ARG:
|
||||
if (state->arg_num >= 1) argp_usage (state);
|
||||
arguments->dex = arg;
|
||||
arguments->parsley = arg;
|
||||
break;
|
||||
case ARGP_KEY_END:
|
||||
if (state->arg_num < 1) argp_usage (state);
|
||||
|
@ -76,29 +76,29 @@ int main (int argc, char **argv) {
|
|||
|
||||
arguments.include_files = elemptr;
|
||||
arguments.output_file = "-";
|
||||
arguments.dex = "-";
|
||||
arguments.parsley = "-";
|
||||
argp_parse (&argp, argc, argv, 0, 0, &arguments);
|
||||
|
||||
struct printbuf* dex = printbuf_new();
|
||||
struct printbuf* parsley = printbuf_new();
|
||||
struct printbuf* incl = printbuf_new();
|
||||
|
||||
FILE* in = dex_fopen(arguments.dex, "r");
|
||||
FILE* in = parsley_fopen(arguments.parsley, "r");
|
||||
|
||||
printbuf_file_read(in, dex);
|
||||
printbuf_file_read(in, parsley);
|
||||
while(elemptr->has_next) {
|
||||
elemptr = elemptr->next;
|
||||
FILE* f = dex_fopen(elemptr->string, "r");
|
||||
FILE* f = parsley_fopen(elemptr->string, "r");
|
||||
printbuf_file_read(f, incl);
|
||||
fclose(f);
|
||||
}
|
||||
|
||||
dexPtr compiled = dex_compile(dex->buf, incl->buf);
|
||||
parsleyPtr compiled = parsley_compile(parsley->buf, incl->buf);
|
||||
if(compiled->error != NULL) {
|
||||
fprintf(stderr, "%s\n", compiled->error);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
FILE* fo = dex_fopen(arguments.output_file, "w");
|
||||
FILE* fo = parsley_fopen(arguments.output_file, "w");
|
||||
fprintf(fo, compiled->raw_stylesheet);
|
||||
fclose(fo);
|
||||
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
#include <Python.h>
|
||||
#include <dexter.h>
|
||||
#include <parsley.h>
|
||||
#include <libxslt/xslt.h>
|
||||
#include <libexslt/exslt.h>
|
||||
#include <libxslt/xsltInternals.h>
|
||||
|
@ -8,7 +8,6 @@
|
|||
#include <libxml/HTMLparser.h>
|
||||
#include <libxml/HTMLtree.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
#include <dexter.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <json/json.h>
|
||||
|
@ -16,12 +15,12 @@
|
|||
|
||||
typedef struct {
|
||||
PyObject_HEAD
|
||||
dexPtr dex;
|
||||
} DexPy;
|
||||
parsleyPtr parsley;
|
||||
} PyParsley;
|
||||
|
||||
static PyTypeObject dexpy_DexPyType;
|
||||
static PyTypeObject pyparsley_PyParsleyType;
|
||||
|
||||
static PyMethodDef dexpy_methods[] = {
|
||||
static PyMethodDef pyparsley_methods[] = {
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
|
@ -32,7 +31,7 @@ static PyMethodDef dexpy_methods[] = {
|
|||
static PyObject *jsonmodule;
|
||||
|
||||
PyMODINIT_FUNC
|
||||
initdexpy(void)
|
||||
initpyparsley(void)
|
||||
{
|
||||
jsonmodule = PyImport_ImportModule("json");
|
||||
if(jsonmodule == NULL)
|
||||
|
@ -40,34 +39,34 @@ initdexpy(void)
|
|||
|
||||
PyObject* m;
|
||||
|
||||
dexpy_DexPyType.tp_new = PyType_GenericNew;
|
||||
if (PyType_Ready(&dexpy_DexPyType) < 0)
|
||||
pyparsley_PyParsleyType.tp_new = PyType_GenericNew;
|
||||
if (PyType_Ready(&pyparsley_PyParsleyType) < 0)
|
||||
return;
|
||||
|
||||
m = Py_InitModule3("dexpy", dexpy_methods,
|
||||
"Python binding for dexter");
|
||||
m = Py_InitModule3("pyparsley", pyparsley_methods,
|
||||
"Python binding for parsley");
|
||||
|
||||
Py_INCREF(&dexpy_DexPyType);
|
||||
PyModule_AddObject(m, "DexPy", (PyObject *)&dexpy_DexPyType);
|
||||
Py_INCREF(&pyparsley_PyParsleyType);
|
||||
PyModule_AddObject(m, "PyParsley", (PyObject *)&pyparsley_PyParsleyType);
|
||||
}
|
||||
|
||||
static void
|
||||
DexPy_dealloc(DexPy* self)
|
||||
PyParsley_dealloc(PyParsley* self)
|
||||
{
|
||||
if(self->dex != NULL) dex_free(self->dex);
|
||||
if(self->parsley != NULL) parsley_free(self->parsley);
|
||||
self->ob_type->tp_free((PyObject*)self);
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
DexPy_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
PyParsley_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
DexPy *self;
|
||||
self = (DexPy *)type->tp_alloc(type, 0);
|
||||
PyParsley *self;
|
||||
self = (PyParsley *)type->tp_alloc(type, 0);
|
||||
return (PyObject *)self;
|
||||
}
|
||||
|
||||
static int
|
||||
DexPy_init(DexPy *self, PyObject *args, PyObject *kwds)
|
||||
PyParsley_init(PyParsley *self, PyObject *args, PyObject *kwds)
|
||||
{
|
||||
PyObject *script;
|
||||
char *string = "";
|
||||
|
@ -89,10 +88,10 @@ DexPy_init(DexPy *self, PyObject *args, PyObject *kwds)
|
|||
string = PyString_AsString(script);
|
||||
if(string == NULL) return -1;
|
||||
|
||||
self->dex = dex_compile(string, incl);
|
||||
self->parsley = parsley_compile(string, incl);
|
||||
|
||||
if(self->dex->error != NULL) {
|
||||
PyErr_SetString(PyExc_RuntimeError, self->dex->error);
|
||||
if(self->parsley->error != NULL) {
|
||||
PyErr_SetString(PyExc_RuntimeError, self->parsley->error);
|
||||
Py_DECREF(self);
|
||||
return -1;
|
||||
}
|
||||
|
@ -117,7 +116,7 @@ pythonize_recurse(xmlNodePtr xml) {
|
|||
PyDict_SetItemString(obj, child->name, pythonize_recurse(child->children));
|
||||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->ns->prefix, "dexter")) {
|
||||
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
||||
if(!strcmp(xml->name, "groups")) {
|
||||
obj = PyList_New(0);
|
||||
while(child != NULL) {
|
||||
|
@ -125,7 +124,7 @@ pythonize_recurse(xmlNodePtr xml) {
|
|||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->name, "group")) {
|
||||
// Implicitly handled by dexter:groups handler
|
||||
// Implicitly handled by parsley:groups handler
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
@ -141,11 +140,11 @@ pythonize_recurse(xmlNodePtr xml) {
|
|||
}
|
||||
|
||||
static PyObject *
|
||||
DexPy_parse_doc(parsedDexPtr ptr, char *type) {
|
||||
PyParsley_parse_doc(parsedParsleyPtr ptr, char *type) {
|
||||
if(ptr->error != NULL || ptr->xml == NULL) {
|
||||
if(ptr->error == NULL) ptr->error = strdup("Unknown dex error");
|
||||
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
||||
PyErr_SetString(PyExc_RuntimeError, ptr->error);
|
||||
parsed_dex_free(ptr);
|
||||
parsed_parsley_free(ptr);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
@ -167,19 +166,19 @@ DexPy_parse_doc(parsedDexPtr ptr, char *type) {
|
|||
return Py_None;
|
||||
}
|
||||
}
|
||||
parsed_dex_free(ptr);
|
||||
parsed_parsley_free(ptr);
|
||||
return output;
|
||||
}
|
||||
|
||||
static PyObject *
|
||||
DexPy_parse(DexPy *self, PyObject *args, PyObject *keywords)
|
||||
PyParsley_parse(PyParsley *self, PyObject *args, PyObject *keywords)
|
||||
{
|
||||
char *file = NULL;
|
||||
char *string = NULL;
|
||||
char *input = "html";
|
||||
char *output = "python";
|
||||
int len;
|
||||
parsedDexPtr ptr;
|
||||
parsedParsleyPtr ptr;
|
||||
|
||||
static char * list[] = { "file", "string", "input", "output", NULL };
|
||||
|
||||
|
@ -188,41 +187,41 @@ DexPy_parse(DexPy *self, PyObject *args, PyObject *keywords)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if(self->dex == NULL) {
|
||||
PyErr_SetString(PyExc_RuntimeError, "dex data is NULL");
|
||||
if(self->parsley == NULL) {
|
||||
PyErr_SetString(PyExc_RuntimeError, "parsley data is NULL");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if(file != NULL) {
|
||||
ptr = dex_parse_file(self->dex, file, !strcmp(input, "html"));
|
||||
ptr = parsley_parse_file(self->parsley, file, !strcmp(input, "html"));
|
||||
} else {
|
||||
ptr = dex_parse_string(self->dex, string, len, !strcmp(input, "html"));
|
||||
ptr = parsley_parse_string(self->parsley, string, len, !strcmp(input, "html"));
|
||||
}
|
||||
|
||||
return DexPy_parse_doc(ptr, output);
|
||||
return PyParsley_parse_doc(ptr, output);
|
||||
}
|
||||
|
||||
|
||||
static PyMethodDef DexPy_methods[] = {
|
||||
{"parse", (PyCFunction)DexPy_parse, METH_VARARGS | METH_KEYWORDS,
|
||||
static PyMethodDef PyParsley_methods[] = {
|
||||
{"parse", (PyCFunction)PyParsley_parse, METH_VARARGS | METH_KEYWORDS,
|
||||
"Parses with a variety of options"
|
||||
},
|
||||
// {"parse_string", (PyCFunction)DexPy_parse_string, METH_VARARGS,
|
||||
// "Parses an in-memory string with the current dex"
|
||||
// {"parse_string", (PyCFunction)PyParsley_parse_string, METH_VARARGS,
|
||||
// "Parses an in-memory string with the current parslet"
|
||||
// },
|
||||
// {"parse_file", (PyCFunction)DexPy_parse_file, METH_VARARGS,
|
||||
// "Parses file or url with the current dex"
|
||||
// {"parse_file", (PyCFunction)PyParsley_parse_file, METH_VARARGS,
|
||||
// "Parses file or url with the current parslet"
|
||||
// },
|
||||
{NULL} /* Sentinel */
|
||||
};
|
||||
|
||||
static PyTypeObject dexpy_DexPyType = {
|
||||
static PyTypeObject pyparsley_PyParsleyType = {
|
||||
PyObject_HEAD_INIT(NULL)
|
||||
0, /*ob_size*/
|
||||
"dexpy.DexPy", /*tp_name*/
|
||||
sizeof(DexPy), /*tp_basicsize*/
|
||||
"pyparsley.PyParsley", /*tp_name*/
|
||||
sizeof(PyParsley), /*tp_basicsize*/
|
||||
0, /*tp_itemsize*/
|
||||
(destructor) DexPy_dealloc, /*tp_dealloc*/
|
||||
(destructor) PyParsley_dealloc, /*tp_dealloc*/
|
||||
0, /*tp_print*/
|
||||
0, /*tp_getattr*/
|
||||
0, /*tp_setattr*/
|
||||
|
@ -238,14 +237,14 @@ static PyTypeObject dexpy_DexPyType = {
|
|||
0, /*tp_setattro*/
|
||||
0, /*tp_as_buffer*/
|
||||
Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /*tp_flags*/
|
||||
"DexPy objects", /* tp_doc */
|
||||
"PyParsley objects", /* tp_doc */
|
||||
0, /* tp_traverse */
|
||||
0, /* tp_clear */
|
||||
0, /* tp_richcompare */
|
||||
0, /* tp_weaklistoffset */
|
||||
0, /* tp_iter */
|
||||
0, /* tp_iternext */
|
||||
DexPy_methods, /* tp_methods */
|
||||
PyParsley_methods, /* tp_methods */
|
||||
0, /* tp_members */
|
||||
0, /* tp_getset */
|
||||
0, /* tp_base */
|
||||
|
@ -253,7 +252,7 @@ static PyTypeObject dexpy_DexPyType = {
|
|||
0, /* tp_descr_get */
|
||||
0, /* tp_descr_set */
|
||||
0, /* tp_dictoffset */
|
||||
(initproc)DexPy_init, /* tp_init */
|
||||
(initproc)PyParsley_init, /* tp_init */
|
||||
0, /* tp_alloc */
|
||||
DexPy_new, /* tp_new */
|
||||
PyParsley_new, /* tp_new */
|
||||
};
|
|
@ -6,7 +6,7 @@ from distutils.core import setup, Extension
|
|||
from subprocess import Popen, PIPE
|
||||
|
||||
white = re.compile(r'\s+')
|
||||
flags = "-ljson -ldexter -I/usr/include -I/usr/local/include -I/opt/local/include -L/usr/lib -L/usr/local/lib -L/opt/local/lib "
|
||||
flags = "-ljson -lparsley -I/usr/include -I/usr/local/include -I/opt/local/include -L/usr/lib -L/usr/local/lib -L/opt/local/lib "
|
||||
flags += Popen(["xml2-config", "--libs", "--cflags"], stdout=PIPE).communicate()[0]
|
||||
flags += Popen(["xslt-config", "--libs", "--cflags"], stdout=PIPE).communicate()[0]
|
||||
flags = white.sub(" ", flags)
|
||||
|
@ -15,8 +15,8 @@ libraries = re.findall(r'-l(\S+)', flags)
|
|||
include_dirs = re.findall(r'-I(\S+)', flags)
|
||||
lib_dirs = re.findall(r'-L(\S+)', flags)
|
||||
|
||||
setup(name="dexpy", version="1.0",
|
||||
ext_modules=[Extension("dexpy", ["dexpymodule.c"],
|
||||
setup(name="pyparsley", version="1.0",
|
||||
ext_modules=[Extension("pyparsley", ["pyparsleymodule.c"],
|
||||
library_dirs = lib_dirs,
|
||||
include_dirs = include_dirs,
|
||||
libraries = libraries
|
||||
|
|
|
@ -1,29 +1,29 @@
|
|||
#!/usr/bin/env python2.6
|
||||
import unittest
|
||||
from dexpy import DexPy
|
||||
from pyparsley import PyParsley
|
||||
from inspect import currentframe
|
||||
from os.path import dirname
|
||||
|
||||
class TestDexPy(unittest.TestCase):
|
||||
class TestPyParsley(unittest.TestCase):
|
||||
|
||||
def setUp(self):
|
||||
self.dex = DexPy({'title': 'title'})
|
||||
self.alt_dex = DexPy('{"title": "title"}')
|
||||
self.parsley = PyParsley({'title': 'title'})
|
||||
self.alt_parsley = PyParsley('{"title": "title"}')
|
||||
self.__file__ = currentframe().f_code.co_filename
|
||||
self.__dir__ = dirname(self.__file__)
|
||||
self.file = self.__dir__ + '/../../test/yelp.html'
|
||||
self.json = '{ "title": "\\t\\tNick\'s Crispy Tacos - Russian Hill - San Francisco, CA\\n" }'
|
||||
|
||||
def test_file_xml(self):
|
||||
parsed = self.dex.parse(file = self.file, output = "json")
|
||||
parsed = self.parsley.parse(file = self.file, output = "json")
|
||||
self.assertEquals(self.json, parsed)
|
||||
|
||||
def test_json_file_xml(self):
|
||||
parsed = self.alt_dex.parse(file = self.file, output = "json")
|
||||
parsed = self.alt_parsley.parse(file = self.file, output = "json")
|
||||
self.assertEquals(self.json, parsed)
|
||||
|
||||
def test_native(self):
|
||||
parsed = self.alt_dex.parse(file = self.file, output = "python")
|
||||
parsed = self.alt_parsley.parse(file = self.file, output = "python")
|
||||
self.assertEquals({ "title": "\t\tNick's Crispy Tacos - Russian Hill - San Francisco, CA\n" }, parsed)
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
@ -17,6 +17,6 @@ task :configure do
|
|||
end
|
||||
|
||||
task :install do
|
||||
system "gem build dexterous.gemspec"
|
||||
system "gem install dexterous"
|
||||
system "gem build parsley-ruby.gemspec"
|
||||
system "gem install parsley-ruby"
|
||||
end
|
|
@ -8,56 +8,56 @@
|
|||
#include <libxml/HTMLparser.h>
|
||||
#include <libxml/HTMLtree.h>
|
||||
#include <libxml/xmlwriter.h>
|
||||
#include <dexter.h>
|
||||
#include <parsley.h>
|
||||
#include <json/json.h>
|
||||
#include <xml2json.h>
|
||||
|
||||
VALUE _new(VALUE, VALUE, VALUE);
|
||||
VALUE _parse_file(VALUE, VALUE, VALUE, VALUE);
|
||||
VALUE _parse_string(VALUE, VALUE, VALUE, VALUE);
|
||||
VALUE _parse_doc(parsedDexPtr, VALUE);
|
||||
VALUE _parse_doc(parsedParsleyPtr, VALUE);
|
||||
VALUE rubify_recurse(xmlNodePtr xml);
|
||||
VALUE c_dex_err;
|
||||
VALUE c_dex;
|
||||
VALUE c_parsley_err;
|
||||
VALUE c_parsley;
|
||||
|
||||
void Init_cdexter()
|
||||
void Init_cparsley()
|
||||
{
|
||||
c_dex = rb_define_class("CDexter", rb_cObject);
|
||||
c_dex_err = rb_define_class("DexError", rb_eRuntimeError);
|
||||
rb_define_singleton_method(c_dex, "new", _new, 2);
|
||||
rb_define_method(c_dex, "parse_file", _parse_file, 3);
|
||||
rb_define_method(c_dex, "parse_string", _parse_string, 3);
|
||||
c_parsley = rb_define_class("CParsley", rb_cObject);
|
||||
c_parsley_err = rb_define_class("ParsleyError", rb_eRuntimeError);
|
||||
rb_define_singleton_method(c_parsley, "new", _new, 2);
|
||||
rb_define_method(c_parsley, "parse_file", _parse_file, 3);
|
||||
rb_define_method(c_parsley, "parse_string", _parse_string, 3);
|
||||
}
|
||||
|
||||
VALUE _new(VALUE self, VALUE dex, VALUE incl){
|
||||
dexPtr ptr = dex_compile(STR2CSTR(dex), STR2CSTR(incl));
|
||||
VALUE _new(VALUE self, VALUE parsley, VALUE incl){
|
||||
parsleyPtr ptr = parsley_compile(STR2CSTR(parsley), STR2CSTR(incl));
|
||||
if(ptr->error != NULL) {
|
||||
rb_raise(c_dex_err, ptr->error);
|
||||
dex_free(ptr);
|
||||
rb_raise(c_parsley_err, ptr->error);
|
||||
parsley_free(ptr);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
return Data_Wrap_Struct(c_dex, 0, dex_free, ptr);
|
||||
return Data_Wrap_Struct(c_parsley, 0, parsley_free, ptr);
|
||||
}
|
||||
|
||||
VALUE _parse_file(VALUE self, VALUE name, VALUE input, VALUE output){
|
||||
dexPtr dex;
|
||||
Data_Get_Struct(self, dexPtr, dex);
|
||||
return _parse_doc(dex_parse_file(dex, STR2CSTR(name), input == ID2SYM(rb_intern("html"))), output);
|
||||
parsleyPtr parsley;
|
||||
Data_Get_Struct(self, parsleyPtr, parsley);
|
||||
return _parse_doc(parsley_parse_file(parsley, STR2CSTR(name), input == ID2SYM(rb_intern("html"))), output);
|
||||
}
|
||||
|
||||
VALUE _parse_string(VALUE self, VALUE string, VALUE input, VALUE output) {
|
||||
dexPtr dex;
|
||||
Data_Get_Struct(self, dexPtr, dex);
|
||||
parsleyPtr parsley;
|
||||
Data_Get_Struct(self, parsleyPtr, parsley);
|
||||
char* cstr = STR2CSTR(string);
|
||||
return _parse_doc(dex_parse_string(dex, cstr, strlen(cstr), input == ID2SYM(rb_intern("html"))), output);
|
||||
return _parse_doc(parsley_parse_string(parsley, cstr, strlen(cstr), input == ID2SYM(rb_intern("html"))), output);
|
||||
}
|
||||
|
||||
VALUE _parse_doc(parsedDexPtr ptr, VALUE type) {
|
||||
VALUE _parse_doc(parsedParsleyPtr ptr, VALUE type) {
|
||||
if(ptr->error != NULL || ptr->xml == NULL) {
|
||||
if(ptr->error == NULL) ptr->error = strdup("Unknown dex error");
|
||||
rb_raise(c_dex_err, ptr->error);
|
||||
parsed_dex_free(ptr);
|
||||
if(ptr->error == NULL) ptr->error = strdup("Unknown parsley error");
|
||||
rb_raise(c_parsley_err, ptr->error);
|
||||
parsed_parsley_free(ptr);
|
||||
return Qnil;
|
||||
}
|
||||
|
||||
|
@ -77,7 +77,7 @@ VALUE _parse_doc(parsedDexPtr ptr, VALUE type) {
|
|||
if(output == NULL) output = Qnil;
|
||||
}
|
||||
|
||||
parsed_dex_free(ptr);
|
||||
parsed_parsley_free(ptr);
|
||||
|
||||
return output;
|
||||
}
|
||||
|
@ -97,7 +97,7 @@ VALUE rubify_recurse(xmlNodePtr xml) {
|
|||
rb_hash_aset(obj, rb_str_new2(child->name), rubify_recurse(child->children));
|
||||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->ns->prefix, "dexter")) {
|
||||
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
||||
if(!strcmp(xml->name, "groups")) {
|
||||
obj = rb_ary_new();
|
||||
while(child != NULL) {
|
||||
|
@ -105,7 +105,7 @@ VALUE rubify_recurse(xmlNodePtr xml) {
|
|||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->name, "group")) {
|
||||
// Implicitly handled by dexter:groups handler
|
||||
// Implicitly handled by parsley:groups handler
|
||||
}
|
||||
}
|
||||
break;
|
|
@ -61,7 +61,7 @@ mylib = %w[/usr/local/lib /opt/local/lib /usr/lib]
|
|||
find_header('json/json.h', INCLUDEDIR, *myincl) or abort "need json/json.h"
|
||||
find_library('json', 'json_object_new_string', LIBDIR, *mylib) or abort "need libjson"
|
||||
|
||||
find_header('dexter.h', INCLUDEDIR, *myincl) or abort "need dexter.h"
|
||||
find_library('dexter', 'dex_compile', LIBDIR, *mylib) or abort "need libdexter"
|
||||
find_header('parsley.h', INCLUDEDIR, *myincl) or abort "need parsley.h"
|
||||
find_library('parsley', 'parsley_compile', LIBDIR, *mylib) or abort "need libparsley"
|
||||
|
||||
create_makefile('cdexter')
|
||||
create_makefile('cparsley')
|
|
@ -1,16 +1,16 @@
|
|||
require File.dirname(__FILE__) + "/../ext/cdexter"
|
||||
require File.dirname(__FILE__) + "/../ext/cparsley"
|
||||
require "rubygems"
|
||||
require "json"
|
||||
require "thread"
|
||||
|
||||
class Dexterous
|
||||
def initialize(dex, incl = "")
|
||||
if(dex.is_a?(Hash))
|
||||
dex = dex.to_json
|
||||
class Parsley
|
||||
def initialize(parsley, incl = "")
|
||||
if(parsley.is_a?(Hash))
|
||||
parsley = parsley.to_json
|
||||
end
|
||||
@@mutex ||= Mutex.new
|
||||
@@mutex.synchronize do
|
||||
@dex = CDexter.new(dex, incl)
|
||||
@parsley = CParsley.new(parsley, incl)
|
||||
end
|
||||
end
|
||||
|
||||
|
@ -31,9 +31,9 @@ class Dexterous
|
|||
options[:input] ||= :html
|
||||
options[:output]||= :ruby
|
||||
if options[:file]
|
||||
@dex.parse_file options[:file], options[:input], options[:output]
|
||||
@parsley.parse_file options[:file], options[:input], options[:output]
|
||||
else
|
||||
@dex.parse_string options[:string], options[:input], options[:output]
|
||||
@parsley.parse_string options[:string], options[:input], options[:output]
|
||||
end
|
||||
end
|
||||
end
|
|
@ -1,11 +1,11 @@
|
|||
Gem::Specification.new do |s|
|
||||
s.name = "dexterous"
|
||||
s.name = "parsley-ruby"
|
||||
s.version = "0.1.0"
|
||||
s.date = "2008-08-10"
|
||||
s.summary = "Ruby binding for dexter"
|
||||
s.summary = "Ruby binding for parsley"
|
||||
s.email = "kyle@kylemaxwell.com"
|
||||
s.homepage = "http://github.com/fizx/robots"
|
||||
s.description = "Ruby binding for dexter"
|
||||
s.homepage = "http://github.com/fizx/parsley-ruby"
|
||||
s.description = "Ruby binding for parsley"
|
||||
s.has_rdoc = true
|
||||
s.require_paths = ["lib", "ext"]
|
||||
s.extensions = "ext/extconf.rb"
|
|
@ -1,52 +1,52 @@
|
|||
require "test/unit"
|
||||
require File.dirname(__FILE__) + "/../lib/dexterous"
|
||||
require File.dirname(__FILE__) + "/../lib/parsley"
|
||||
|
||||
class TestDexterous < Test::Unit::TestCase
|
||||
class TestParsley < Test::Unit::TestCase
|
||||
def setup
|
||||
@file = File.dirname(__FILE__) + "/../../test/yelp.html"
|
||||
end
|
||||
|
||||
def test_yelp
|
||||
@dex = Dexterous.new(File.read(File.dirname(__FILE__) + "/../../test/yelp-home.dex"))
|
||||
out = @dex.parse(:file => File.dirname(__FILE__) + "/../../test/yelp-home.html")
|
||||
@parsley = Parsley.new(File.read(File.dirname(__FILE__) + "/../../test/yelp-home.let"))
|
||||
out = @parsley.parse(:file => File.dirname(__FILE__) + "/../../test/yelp-home.html")
|
||||
assert_equal "/c/sf/shopping", out["categories"][0]["href"]
|
||||
end
|
||||
|
||||
def test_yelp_xml
|
||||
@dex = Dexterous.new(File.read(File.dirname(__FILE__) + "/../../test/yelp-home.dex"))
|
||||
out = @dex.parse(:file => File.dirname(__FILE__) + "/../../test/yelp-home.html", :output => :xml)
|
||||
@parsley = Parsley.new(File.read(File.dirname(__FILE__) + "/../../test/yelp-home.let"))
|
||||
out = @parsley.parse(:file => File.dirname(__FILE__) + "/../../test/yelp-home.html", :output => :xml)
|
||||
end
|
||||
|
||||
def test_simple
|
||||
@dex = Dexterous.new("hi" => "h1")
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @dex.parse(:file => @file))
|
||||
@parsley = Parsley.new("hi" => "h1")
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @file))
|
||||
end
|
||||
|
||||
def test_simple_string
|
||||
@dex = Dexterous.new("hi" => "h1")
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @dex.parse(:string => "<html><body><h1>Nick's Crispy Tacos</h1></body></html>"))
|
||||
@parsley = Parsley.new("hi" => "h1")
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:string => "<html><body><h1>Nick's Crispy Tacos</h1></body></html>"))
|
||||
end
|
||||
|
||||
def test_xml
|
||||
@dex = Dexterous.new("hi" => "h1")
|
||||
xml = "<?xml version=\"1.0\"?>\n<dexter:root xmlns:dexter=\"http://kylemaxwell.com/dexter\"><hi>Nick's Crispy Tacos</hi></dexter:root>\n"
|
||||
assert_equal(xml, @dex.parse(:file => @file, :output => :xml))
|
||||
@parsley = Parsley.new("hi" => "h1")
|
||||
xml = "<?xml version=\"1.0\"?>\n<parsley:root xmlns:parsley=\"http://parslets.com/json\"><hi>Nick's Crispy Tacos</hi></parsley:root>\n"
|
||||
assert_equal(xml, @parsley.parse(:file => @file, :output => :xml))
|
||||
end
|
||||
|
||||
def test_json
|
||||
@dex = Dexterous.new("hi" => "h1")
|
||||
assert_equal('{ "hi": "Nick\'s Crispy Tacos" }', @dex.parse(:file => @file, :output => :json))
|
||||
@parsley = Parsley.new("hi" => "h1")
|
||||
assert_equal('{ "hi": "Nick\'s Crispy Tacos" }', @parsley.parse(:file => @file, :output => :json))
|
||||
end
|
||||
|
||||
def test_rescuable_file_error
|
||||
@dex = Dexterous.new("hi" => "h1")
|
||||
@parsley = Parsley.new("hi" => "h1")
|
||||
@nonexistant_file = File.dirname(__FILE__) + "/../fixtures/yelp.html"
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @dex.parse(:file => @nonexistant_file)) rescue nil
|
||||
assert_equal({"hi" => "Nick's Crispy Tacos"}, @parsley.parse(:file => @nonexistant_file)) rescue nil
|
||||
end
|
||||
|
||||
def test_array_string
|
||||
@dex = Dexterous.new({"foo" => ["li"]})
|
||||
out = @dex.parse(:file => @file)
|
||||
@parsley = Parsley.new({"foo" => ["li"]})
|
||||
out = @parsley.parse(:file => @file)
|
||||
assert_kind_of Hash, out
|
||||
assert_kind_of Array, out["foo"], out.inspect
|
||||
assert out["foo"].length > 1
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
require "rubygems"
|
||||
require "nokogiri"
|
||||
require "hpricot"
|
||||
require "dexterous"
|
||||
require "parsley"
|
||||
require "benchmark"
|
||||
require "pp"
|
||||
|
||||
|
@ -29,8 +29,8 @@ def parse(doc)
|
|||
end
|
||||
end
|
||||
|
||||
def dext
|
||||
dex = Dexterous.new({
|
||||
def pars
|
||||
parslet = Parsley.new({
|
||||
"name" => "h1",
|
||||
"phone" => "#bizPhone",
|
||||
"address" => "address",
|
||||
|
@ -42,12 +42,12 @@ def dext
|
|||
}
|
||||
]
|
||||
})
|
||||
pp dex.parse(:file => YELP_HTML)
|
||||
pp parslet.parse(:file => YELP_HTML)
|
||||
end
|
||||
|
||||
Benchmark.bm do |x|
|
||||
x.report("nokogiri: ") { 3.times { noko } }
|
||||
x.report("hpricot: ") { 3.times { hpri } }
|
||||
x.report("dexterous: ") { 3.times { dext } }
|
||||
x.report("parsley: ") { 3.times { pars } }
|
||||
end
|
||||
|
||||
|
|
32
util.c
32
util.c
|
@ -1,13 +1,13 @@
|
|||
#include "util.h"
|
||||
#include "dexter.h"
|
||||
#include "parsley.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
|
||||
static bool dex_exslt_registered = false;
|
||||
static bool parsley_exslt_registered = false;
|
||||
|
||||
FILE* dex_fopen(char* name, char* mode) {
|
||||
FILE* parsley_fopen(char* name, char* mode) {
|
||||
FILE* fo;
|
||||
if(!strcmp("-", name)) {
|
||||
if(!strcmp("w", mode)) {
|
||||
|
@ -27,16 +27,16 @@ FILE* dex_fopen(char* name, char* mode) {
|
|||
|
||||
|
||||
void registerEXSLT() {
|
||||
if(!dex_exslt_registered) {
|
||||
if(!parsley_exslt_registered) {
|
||||
exsltRegisterAll();
|
||||
dex_register_all();
|
||||
parsley_register_all();
|
||||
init_xpath_alias();
|
||||
exslt_org_regular_expressions_init();
|
||||
dex_exslt_registered = true;
|
||||
parsley_exslt_registered = true;
|
||||
}
|
||||
}
|
||||
|
||||
int dex_key_flags(char* key) {
|
||||
int parsley_key_flags(char* key) {
|
||||
char* ptr = key;
|
||||
char* last_alnum = key;
|
||||
char* last_paren = key;
|
||||
|
@ -52,14 +52,14 @@ int dex_key_flags(char* key) {
|
|||
while(*ptr++ != '\0'){
|
||||
switch(*ptr){
|
||||
case '?':
|
||||
flags |= DEX_OPTIONAL;
|
||||
flags |= PARSLEY_OPTIONAL;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
char* dex_key_tag(char* key) {
|
||||
char* parsley_key_tag(char* key) {
|
||||
char *tag = astrdup(key);
|
||||
char *ptr = tag;
|
||||
while(*ptr++ != '\0'){
|
||||
|
@ -71,7 +71,7 @@ char* dex_key_tag(char* key) {
|
|||
return tag;
|
||||
}
|
||||
|
||||
char* dex_key_filter(char* key) {
|
||||
char* parsley_key_filter(char* key) {
|
||||
char *expr = astrdup(key);
|
||||
char *ptr = expr;
|
||||
char *last_paren;
|
||||
|
@ -93,26 +93,26 @@ char* dex_key_filter(char* key) {
|
|||
|
||||
|
||||
|
||||
char* sprintbuf_dex_header(struct printbuf *buf) {
|
||||
char* sprintbuf_parsley_header(struct printbuf *buf) {
|
||||
sprintbuf(buf, "<xsl:stylesheet version=\"1.0\" xmlns:xsl=\"http://www.w3.org/1999/XSL/Transform\"");
|
||||
sprintbuf(buf, " xmlns:dex=\"http://kylemaxwell.com/dexter/library\"");
|
||||
sprintbuf(buf, " xmlns:dexter=\"http://kylemaxwell.com/dexter\"");
|
||||
sprintbuf(buf, " xmlns:lib=\"http://parslets.com/stdlib\"");
|
||||
sprintbuf(buf, " xmlns:parslet=\"http://parslets.com/json\"");
|
||||
sprintbuf(buf, " xmlns:str=\"http://exslt.org/strings\"");
|
||||
sprintbuf(buf, " xmlns:set=\"http://exslt.org/sets\"");
|
||||
sprintbuf(buf, " xmlns:math=\"http://exslt.org/math\"");
|
||||
sprintbuf(buf, " xmlns:func=\"http://exslt.org/functions\"");
|
||||
sprintbuf(buf, " xmlns:user=\"http://kylemaxwell.com/dexter/user-functions\"");
|
||||
sprintbuf(buf, " xmlns:user=\"http://parslets.com/usre\"");
|
||||
sprintbuf(buf, " xmlns:dyn=\"http://exslt.org/dynamic\"");
|
||||
sprintbuf(buf, " xmlns:date=\"http://exslt.org/dates-and-times\"");
|
||||
sprintbuf(buf, " xmlns:exsl=\"http://exslt.org/common\"");
|
||||
sprintbuf(buf, " xmlns:saxon=\"http://icl.com/saxon\"");
|
||||
sprintbuf(buf, " xmlns:regexp=\"http://exslt.org/regular-expressions\"");
|
||||
sprintbuf(buf, " xmlns:regex=\"http://exslt.org/regular-expressions\"");
|
||||
sprintbuf(buf, " extension-element-prefixes=\"dex str math set func dyn exsl saxon user date regexp regex\"");
|
||||
sprintbuf(buf, " extension-element-prefixes=\"lib str math set func dyn exsl saxon user date regexp regex\"");
|
||||
sprintbuf(buf, ">\n");
|
||||
sprintbuf(buf, "<xsl:output method=\"xml\" indent=\"yes\"/>\n");
|
||||
sprintbuf(buf, "<xsl:strip-space elements=\"*\"/>\n");
|
||||
sprintbuf(buf, "<func:function name=\"dex:nl\"><xsl:param name=\"in\" select=\".\"/>");
|
||||
sprintbuf(buf, "<func:function name=\"lib:nl\"><xsl:param name=\"in\" select=\".\"/>");
|
||||
sprintbuf(buf, "<xsl:variable name=\"out\"><xsl:apply-templates mode=\"innertext\" select=\"exsl:node-set($in)\"/></xsl:variable>");
|
||||
sprintbuf(buf, "<func:result select=\"$out\" /></func:function>");
|
||||
sprintbuf(buf, "<xsl:template match=\"text()\" mode=\"innertext\"><xsl:value-of select=\".\" /></xsl:template>");
|
||||
|
|
10
util.h
10
util.h
|
@ -4,13 +4,13 @@
|
|||
#include <stdio.h>
|
||||
#include <json/json.h>
|
||||
|
||||
FILE* dex_fopen(char*, char*);
|
||||
char* sprintbuf_dex_header(struct printbuf *);
|
||||
FILE* parsley_fopen(char*, char*);
|
||||
char* sprintbuf_parsley_header(struct printbuf *);
|
||||
void registerEXSLT();
|
||||
|
||||
int dex_key_flags(char*);
|
||||
char* dex_key_tag(char*);
|
||||
char* dex_key_filter(char*);
|
||||
int parsley_key_flags(char*);
|
||||
char* parsley_key_tag(char*);
|
||||
char* parsley_key_filter(char*);
|
||||
|
||||
|
||||
#endif
|
|
@ -23,7 +23,7 @@ static struct json_object * _xml2json(xmlNodePtr xml) {
|
|||
json_object_object_add(json, child->name, xml2json(child->children));
|
||||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->ns->prefix, "dexter")) {
|
||||
} else if(!strcmp(xml->ns->prefix, "parsley")) {
|
||||
if(!strcmp(xml->name, "zipped")) {
|
||||
int len = 0;
|
||||
xmlNodePtr ptr = xml->children;
|
||||
|
@ -66,7 +66,7 @@ static struct json_object * _xml2json(xmlNodePtr xml) {
|
|||
child = child->next;
|
||||
}
|
||||
} else if(!strcmp(xml->name, "group")) {
|
||||
// Implicitly handled by dexter:groups handler
|
||||
// Implicitly handled by parsley:groups handler
|
||||
}
|
||||
}
|
||||
break;
|
||||
|
|
Loading…
Reference in New Issue