Author: scoder
Date: Wed May 31 09:38:15 2006
New Revision: 27947
Modified:
lxml/trunk/doc/performance.txt
lxml/trunk/src/lxml/apihelpers.pxi
lxml/trunk/src/lxml/etree.h
lxml/trunk/src/lxml/proxy.pxi
lxml/trunk/src/lxml/tree.pxd
Log:
C macro implementation of an iterative tree walker: reduces code duplication
between various functions and speeds up tree walking operations by up to 30%
(deallocation, iteration, etc.)
Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt (original)
+++ lxml/trunk/doc/performance.txt Wed May 31 09:38:15 2006
@@ -194,37 +194,37 @@
especially if few elements are of interest or the element tag name is known,
lxml is a good choice::
- lxe: getiterator_all (-- T2 ) 31.2719 msec/pass
+ lxe: getiterator_all (-- T2 ) 23.0440 msec/pass
cET: getiterator_all (-- T2 ) 36.3687 msec/pass
ET : getiterator_all (-- T2 ) 46.2846 msec/pass
- lxe: getiterator_islice (-- T2 ) 2.8503 msec/pass
+ lxe: getiterator_islice (-- T2 ) 2.0699 msec/pass
cET: getiterator_islice (-- T2 ) 0.3299 msec/pass
ET : getiterator_islice (-- T2 ) 44.5898 msec/pass
- lxe: getiterator_tag (-- T2 ) 3.0983 msec/pass
+ lxe: getiterator_tag (-- T2 ) 1.9176 msec/pass
cET: getiterator_tag (-- T2 ) 11.2861 msec/pass
ET : getiterator_tag (-- T2 ) 37.5661 msec/pass
- lxe: getiterator_tag_all (-- T2 ) 4.9760 msec/pass
+ lxe: getiterator_tag_all (-- T2 ) 4.5722 msec/pass
cET: getiterator_tag_all (-- T2 ) 33.2602 msec/pass
ET : getiterator_tag_all (-- T2 ) 37.6200 msec/pass
This similarly shows in ``Element.findall()``::
- lxe: findall (-- T2 ) 36.4730 msec/pass
+ lxe: findall (-- T2 ) 27.3874 msec/pass
cET: findall (-- T2 ) 38.8718 msec/pass
ET : findall (-- T2 ) 50.9692 msec/pass
- lxe: findall (-- T3 ) 4.3956 msec/pass
+ lxe: findall (-- T3 ) 3.8227 msec/pass
cET: findall (-- T3 ) 11.8051 msec/pass
ET : findall (-- T3 ) 11.2570 msec/pass
- lxe: findall_tag (-- T2 ) 4.3950 msec/pass
+ lxe: findall_tag (-- T2 ) 4.5549 msec/pass
cET: findall_tag (-- T2 ) 31.3107 msec/pass
ET : findall_tag (-- T2 ) 36.7813 msec/pass
- lxe: findall_tag (-- T3 ) 0.5946 msec/pass
+ lxe: findall_tag (-- T3 ) 0.5643 msec/pass
cET: findall_tag (-- T3 ) 7.4491 msec/pass
ET : findall_tag (-- T3 ) 9.2943 msec/pass
Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi Wed May 31 09:38:15 2006
@@ -267,34 +267,13 @@
2) its descendents
3) its following siblings.
"""
- cdef xmlNode* c_next
- cdef xmlNode* c_start_parent
if c_name is NULL:
# always match
return c_node
- if c_node is NULL:
- return NULL
- c_start_parent = c_node.parent
- while c_node is not NULL:
- if _tagMatches(c_node, c_href, c_name):
- return c_node
- # walk through children
- c_next = c_node.children
- if c_next is NULL:
- # sibling?
- c_next = _nextElement(c_node)
- elif not _isElement(c_next):
- # we need an element
- c_next = _nextElement(c_next)
- if c_next is NULL:
- c_next = _nextElement(c_node)
- # back off through parents
- while c_next is NULL:
- c_node = c_node.parent
- if c_node is c_start_parent:
- return NULL
- c_next = _nextElement(c_node)
- c_node = c_next
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+ if _tagMatches(c_node, c_href, c_name):
+ return c_node
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return NULL
cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):
Modified: lxml/trunk/src/lxml/etree.h
==============================================================================
--- lxml/trunk/src/lxml/etree.h (original)
+++ lxml/trunk/src/lxml/etree.h Wed May 31 09:38:15 2006
@@ -1,6 +1,10 @@
#ifndef HAS_ETREE_H
#define HAS_ETREE_H
+/* v_arg functions */
+#define va_int(ap) va_arg(ap, int)
+#define va_charptr(ap) va_arg(ap, char *)
+
/* Py_ssize_t support was added in Python 2.5 */
#if PY_VERSION_HEX < 0x02050000
#ifndef PY_SSIZE_T_MAX /* patched Pyrex? */
@@ -19,12 +23,61 @@
#define str(o) PyObject_Str(o)
#define iter(o) PyObject_GetIter(o)
#define _cstr(s) PyString_AS_STRING(s)
+
#define _isElement(c_node) \
((c_node)->type == XML_ELEMENT_NODE || \
(c_node)->type == XML_COMMENT_NODE)
-/* v_arg functions */
-#define va_int(ap) va_arg(ap, int)
-#define va_charptr(ap) va_arg(ap, char *)
+/* Macro set implementation of a depth first tree walker
+ *
+ * Calls the code block between the BEGIN and END macros
+ * 1) for the start element (or the first 'element' sibling)
+ * 2) for all children (recursively)
+ * 3) all siblings (recursively)
+ *
+ * Usage in Pyrex:
+ * cdef xmlNode* some_node
+ * some_node = parent_node.children
+ * BEGIN_FOR_EACH_ELEMENT_FROM(some_node)
+ * # do something with some_node
+ * END_FOR_EACH_ELEMENT_FROM(some_node)
+ *
+ * NOTE: 'some_node' MUST be a plain 'xmlNode*' !
+ * NOTE: parent modification during the walk will segfault !
+ */
+
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_node) \
+{ \
+ while ((c_node != 0) && (!_isElement(c_node))) \
+ c_node = c_node->next; \
+ if (c_node != 0) { \
+ xmlNode* ___start_parent = c_node->parent; \
+ xmlNode* ___next; \
+ while (c_node != 0) {
+ /* here goes the code to be run for each element */
+#define END_FOR_EACH_ELEMENT_FROM(c_node) \
+ /* walk through children */ \
+ ___next = c_node->children; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ if (___next == 0) { \
+ /* try siblings */ \
+ ___next = c_node->next; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ } \
+ /* back off through parents */ \
+ while (___next == 0) { \
+ c_node = c_node->parent; \
+ if (c_node == ___start_parent) \
+ break; \
+ ___next = c_node->next; \
+ while ((___next != 0) && (!_isElement(___next))) \
+ ___next = ___next->next; \
+ } \
+ c_node = ___next; \
+ } \
+ } \
+}
#endif /*HAS_ETREE_H*/
Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi (original)
+++ lxml/trunk/src/lxml/proxy.pxi Wed May 31 09:38:15 2006
@@ -136,15 +136,11 @@
return NULL
cdef int canDeallocateChildNodes(xmlNode* c_node):
- cdef xmlNode* c_current
- c_current = c_node.children
- while c_current is not NULL:
- if _isElement(c_current):
- if c_current._private is not NULL:
- return 0
- if not canDeallocateChildNodes(c_current):
- return 0
- c_current = c_current.next
+ c_node = c_node.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+ if c_node._private is not NULL:
+ return 0
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
return 1
################################################################################
@@ -160,22 +156,18 @@
"""
tree.xmlReconciliateNs(doc._c_doc, node._c_node)
if node._doc is not doc:
+ node._doc = doc
changeDocumentBelow(node._c_node, doc)
cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
"""Update the Python references in the tree below the node.
+ Does not update the node itself.
Note that we expect C pointers to the document to be updated already by
libxml2.
"""
- cdef xmlNode* c_current
- # adjust all children recursively
- c_current = c_node.children
- while c_current is not NULL:
- if _isElement(c_current):
- changeDocumentBelow(c_current, doc)
- c_current = c_current.next
-
- # adjust Python reference of current node
+ c_node = c_node.children
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
if c_node._private is not NULL:
(<_NodeBase>c_node._private)._doc = doc
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd (original)
+++ lxml/trunk/src/lxml/tree.pxd Wed May 31 09:38:15 2006
@@ -248,3 +248,5 @@
cdef extern from "etree.h":
cdef int _isElement(xmlNode* node)
+ cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* node)
+ cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* node)
|