logo       
Google Custom Search
    AddThis Social Bookmark Button
-->

r27947 - in lxml/trunk: doc src/lxml: msg#00322

Subject: r27947 - in lxml/trunk: doc src/lxml
Author: scoder
Date: Wed May 31 09:38:15 2006
New Revision: 27947

Modified:
   lxml/trunk/doc/performance.txt
   lxml/trunk/src/lxml/apihelpers.pxi
   lxml/trunk/src/lxml/etree.h
   lxml/trunk/src/lxml/proxy.pxi
   lxml/trunk/src/lxml/tree.pxd
Log:
C macro implementation of an iterative tree walker: reduces code duplication 
between various functions and speeds up tree walking operations by up to 30% 
(deallocation, iteration, etc.)

Modified: lxml/trunk/doc/performance.txt
==============================================================================
--- lxml/trunk/doc/performance.txt      (original)
+++ lxml/trunk/doc/performance.txt      Wed May 31 09:38:15 2006
@@ -194,37 +194,37 @@
 especially if few elements are of interest or the element tag name is known,
 lxml is a good choice::
 
-  lxe: getiterator_all      (-- T2   )   31.2719 msec/pass
+  lxe: getiterator_all      (-- T2   )   23.0440 msec/pass
   cET: getiterator_all      (-- T2   )   36.3687 msec/pass
   ET : getiterator_all      (-- T2   )   46.2846 msec/pass
 
-  lxe: getiterator_islice   (-- T2   )    2.8503 msec/pass
+  lxe: getiterator_islice   (-- T2   )    2.0699 msec/pass
   cET: getiterator_islice   (-- T2   )    0.3299 msec/pass
   ET : getiterator_islice   (-- T2   )   44.5898 msec/pass
 
-  lxe: getiterator_tag      (-- T2   )    3.0983 msec/pass
+  lxe: getiterator_tag      (-- T2   )    1.9176 msec/pass
   cET: getiterator_tag      (-- T2   )   11.2861 msec/pass
   ET : getiterator_tag      (-- T2   )   37.5661 msec/pass
 
-  lxe: getiterator_tag_all  (-- T2   )    4.9760 msec/pass
+  lxe: getiterator_tag_all  (-- T2   )    4.5722 msec/pass
   cET: getiterator_tag_all  (-- T2   )   33.2602 msec/pass
   ET : getiterator_tag_all  (-- T2   )   37.6200 msec/pass
 
 This similarly shows in ``Element.findall()``::
 
-  lxe: findall              (-- T2   )   36.4730 msec/pass
+  lxe: findall              (-- T2   )   27.3874 msec/pass
   cET: findall              (-- T2   )   38.8718 msec/pass
   ET : findall              (-- T2   )   50.9692 msec/pass
 
-  lxe: findall              (-- T3   )    4.3956 msec/pass
+  lxe: findall              (-- T3   )    3.8227 msec/pass
   cET: findall              (-- T3   )   11.8051 msec/pass
   ET : findall              (-- T3   )   11.2570 msec/pass
 
-  lxe: findall_tag          (-- T2   )    4.3950 msec/pass
+  lxe: findall_tag          (-- T2   )    4.5549 msec/pass
   cET: findall_tag          (-- T2   )   31.3107 msec/pass
   ET : findall_tag          (-- T2   )   36.7813 msec/pass
 
-  lxe: findall_tag          (-- T3   )    0.5946 msec/pass
+  lxe: findall_tag          (-- T3   )    0.5643 msec/pass
   cET: findall_tag          (-- T3   )    7.4491 msec/pass
   ET : findall_tag          (-- T3   )    9.2943 msec/pass
 

Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi  (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi  Wed May 31 09:38:15 2006
@@ -267,34 +267,13 @@
     2) its descendents
     3) its following siblings.
     """
-    cdef xmlNode* c_next
-    cdef xmlNode* c_start_parent
     if c_name is NULL:
         # always match
         return c_node
-    if c_node is NULL:
-        return NULL
-    c_start_parent = c_node.parent
-    while c_node is not NULL:
-        if _tagMatches(c_node, c_href, c_name):
-            return c_node
-        # walk through children
-        c_next = c_node.children
-        if c_next is NULL:
-            # sibling?
-            c_next = _nextElement(c_node)
-        elif not _isElement(c_next):
-            # we need an element
-            c_next = _nextElement(c_next)
-            if c_next is NULL:
-                c_next = _nextElement(c_node)
-        # back off through parents
-        while c_next is NULL:
-            c_node = c_node.parent
-            if c_node is c_start_parent:
-                return NULL
-            c_next = _nextElement(c_node)
-        c_node = c_next
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+    if _tagMatches(c_node, c_href, c_name):
+        return c_node
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
     return NULL
 
 cdef int _tagMatches(xmlNode* c_node, char* c_href, char* c_name):

Modified: lxml/trunk/src/lxml/etree.h
==============================================================================
--- lxml/trunk/src/lxml/etree.h (original)
+++ lxml/trunk/src/lxml/etree.h Wed May 31 09:38:15 2006
@@ -1,6 +1,10 @@
 #ifndef HAS_ETREE_H
 #define HAS_ETREE_H
 
+/* v_arg functions */
+#define va_int(ap)     va_arg(ap, int)
+#define va_charptr(ap) va_arg(ap, char *)
+
 /* Py_ssize_t support was added in Python 2.5 */
 #if PY_VERSION_HEX < 0x02050000
 #ifndef PY_SSIZE_T_MAX /* patched Pyrex? */
@@ -19,12 +23,61 @@
 #define str(o)          PyObject_Str(o)
 #define iter(o)         PyObject_GetIter(o)
 #define _cstr(s)        PyString_AS_STRING(s)
+
 #define _isElement(c_node) \
         ((c_node)->type == XML_ELEMENT_NODE || \
         (c_node)->type == XML_COMMENT_NODE)
 
-/* v_arg functions */
-#define va_int(ap)     va_arg(ap, int)
-#define va_charptr(ap) va_arg(ap, char *)
+/* Macro set implementation of a depth first tree walker
+ *
+ * Calls the code block between the BEGIN and END macros
+ *   1) for the start element (or the first 'element' sibling)
+ *   2) for all children (recursively)
+ *   3) all siblings (recursively)
+ *
+ * Usage in Pyrex:
+ *    cdef xmlNode* some_node
+ *    some_node = parent_node.children
+ *    BEGIN_FOR_EACH_ELEMENT_FROM(some_node)
+ *    # do something with some_node
+ *    END_FOR_EACH_ELEMENT_FROM(some_node)
+ *
+ * NOTE: 'some_node' MUST be a plain 'xmlNode*' !
+ * NOTE: parent modification during the walk will segfault !
+ */
+
+#define BEGIN_FOR_EACH_ELEMENT_FROM(c_node)                       \
+{                                                                 \
+    while ((c_node != 0) && (!_isElement(c_node)))                \
+        c_node = c_node->next;                                    \
+    if (c_node != 0) {                                            \
+        xmlNode* ___start_parent = c_node->parent;                \
+        xmlNode* ___next;                                         \
+        while (c_node != 0) {
+            /* here goes the code to be run for each element */
+#define END_FOR_EACH_ELEMENT_FROM(c_node)                         \
+            /* walk through children */                           \
+            ___next = c_node->children;                           \
+            while ((___next != 0) && (!_isElement(___next)))      \
+                ___next = ___next->next;                          \
+            if (___next == 0) {                                   \
+                /* try siblings */                                \
+                ___next = c_node->next;                           \
+                while ((___next != 0) && (!_isElement(___next)))  \
+                   ___next = ___next->next;                       \
+            }                                                     \
+            /* back off through parents */                        \
+            while (___next == 0) {                                \
+                c_node = c_node->parent;                          \
+                if (c_node == ___start_parent)                    \
+                    break;                                        \
+                ___next = c_node->next;                           \
+                while ((___next != 0) && (!_isElement(___next)))  \
+                    ___next = ___next->next;                      \
+            }                                                     \
+            c_node = ___next;                                     \
+        }                                                         \
+    }                                                             \
+}
 
 #endif /*HAS_ETREE_H*/

Modified: lxml/trunk/src/lxml/proxy.pxi
==============================================================================
--- lxml/trunk/src/lxml/proxy.pxi       (original)
+++ lxml/trunk/src/lxml/proxy.pxi       Wed May 31 09:38:15 2006
@@ -136,15 +136,11 @@
         return NULL
 
 cdef int canDeallocateChildNodes(xmlNode* c_node):
-    cdef xmlNode* c_current
-    c_current = c_node.children
-    while c_current is not NULL:
-        if _isElement(c_current):
-            if c_current._private is not NULL:
-                return 0
-            if not canDeallocateChildNodes(c_current):
-                return 0
-        c_current = c_current.next
+    c_node = c_node.children
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
+    if c_node._private is not NULL:
+        return 0
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
     return 1
 
 
################################################################################
@@ -160,22 +156,18 @@
     """
     tree.xmlReconciliateNs(doc._c_doc, node._c_node)
     if node._doc is not doc:
+        node._doc = doc
         changeDocumentBelow(node._c_node, doc)
 
 cdef void changeDocumentBelow(xmlNode* c_node, _Document doc):
     """Update the Python references in the tree below the node.
+    Does not update the node itself.
 
     Note that we expect C pointers to the document to be updated already by
     libxml2.
     """
-    cdef xmlNode* c_current
-    # adjust all children recursively
-    c_current = c_node.children
-    while c_current is not NULL:
-        if _isElement(c_current):
-            changeDocumentBelow(c_current, doc)
-        c_current = c_current.next
-
-    # adjust Python reference of current node
+    c_node = c_node.children
+    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node)
     if c_node._private is not NULL:
         (<_NodeBase>c_node._private)._doc = doc
+    tree.END_FOR_EACH_ELEMENT_FROM(c_node)

Modified: lxml/trunk/src/lxml/tree.pxd
==============================================================================
--- lxml/trunk/src/lxml/tree.pxd        (original)
+++ lxml/trunk/src/lxml/tree.pxd        Wed May 31 09:38:15 2006
@@ -248,3 +248,5 @@
 
 cdef extern from "etree.h":
     cdef int _isElement(xmlNode* node)
+    cdef void BEGIN_FOR_EACH_ELEMENT_FROM(xmlNode* node)
+    cdef void END_FOR_EACH_ELEMENT_FROM(xmlNode* node)


<Prev in Thread] Current Thread [Next in Thread>