logo       
Google Custom Search
    AddThis Social Bookmark Button
-->

r26540 - lxml/trunk/src/lxml: msg#00134

Subject: r26540 - lxml/trunk/src/lxml
Author: scoder
Date: Fri Apr 28 21:56:36 2006
New Revision: 26540

Modified:
   lxml/trunk/src/lxml/apihelpers.pxi
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/extensions.pxi
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/xmlerror.pxi
   lxml/trunk/src/lxml/xpath.pxi
   lxml/trunk/src/lxml/xslt.pxi
Log:
moved helper functions from etree.pyx to apihelpers.pxi, some cleanup

Modified: lxml/trunk/src/lxml/apihelpers.pxi
==============================================================================
--- lxml/trunk/src/lxml/apihelpers.pxi  (original)
+++ lxml/trunk/src/lxml/apihelpers.pxi  Fri Apr 28 21:56:36 2006
@@ -1,1403 +1,13 @@
-cimport tree, python
-from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement
-from python cimport isinstance, issubclass, hasattr, callable
-from python cimport iter, str, _cstr
-cimport xpath
-cimport xslt
-cimport xmlerror
-cimport xinclude
-cimport c14n
-cimport cstd
-import re
-
-import _elementpath
-from StringIO import StringIO
-import sys
-
-# the rules
-# any libxml C argument/variable is prefixed with c_
-# any non-public function/class is prefixed with an underscore
-# instance creation is always through factories
-
-ctypedef enum LXML_PROXY_TYPE:
-    PROXY_ELEMENT
-    PROXY_ATTRIB
-
-# what to do with libxml2/libxslt error messages?
-# 0 : drop
-# 1 : use log
-cdef int __DEBUG
-__DEBUG = 1
-
-# maximum number of lines in the libxml2/xslt log if __DEBUG == 1
-cdef int __MAX_LOG_SIZE
-__MAX_LOG_SIZE = 100
-
-# make the compiled-in debug state publicly available
-DEBUG = __DEBUG
-
-# Error superclass for ElementTree compatibility
-class Error(Exception):
-    pass
-
-# module level superclass for all exceptions
-class LxmlError(Error):
-    def __init__(self, *args):
-        Error.__init__(self, *args)
-        self.error_log = __copyGlobalErrorLog()
-
-# superclass for all syntax errors
-class LxmlSyntaxError(LxmlError, SyntaxError):
-    pass
-
-class DocumentInvalid(LxmlError):
-    pass
-
-class XIncludeError(LxmlError):
-    pass
-
-class C14NError(LxmlError):
-    pass
-
-
-# class for temporary storage of Python references
-cdef class _TempStore:
-    cdef object _storage
-    def __init__(self):
-        self._storage = {}
-
-    cdef void add(self, obj):
-        python.PyDict_SetItem(self._storage, id(obj), obj)
-
-    cdef void clear(self):
-        python.PyDict_Clear(self._storage)
-
-    cdef object dictcopy(self):
-        return self._storage.copy()
-
-# class for temporarily storing exceptions raised in extensions
-cdef class _ExceptionContext:
-    cdef object _exc_info
-    def __init__(self):
-        self._exc_info = None
-
-    cdef void clear(self):
-        self._exc_info = None
-
-    cdef void _store_raised(self):
-        self._exc_info = sys.exc_info()
-
-    cdef void _store_exception(self, exception):
-        self._exc_info = (exception, None, None)
-
-    cdef _has_raised(self):
-        return self._exc_info is not None
-
-    cdef _raise_if_stored(self):
-        _exc_info = self._exc_info
-        if _exc_info is not None:
-            self._exc_info = None
-            type, value, traceback = _exc_info
-            if traceback is None and value is None:
-                raise type
-            else:
-                raise type, value, traceback
-
-
-cdef class BaseParser # forward declaration
-
-cdef class _Document:
-    """Internal base class to reference a libxml document.
-
-    When instances of this class are garbage collected, the libxml
-    document is cleaned up.
-    """
-    cdef int _ns_counter
-    cdef xmlDoc* _c_doc
-    cdef BaseParser _parser
-    
-    def __dealloc__(self):
-        # if there are no more references to the document, it is safe
-        # to clean the whole thing up, as all nodes have a reference to
-        # the document
-        #print "freeing document:", <int>self._c_doc
-        #displayNode(<xmlNode*>self._c_doc, 0)
-        #print <int>self._c_doc, self._c_doc.dict is __GLOBAL_PARSER_CONTEXT._c_dict
-        tree.xmlFreeDoc(self._c_doc)
-
-    cdef getroot(self):
-        cdef xmlNode* c_node
-        c_node = tree.xmlDocGetRootElement(self._c_doc)
-        if c_node is NULL:
-            return None
-        return _elementFactory(self, c_node)
-
-    cdef buildNewPrefix(self):
-        ns = python.PyString_FromFormat("ns%d", self._ns_counter)
-        self._ns_counter = self._ns_counter + 1
-        return ns
-
-    cdef xmlNs* _findOrBuildNodeNs(self, xmlNode* c_node, char* href):
-        """Get or create namespace structure for a node.
-        """
-        cdef xmlNs* c_ns
-        # look for existing ns
-        c_ns = tree.xmlSearchNsByHref(self._c_doc, c_node, href)
-        if c_ns is not NULL:
-            return c_ns
-        # create ns if existing ns cannot be found
-        # try to simulate ElementTree's namespace prefix creation
-        prefix = self.buildNewPrefix()
-        c_ns = tree.xmlNewNs(c_node, href, _cstr(prefix))
-        return c_ns
-
-    cdef void _setNodeNs(self, xmlNode* c_node, char* href):
-        "Lookup namespace structure and set it for the node."
-        cdef xmlNs* c_ns
-        c_ns = self._findOrBuildNodeNs(c_node, href)
-        tree.xmlSetNs(c_node, c_ns)
-
-    cdef void _setNodeNamespaces(self, xmlNode* c_node,
-                                 object node_ns_utf, object nsmap):
-        """Lookup current namespace prefixes, then set namespace structure for
-        node and register new ns-prefix mappings.
-        """
-        cdef xmlNs*  c_ns
-        cdef xmlDoc* c_doc
-        cdef char*   c_prefix
-        cdef char*   c_href
-        if not nsmap:
-            if node_ns_utf is not None:
-                self._setNodeNs(c_node, node_ns_utf)
-            return
-
-        c_doc  = self._c_doc
-        for prefix, href in nsmap.items():
-            href_utf = _utf8(href)
-            c_href = _cstr(href_utf)
-            if prefix is not None:
-                prefix_utf = _utf8(prefix)
-                c_prefix = _cstr(prefix_utf)
-            else:
-                c_prefix = NULL
-            # add namespace with prefix if ns is not already known
-            c_ns = tree.xmlSearchNsByHref(c_doc, c_node, c_href)
-            if c_ns is NULL:
-                c_ns = tree.xmlNewNs(c_node, c_href, c_prefix)
-            if href_utf == node_ns_utf:
-                tree.xmlSetNs(c_node, c_ns)
-                node_ns_utf = None
-
-        if node_ns_utf is not None:
-            self._setNodeNs(c_node, node_ns_utf)
-
-cdef _Document _parseDocument(source, parser):
-    cdef xmlDoc* c_doc
-    filename = _getFilenameForFile(source)
-    # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...)
-    if not filename and hasattr(source, 'read'):
-        return _parseMemoryDocument(source.read(), parser)
-
-    # Otherwise parse the file directly from the filesystem
-    if filename is None:
-        filename = source
-    # open filename
-    c_doc = _parseDocFromFile(_utf8(filename), parser)
-    return _documentFactory(c_doc, parser)
-
-cdef _Document _parseMemoryDocument(text, parser):
-    cdef xmlDoc* c_doc
-    if python.PyUnicode_Check(text):
-        text = _stripDeclaration(_utf8(text))
-    c_doc = _parseDoc(text, parser)
-    return _documentFactory(c_doc, parser)
-
-cdef _Document _documentFactory(xmlDoc* c_doc, parser):
-    cdef _Document result
-    result = _Document()
-    result._c_doc = c_doc
-    result._ns_counter = 0
-    if parser is None:
-        parser = __DEFAULT_PARSER
-    result._parser = parser.copy()
-    return result
+# Private helper functions
 
-# to help with debugging
 cdef void displayNode(xmlNode* c_node, indent):
+    # to help with debugging
     cdef xmlNode* c_child
     print indent * ' ', <int>c_node
     c_child = c_node.children
     while c_child is not NULL:
         displayNode(c_child, indent + 1)
         c_child = c_child.next
-        
-cdef class _NodeBase:
-    """Base class to reference a document object and a libxml node.
-
-    By pointing to a Document instance, a reference is kept to
-    _Document as long as there is some pointer to a node in it.
-    """
-    cdef _Document _doc
-    cdef xmlNode* _c_node
-    cdef int _proxy_type
-    
-    def __dealloc__(self):
-        #print "trying to free node:", <int>self._c_node
-        #displayNode(self._c_node, 0)
-        if self._c_node is not NULL:
-            unregisterProxy(self)
-            attemptDeallocation(self._c_node)
-
-    def _init(self):
-        """Called after object initialisation. Subclasses may override
-        this if they recursively call _init() in the superclasses.
-        """
-
-cdef class _ElementTree:
-    cdef _Document _doc
-    cdef _NodeBase _context_node
-
-    def parse(self, source, parser=None):
-        """Updates self with the content of source and returns its root
-        """
-        self._doc          = _parseDocument(source, parser)
-        self._context_node = self._doc.getroot()
-        return self._context_node
-    
-    def getroot(self):
-        return self._context_node
-    
-    def write(self, file, encoding='us-ascii'):
-        if not hasattr(file, 'write'):
-            # file is a filename, we want a file object
-            file = open(file, 'wb')
-
-        m = tostring(self._context_node, encoding)
-        # XXX this is purely for ElementTree compatibility..
-        if encoding == 'UTF-8' or encoding == 'us-ascii':
-            m = _stripDeclaration(m)
-            if m[-1:] == '\n':
-                m = m[:-1]
-        file.write(m)
-
-    def getiterator(self, tag=None):
-        root = self.getroot()
-        if root is None:
-            return ()
-        return root.getiterator(tag)
-
-    def find(self, path):
-        root = self.getroot()
-        assert root is not None
-        if path[:1] == "/":
-            path = "." + path
-        return root.find(path)
-
-    def findtext(self, path, default=None):
-        root = self.getroot()
-        assert root is not None
-        if path[:1] == "/":
-            path = "." + path
-        return root.findtext(path, default)
-
-    def findall(self, path):
-        root = self.getroot()
-        assert root is not None
-        if path[:1] == "/":
-            path = "." + path
-        return root.findall(path)
-    
-    # extensions to ElementTree API
-    def xpath(self, _path, namespaces=None, **_variables):
-        """XPath evaluate in context of document.
-
-        namespaces is an optional dictionary with prefix to namespace URI
-        mappings, used by XPath.
-        
-        Returns a list (nodeset), or bool, float or string.
-
-        In case of a list result, return Element for element nodes,
-        string for text and attribute values.
-
-        Note: if you are going to apply multiple XPath expressions
-        against the same document, it is more efficient to use
-        XPathEvaluator directly.
-        """
-        evaluator = XPathElementEvaluator(self._context_node, namespaces)
-        return evaluator.evaluate(_path, **_variables)
-
-    def xslt(self, _xslt, extensions=None, **_kw):
-        """Transform this document using other document.
-
-        xslt is a tree that should be XSLT
-        keyword parameters are XSLT transformation parameters.
-
-        Returns the transformed tree.
-
-        Note: if you are going to apply the same XSLT stylesheet against
-        multiple documents, it is more efficient to use the XSLT
-        class directly.
-        """
-        style = XSLT(_xslt, extensions)
-        return style(self, **_kw)
-
-    def relaxng(self, relaxng):
-        """Validate this document using other document.
-
-        relaxng is a tree that should contain Relax NG XML
-
-        Returns True or False, depending on whether validation
-        succeeded.
-
-        Note: if you are going to apply the same Relax NG schema against
-        multiple documents, it is more efficient to use the RelaxNG
-        class directly.
-        """
-        schema = RelaxNG(relaxng)
-        return schema.validate(self)
-
-    def xmlschema(self, xmlschema):
-        """Validate this document using other doucment.
-
-        xmlschema is a tree that should contain XML Schema XML.
-
-        Returns True or False, depending on whether validation
-        succeeded.
-
-        Note: If you are going to applyt he same XML Schema against
-        multiple documents, it is more efficient to use the XMLSchema
-        class directly.
-        """
-        schema = XMLSchema(xmlschema)
-        return schema.validate(self)
-        
-    def xinclude(self):
-        """Process this document, including using XInclude.
-        """
-        cdef int result
-        # XXX what happens memory-wise with the original XInclude nodes?
-        # they seem to be still accessible if a reference to them has
-        # been made previously, but I have no idea whether they get freed
-        # at all. The XInclude nodes appear to be still being in the same
-        # parent and same document, but they must not be connected to the
-        # tree..
-        result = xinclude.xmlXIncludeProcessTree(self._context_node._c_node)
-        if result == -1:
-            raise XIncludeError, "XInclude processing failed"
-        
-    def write_c14n(self, file):
-        """C14N write of document. Always writes UTF-8.
-        """
-        cdef xmlDoc* c_base_doc
-        cdef xmlDoc* c_doc
-        cdef char* data
-        cdef int bytes
-        c_base_doc = self._doc._c_doc
-
-        c_doc = _fakeRootDoc(c_base_doc, self._context_node._c_node)
-        bytes = c14n.xmlC14NDocDumpMemory(c_doc, NULL, 0, NULL, 1, &data)
-        _destroyFakeDoc(c_base_doc, c_doc)
-
-        if bytes < 0:
-            raise C14NError, "C14N failed"
-        if not hasattr(file, 'write'):
-            file = open(file, 'wb')
-        file.write(data)
-        tree.xmlFree(data)
-    
-cdef _ElementTree _elementTreeFactory(_Document doc,
-                                      _NodeBase context_node):
-    return _newElementTree(doc, context_node, _ElementTree)
-
-cdef _ElementTree _newElementTree(_Document doc, _NodeBase context_node,
-                                  object baseclass):
-    cdef _ElementTree result
-    result = baseclass()
-    result._doc = doc
-    if context_node is None and doc is not None:
-        context_node = doc.getroot()
-    result._context_node = context_node
-    return result
-
-cdef class _Element(_NodeBase):
-    cdef object _tag
-
-    # MANIPULATORS
-
-    def __setitem__(self, index, _NodeBase element):
-        cdef xmlNode* c_node
-        cdef xmlNode* c_next
-        cdef int foreign
-        c_node = _findChild(self._c_node, index)
-        if c_node is NULL:
-            raise IndexError
-        foreign = self._doc is not element._doc
-        c_next = element._c_node.next
-        _removeText(c_node.next)
-        tree.xmlReplaceNode(c_node, element._c_node)
-        _moveTail(c_next, element._c_node)
-        changeDocumentBelow(element, self._doc, foreign)
-        
-    def __delitem__(self, index):
-        cdef xmlNode* c_node
-        c_node = _findChild(self._c_node, index)
-        if c_node is NULL:
-            raise IndexError
-        _removeText(c_node.next)
-        _removeNode(c_node)
-
-    def __delslice__(self, start, stop):
-        cdef xmlNode* c_node
-        c_node = _findChild(self._c_node, start)
-        _deleteSlice(c_node, start, stop)
-        
-    def __setslice__(self, start, stop, value):
-        cdef xmlNode* c_node
-        cdef xmlNode* c_next
-        cdef _Element mynode
-        cdef int foreign
-        # first, find start of slice
-        c_node = _findChild(self._c_node, start)
-        # now delete the slice
-        if start != stop:
-            c_node = _deleteSlice(c_node, start, stop)
-        # if the insertion point is at the end, append there
-        if c_node is NULL:
-            append = self.append
-            for node in value:
-                append(node)
-            return
-        # if the next element is in the list, insert before it
-        for mynode in value:
-            if mynode is None:
-                raise TypeError, "Node must not be None."
-            foreign = self._doc is not mynode._doc
-            # store possible text tail
-            c_next = mynode._c_node.next
-            # now move node previous to insertion point
-            tree.xmlUnlinkNode(mynode._c_node)
-            tree.xmlAddPrevSibling(c_node, mynode._c_node)
-            # and move tail just behind his node
-            _moveTail(c_next, mynode._c_node)
-            # move it into a new document
-            changeDocumentBelow(mynode, self._doc, foreign)
-
-    def __deepcopy__(self, memo):
-        return self.__copy__()
-        
-    def __copy__(self):
-        cdef xmlNode* c_node
-        cdef xmlDoc* c_doc
-        cdef xmlDoc* fake_c_doc
-        cdef _Document doc
-        doc = self._doc
-        fake_c_doc = _fakeRootDoc(doc._c_doc, self._c_node)
-        c_doc = tree.xmlCopyDoc(fake_c_doc, 1) # recursive copy
-        _destroyFakeDoc(doc._c_doc, fake_c_doc)
-        doc = _documentFactory(c_doc, doc._parser)
-        return doc.getroot()
-        
-    def set(self, key, value):
-        self.attrib[key] = value
-        
-    def append(self, _Element element not None):
-        cdef xmlNode* c_next
-        cdef xmlNode* c_node
-        cdef int foreign
-        foreign = self._doc is not element._doc
-        c_node = element._c_node
-        # store possible text node
-        c_next = c_node.next
-        # XXX what if element is coming from a different document?
-        tree.xmlUnlinkNode(c_node)
-        # move node itself
-        tree.xmlAddChild(self._c_node, c_node)
-        _moveTail(c_next, c_node)
-        # uh oh, elements may be pointing to different doc when
-        # parent element has moved; change them too..
-        changeDocumentBelow(element, self._doc, foreign)
-
-    def clear(self):
-        cdef xmlAttr* c_attr
-        cdef xmlAttr* c_attr_next
-        cdef xmlNode* c_node
-        cdef xmlNode* c_node_next
-        c_node = self._c_node
-        # remove self.text and self.tail
-        _removeText(c_node.children)
-        _removeText(c_node.next)
-        # remove all attributes
-        c_attr = c_node.properties
-        while c_attr is not NULL:
-            c_attr_next = c_attr.next
-            tree.xmlRemoveProp(c_attr)
-            c_attr = c_attr_next
-        # remove all subelements
-        c_node = c_node.children
-        while c_node is not NULL:
-            c_node_next = c_node.next
-            if _isElement(c_node):
-                _removeText(c_node_next)
-                c_node_next = c_node.next
-                _removeNode(c_node)
-            c_node = c_node_next
-    
-    def insert(self, index, _Element element not None):
-        cdef xmlNode* c_node
-        cdef xmlNode* c_next
-        cdef int foreign
-        c_node = _findChild(self._c_node, index)
-        if c_node is NULL:
-            self.append(element)
-            return
-        foreign = self._doc is not element._doc
-        c_next = element._c_node.next
-        tree.xmlAddPrevSibling(c_node, element._c_node)
-        _moveTail(c_next, element._c_node)
-        changeDocumentBelow(element, self._doc, foreign)
-
-    def remove(self, _Element element not None):
-        cdef xmlNode* c_node
-        c_node = element._c_node
-        if c_node.parent is not self._c_node:
-            raise ValueError, "Element is not a child of this node."
-        _removeText(c_node.next)
-        tree.xmlUnlinkNode(c_node)
-        
-    # PROPERTIES
-    property tag:
-        def __get__(self):
-            if self._tag is not None:
-                return self._tag
-            self._tag = _namespacedName(self._c_node)
-            return self._tag
-    
-        def __set__(self, value):
-            cdef xmlNs* c_ns
-            ns, text = _getNsTag(value)
-            self._tag = value
-            tree.xmlNodeSetName(self._c_node, _cstr(text))
-            if ns is None:
-                return
-            self._doc._setNodeNs(self._c_node, _cstr(ns))
-
-    # not in ElementTree, read-only
-    property prefix:
-        def __get__(self):
-            if self._c_node.ns is not NULL:
-                if self._c_node.ns.prefix is not NULL:
-                    return funicode(self._c_node.ns.prefix)
-            return None
-        
-    property attrib:
-        def __get__(self):
-            return _attribFactory(self._doc, self._c_node)
-        
-    property text:
-        def __get__(self):
-            return _collectText(self._c_node.children)
-        
-        def __set__(self, value):
-            cdef xmlNode* c_text_node
-            # remove all text nodes at the start first
-            _removeText(self._c_node.children)
-            if value is None:
-                return
-            # now add new text node with value at start
-            text = _utf8(value)
-            c_text_node = tree.xmlNewDocText(self._doc._c_doc,
-                                             _cstr(text))
-            if self._c_node.children is NULL:
-                tree.xmlAddChild(self._c_node, c_text_node)
-            else:
-                tree.xmlAddPrevSibling(self._c_node.children,
-                                       c_text_node)
-        
-    property tail:
-        def __get__(self):
-            return _collectText(self._c_node.next)
-           
-        def __set__(self, value):
-            cdef xmlNode* c_text_node
-            # remove all text nodes at the start first
-            _removeText(self._c_node.next)
-            if value is None:
-                return
-            text = _utf8(value)
-            c_text_node = tree.xmlNewDocText(self._doc._c_doc, _cstr(text))
-            # XXX what if we're the top element?
-            tree.xmlAddNextSibling(self._c_node, c_text_node)
-
-    # ACCESSORS
-    def __repr__(self):
-        return "<Element %s at %x>" % (self.tag, id(self))
-    
-    def __getitem__(self, index):
-        cdef xmlNode* c_node
-        c_node = _findChild(self._c_node, index)
-        if c_node is NULL:
-            raise IndexError, "list index out of range"
-        return _elementFactory(self._doc, c_node)
-
-    def __getslice__(self, start, stop):
-        cdef xmlNode* c_node
-        cdef _Document doc
-        cdef int c, c_stop
-        # this does not work for negative start, stop, however,
-        # python seems to convert these to positive start, stop before
-        # calling, so this all works perfectly (at the cost of a len() call)
-        c_node = _findChild(self._c_node, start)
-        if c_node is NULL:
-            return []
-        c = start
-        c_stop = stop
-        result = []
-        doc = self._doc
-        while c_node is not NULL and c < c_stop:
-            if _isElement(c_node):
-                ret = python.PyList_Append(result, _elementFactory(doc, c_node))
-                if ret:
-                    raise
-                c = c + 1
-            c_node = c_node.next
-        return result
-            
-    def __len__(self):
-        cdef int c
-        cdef xmlNode* c_node
-        c = 0
-        c_node = self._c_node.children
-        while c_node is not NULL:
-            if _isElement(c_node):
-                c = c + 1
-            c_node = c_node.next
-        return c
-
-    def __nonzero__(self):
-        cdef xmlNode* c_node
-        c_node = _findChildBackwards(self._c_node, 0)
-        return c_node != NULL
-
-    def __iter__(self):
-        return ElementChildIterator(self)
-
-    def __reversed__(self):
-        return ElementChildIterator(self, reversed=True)
-
-    def index(self, _Element x not None, start=None, stop=None):
-        cdef int k
-        cdef int l
-        cdef int c_stop
-        cdef int c_start
-        cdef xmlNode* c_child
-        cdef xmlNode* c_start_node
-        c_child = x._c_node
-        if c_child.parent is not self._c_node:
-            raise ValueError, "Element is not a child of this node."
-
-        if start is None:
-            c_start = 0
-        else:
-            c_start = start
-        if stop is None:
-            c_stop = 0
-        else:
-            c_stop = stop
-            if c_stop == 0 or \
-                   c_start >= c_stop and (c_stop > 0 or c_start < 0):
-                raise ValueError, "list.index(x): x not in slice"
-
-        # for negative slice indices, check slice before searching index
-        if c_start < 0 or c_stop < 0:
-            # start from right, at most up to leftmost(c_start, c_stop)
-            if c_start < c_stop:
-                k = -c_start
-            else:
-                k = -c_stop
-            c_start_node = self._c_node.last
-            l = 1
-            while c_start_node != c_child and l < k:
-                if _isElement(c_start_node):
-                    l = l + 1
-                c_start_node = c_start_node.prev
-            if c_start_node == c_child:
-                # found! before slice end?
-                if c_stop < 0 and l <= -c_stop:
-                    raise ValueError, "list.index(x): x not in slice"
-            elif c_start < 0:
-                raise ValueError, "list.index(x): x not in slice"
-
-        # now determine the index backwards from child
-        c_child = c_child.prev
-        k = 0
-        if c_stop > 0:
-            # we can optimize: stop after c_stop elements if not found
-            while c_child != NULL and k < c_stop:
-                if _isElement(c_child):
-                    k = k + 1
-                c_child = c_child.prev
-            if k < c_stop:
-                return k
-        else:
-            # traverse all
-            while c_child != NULL:
-                if _isElement(c_child):
-                    k = k + 1
-                c_child = c_child.prev
-            if c_start > 0:
-                if k >= c_start:
-                    return k
-            else:
-                return k
-        if c_start or c_stop:
-            raise ValueError, "list.index(x): x not in slice"
-        else:
-            raise ValueError, "list.index(x): x not in list"
-
-    def get(self, key, default=None):
-        # XXX more redundancy, but might be slightly faster than
-        #     return self.attrib.get(key, default)
-        cdef char* cresult
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        if ns is None:
-            cresult = tree.xmlGetNoNsProp(self._c_node, c_tag)
-        else:
-            cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
-        if cresult is NULL:
-            result = default
-        else:
-            result = funicode(cresult)
-            tree.xmlFree(cresult)
-        return result
-
-    def keys(self):
-        return self.attrib.keys()
-
-    def items(self):
-        return self.attrib.items()
-
-    def getchildren(self):
-        cdef xmlNode* c_node
-        cdef _Document doc
-        cdef int ret
-        result = []
-        doc = self._doc
-        c_node = self._c_node.children
-        while c_node is not NULL:
-            if _isElement(c_node):
-                ret = python.PyList_Append(result, _elementFactory(doc, c_node))
-                if ret:
-                    raise
-            c_node = c_node.next
-        return result
-
-    def getparent(self):
-        cdef xmlNode* c_node
-        c_node = self._c_node.parent
-        if c_node is not NULL and _isElement(c_node):
-            return _elementFactory(self._doc, c_node)
-        return None
-
-    def getiterator(self, tag=None):
-        iterator = ElementDepthFirstIterator(self)
-        if tag is None or tag == '*':
-            return iterator
-        else:
-            return ElementTagFilter(iterator, tag)
-
-    def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
-        "Creates a new element associated with the same document."
-        # a little code duplication, but less overhead through doc reuse
-        cdef xmlNode*  c_node
-        cdef xmlDoc*   c_doc
-        cdef _Document doc
-        ns_utf, name_utf = _getNsTag(_tag)
-        doc = self._doc
-        c_doc = doc._c_doc
-        c_node = _createElement(c_doc, name_utf, attrib, _extra)
-        # add namespaces to node if necessary
-        doc._setNodeNamespaces(c_node, ns_utf, nsmap)
-        return _elementFactory(doc, c_node)
-
-    def find(self, path):
-        return _elementpath.find(self, path)
-
-    def findtext(self, path, default=None):
-        return _elementpath.findtext(self, path, default)
-
-    def findall(self, path):
-        return _elementpath.findall(self, path)
-
-    def xpath(self, _path, namespaces=None, **_variables):
-        evaluator = XPathElementEvaluator(self, namespaces)
-        return evaluator.evaluate(_path, **_variables)
-
-cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
-    cdef _Element result
-    cdef char* c_ns_href
-    result = getProxy(c_node, PROXY_ELEMENT)
-    if result is not None:
-        return result
-    if c_node is NULL:
-        return None
-    if c_node.type == tree.XML_ELEMENT_NODE:
-        if c_node.ns == NULL:
-            c_ns_href = NULL
-        else:
-            c_ns_href = c_node.ns.href
-        element_class = _find_element_class(c_ns_href, c_node.name)
-    elif c_node.type == tree.XML_COMMENT_NODE:
-        element_class = _Comment
-    else:
-        assert 0, "Unknown node type: %s" % c_node.type
-    result = element_class()
-    result._tag = None
-    result._doc = doc
-    result._c_node = c_node
-    result._proxy_type = PROXY_ELEMENT
-    registerProxy(result, PROXY_ELEMENT)
-    result._init()
-    return result
-
-cdef class _Comment(_Element):
-    def set(self, key, value):
-        pass
-    
-    def append(self, _Element element):
-        pass
-
-    property tag:
-        def __get__(self):
-            return None
-        
-    property attrib:
-        def __get__(self):
-            return {}
-        
-    property text:
-        def __get__(self):
-            return funicode(self._c_node.content)
-
-        def __set__(self, value):
-            pass
-                        
-    # ACCESSORS
-    def __repr__(self):
-        return "<Comment[%s]>" % self.text
-    
-    def __getitem__(self, n):
-        raise IndexError
-
-    def __len__(self):
-        return 0
-
-    def get(self, key, default=None):
-        return None
-
-    def keys(self):
-        return []
-    
-    def items(self):
-        return []
-    
-cdef _Comment _commentFactory(_Document doc, xmlNode* c_node):
-    cdef _Comment result
-    result = getProxy(c_node, PROXY_ELEMENT)
-    if result is not None:
-        return result
-    if c_node is NULL:
-        return None
-    result = _Comment()
-    result._doc = doc
-    result._c_node = c_node
-    result._proxy_type = PROXY_ELEMENT
-    registerProxy(result, PROXY_ELEMENT)
-    return result
-
-cdef class _Attrib(_NodeBase):
-    # MANIPULATORS
-    def __setitem__(self, key, value):
-        cdef xmlNs* c_ns
-        cdef char* c_value
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        value = _utf8(value)
-        c_value = _cstr(value)
-        if ns is None:
-            tree.xmlSetProp(self._c_node, c_tag, c_value)
-        else:
-            c_ns = self._doc._findOrBuildNodeNs(self._c_node, _cstr(ns))
-            tree.xmlSetNsProp(self._c_node, c_ns, c_tag, c_value)
-
-    def __delitem__(self, key):
-        cdef xmlNs* c_ns
-        cdef xmlAttr* c_attr
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        if ns is None:
-            c_attr = tree.xmlHasProp(self._c_node, c_tag)
-        else:
-            c_attr = tree.xmlHasNsProp(self._c_node, c_tag, _cstr(ns))
-        if c_attr is NULL:
-            # XXX free namespace that is not in use..?
-            raise KeyError, key
-        tree.xmlRemoveProp(c_attr)
-        
-    # ACCESSORS
-    def __repr__(self):
-        result = {}
-        for key, value in self.items():
-            result[key] = value
-        return repr(result)
-    
-    def __getitem__(self, key):
-        cdef xmlNs* c_ns
-        cdef char* cresult
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        if ns is None:
-            cresult = tree.xmlGetNoNsProp(self._c_node, c_tag)
-        else:
-            cresult = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
-        if cresult is NULL:
-            # XXX free namespace that is not in use..?
-            raise KeyError, key
-        result = funicode(cresult)
-        tree.xmlFree(cresult)
-        return result
-
-    def __len__(self):
-        cdef int c
-        cdef xmlNode* c_node
-        c = 0
-        c_node = <xmlNode*>(self._c_node.properties)
-        while c_node is not NULL:
-            if c_node.type == tree.XML_ATTRIBUTE_NODE:
-                c = c + 1
-            c_node = c_node.next
-        return c
-    
-    def get(self, key, default=None):
-        try:
-            return self.__getitem__(key)
-        except KeyError:
-            return default
-
-    def keys(self):
-        result = []
-        cdef xmlNode* c_node
-        c_node = <xmlNode*>(self._c_node.properties)
-        while c_node is not NULL:
-            if c_node.type == tree.XML_ATTRIBUTE_NODE:
-                python.PyList_Append(result, _namespacedName(c_node))
-            c_node = c_node.next
-        return result
-
-    def __iter__(self):
-        return iter(self.keys())
-    
-    def iterkeys(self):
-        return iter(self.keys())
-
-    def values(self):
-        cdef xmlNode* c_node
-        result = []
-        c_node = <xmlNode*>(self._c_node.properties)
-        while c_node is not NULL:
-            if c_node.type == tree.XML_ATTRIBUTE_NODE:
-                python.PyList_Append(
-                    result, _attributeValue(self._c_node, c_node))
-            c_node = c_node.next
-        return result
-
-    def itervalues(self):
-        return iter(self.values())
-
-    def items(self):
-        result = []
-        cdef xmlNode* c_node
-        c_node = <xmlNode*>(self._c_node.properties)
-        while c_node is not NULL:
-            if c_node.type == tree.XML_ATTRIBUTE_NODE:
-                python.PyList_Append(result, (
-                    _namespacedName(c_node),
-                    _attributeValue(self._c_node, c_node)
-                    ))
-            c_node = c_node.next
-        return result
-
-    def iteritems(self):
-        return iter(self.items())
-
-    def has_key(self, key):
-        cdef xmlNs* c_ns
-        cdef char* result
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        if ns is None:
-            result = tree.xmlGetNoNsProp(self._c_node, c_tag)
-        else:
-            result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
-        if result is not NULL:
-            tree.xmlFree(result)
-            return True
-        else:
-            return False
-
-    def __contains__(self, key):
-        cdef xmlNs* c_ns
-        cdef char* result
-        cdef char* c_tag
-        ns, tag = _getNsTag(key)
-        c_tag = _cstr(tag)
-        if ns is None:
-            result = tree.xmlGetNoNsProp(self._c_node, c_tag)
-        else:
-            result = tree.xmlGetNsProp(self._c_node, c_tag, _cstr(ns))
-        if result is not NULL:
-            tree.xmlFree(result)
-            return True
-        else:
-            return False
-  
-cdef _Attrib _attribFactory(_Document doc, xmlNode* c_node):
-    cdef _Attrib result
-    result = getProxy(c_node, PROXY_ATTRIB)
-    if result is not None:
-        return result
-    result = _Attrib()
-    result._doc = doc
-    result._c_node = c_node
-    result._proxy_type = PROXY_ATTRIB
-    registerProxy(result, PROXY_ATTRIB)
-    return result
-
-ctypedef xmlNode* (*_node_to_node_function)(xmlNode*)
-
-cdef class ElementChildIterator:
-    # we keep Python references here to control GC
-    cdef _NodeBase _node
-    cdef _node_to_node_function _next_element
-    def __init__(self, _NodeBase node, reversed=False): # Python ref!
-        cdef xmlNode* c_node
-        if reversed:
-            c_node = _findChildBackwards(node._c_node, 0)
-            self._next_element = _previousElement
-        else:
-            c_node = _findChildForwards(node._c_node, 0)
-            self._next_element = _nextElement
-        if c_node is NULL:
-            self._node = None
-        else:
-            self._node = _elementFactory(node._doc, c_node)
-    def __iter__(self):
-        return self
-    def __next__(self):
-        cdef xmlNode* c_node
-        cdef _NodeBase current_node
-        # Python ref:
-        current_node = self._node
-        if current_node is None:
-            raise StopIteration
-        c_node = self._next_element(current_node._c_node)
-        if c_node is NULL:
-            self._node = None
-        else:
-            # Python ref:
-            self._node = _elementFactory(current_node._doc, c_node)
-        return current_node
-
-cdef class ElementDepthFirstIterator:
-    """Iterates over an element and its sub-elements in document order (depth
-    first pre-order)."""
-    # we keep Python references here to control GC
-    # keep next node to return and a stack of position state in the tree
-    cdef object _stack
-    cdef _NodeBase _next_node
-    def __init__(self, _NodeBase node not None):
-        cdef xmlNode* c_node
-        self._next_node = node
-        self._stack = []
-        self._findAndPushNextNode(node)
-    def __iter__(self):
-        return self
-    def __next__(self):
-        cdef xmlNode* c_node
-        cdef _NodeBase next_node
-        current_node = self._next_node
-        if current_node is None:
-            raise StopIteration
-        stack = self._stack
-        if python.PyList_GET_SIZE(stack) == 0:
-            self._next_node = None
-            return current_node
-        next_node = stack[-1]
-        self._next_node = next_node
-        self._findAndPushNextNode(next_node)
-        return current_node
-
-    cdef void _findAndPushNextNode(self, _NodeBase node):
-        cdef xmlNode* c_node
-        stack = self._stack
-        # try next child level until we hit a leaf
-        c_node = _findChildForwards(node._c_node, 0)
-        if c_node is NULL:
-            pop = stack.pop
-            while c_node is NULL and python.PyList_GET_SIZE(stack):
-                # walk up the stack until we find a sibling
-                node = pop()
-                c_node = _nextElement(node._c_node)
-        if c_node is not NULL:
-            python.PyList_Append(
-                stack, _elementFactory(node._doc, c_node))
-
-cdef class ElementTagFilter:
-    cdef object _iterator
-    cdef object _pystrings
-    cdef char* _href
-    cdef char* _name
-    def __init__(self, element_iterator, tag):
-        self._iterator = iter(element_iterator)
-        ns_href, name = _getNsTag(tag)
-        self._pystrings = (ns_href, name) # keep Python references
-        self._name = _cstr(name)
-        if ns_href is None:
-            self._href = NULL
-        else:
-            self._href = _cstr(ns_href)
-    def __iter__(self):
-        return self
-    def __next__(self):
-        cdef _NodeBase node
-        while 1:
-            node = self._iterator.next()
-            if self._tagMatches(node._c_node):
-                return node
-
-    cdef int _tagMatches(self, xmlNode* c_node):
-        if tree.strcmp(c_node.name, self._name) == 0:
-            if c_node.ns == NULL or c_node.ns.href == NULL:
-                return self._href == NULL
-            else:
-                return tree.strcmp(c_node.ns.href, self._href) == 0
-        return 0
-
-cdef xmlNode* _createElement(xmlDoc* c_doc, object name_utf,
-                             object attrib, object extra) except NULL:
-    cdef xmlNode* c_node
-    if extra:
-        if attrib is None:
-            attrib = extra
-        else:
-            attrib.update(extra)
-    c_node = tree.xmlNewDocNode(c_doc, NULL, _cstr(name_utf), NULL)
-    if attrib:
-        for name, value in attrib.items():
-            attr_name_utf = _utf8(name)
-            value_utf = _utf8(value)
-            tree.xmlNewProp(c_node, _cstr(attr_name_utf), _cstr(value_utf))
-    return c_node
-
-cdef xmlNode* _createComment(xmlDoc* c_doc, char* text):
-    cdef xmlNode* c_node
-    c_node = tree.xmlNewDocComment(c_doc, text)
-    return c_node
-
-
-# module-level API for ElementTree
-
-def Element(_tag, attrib=None, nsmap=None, **_extra):
-    cdef xmlNode*  c_node
-    cdef xmlDoc*   c_doc
-    cdef _Document doc
-    ns_utf, name_utf = _getNsTag(_tag)
-    c_doc = _newDoc()
-    c_node = _createElement(c_doc, name_utf, attrib, _extra)
-    tree.xmlDocSetRootElement(c_doc, c_node)
-    doc = _documentFactory(c_doc, None)
-    # add namespaces to node if necessary
-    doc._setNodeNamespaces(c_node, ns_utf, nsmap)
-    return _elementFactory(doc, c_node)
-
-def Comment(text=None):
-    cdef _Document doc
-    cdef xmlNode*  c_node
-    cdef xmlDoc*   c_doc
-    if text is None:
-        text = '  '
-    else:
-        text = ' %s ' % _utf8(text)
-    c_doc = _newDoc()
-    doc = _documentFactory(c_doc, None)
-    c_node = _createComment(c_doc, text)
-    tree.xmlAddChild(<xmlNode*>c_doc, c_node)
-    return _commentFactory(doc, c_node)
-
-def SubElement(_Element _parent not None, _tag,
-               attrib=None, nsmap=None, **_extra):
-    cdef xmlNode*  c_node
-    cdef _Document doc
-    ns_utf, name_utf = _getNsTag(_tag)
-    doc = _parent._doc
-    c_node = _createElement(doc._c_doc, name_utf, attrib, _extra)
-    tree.xmlAddChild(_parent._c_node, c_node)
-    # add namespaces to node if necessary
-    doc._setNodeNamespaces(c_node, ns_utf, nsmap)
-    return _elementFactory(doc, c_node)
-
-def ElementTree(_Element element=None, file=None, parser=None):
-    cdef xmlNode* c_next
-    cdef xmlNode* c_node
-    cdef xmlNode* c_node_copy
-    cdef xmlDoc*  c_doc
-    cdef _ElementTree etree
-    cdef _Document doc
-
-    if element is not None:
-        doc  = element._doc
-    elif file is not None:
-        doc = _parseDocument(file, parser)
-    else:
-        c_doc = _newDoc()
-        doc = _documentFactory(c_doc, parser)
-
-    etree = _elementTreeFactory(doc, element)
-
-##     # XXX what if element and file are both not None?
-##     if element is not None:
-##         c_next = element._c_node.next
-##         tree.xmlDocSetRootElement(etree._c_doc, element._c_node)
-##         _moveTail(c_next, element._c_node)
-##         changeDocumentBelow(element, etree)
-    
-    return etree
-
-def HTML(text):
-    cdef _Document doc
-    doc = _parseMemoryDocument(text, __DEFAULT_HTML_PARSER)
-    return doc.getroot()
-
-def XML(text):
-    cdef _Document doc
-    doc = _parseMemoryDocument(text, __DEFAULT_XML_PARSER)
-    return doc.getroot()
-
-fromstring = XML
-
-cdef class QName:
-    cdef readonly object text
-    def __init__(self, text_or_uri, tag=None):
-        if tag is not None:
-            text_or_uri = "{%s}%s" % (text_or_uri, tag)
-        elif not python.PyString_Check(text_or_uri) and \
-             not python.PyUnicode_Check(text_or_uri):
-            text_or_uri = str(text_or_uri)
-        self.text = text_or_uri
-    def __str__(self):
-        return self.text
-    def __hash__(self):
-        return self.text.__hash__()
-
-def iselement(element):
-    return isinstance(element, _Element)
-
-def dump(_NodeBase elem):
-    assert elem is not None, "Must supply element."
-    # better, but not ET compatible : "_NodeBase elem not None"
-    _dumpToFile(sys.stdout, elem._doc._c_doc, elem._c_node)
-
-def tostring(_NodeBase element, encoding='us-ascii'):
-    cdef _Document doc
-    cdef tree.xmlOutputBuffer* c_buffer
-    cdef tree.xmlCharEncodingHandler* enchandler
-    cdef char* enc
-
-    assert element is not None
-    # better, but not ET compatible : "_NodeBase element not None"
-    
-    #if encoding is None:
-    #    encoding = 'UTF-8'
-    if encoding in ('utf8', 'UTF8', 'utf-8'):
-        encoding = 'UTF-8'
-    doc = element._doc
-    enc = _cstr(encoding)
-    # it is necessary to *and* find the encoding handler *and* use
-    # encoding during output
-    enchandler = tree.xmlFindCharEncodingHandler(enc)
-    c_buffer = tree.xmlAllocOutputBuffer(enchandler)
-    tree.xmlNodeDumpOutput(c_buffer, doc._c_doc, element._c_node, 0, 0,
-                           enc)
-    _dumpNextNode(c_buffer, doc._c_doc, element._c_node, enc)
-    tree.xmlOutputBufferFlush(c_buffer)
-    if c_buffer.conv is not NULL: 
-        result = tree.xmlBufferContent(c_buffer.conv)
-    else:
-        result = tree.xmlBufferContent(c_buffer.buffer)
-    tree.xmlOutputBufferClose(c_buffer)
-    return result
-
-def parse(source, parser=None):
-    """Return an ElementTree object loaded with source elements.  If no parser
-    is provided as second argument, the default parser is used.
-    """
-    cdef _Document doc
-    doc = _parseDocument(source, parser)
-    return ElementTree(doc.getroot())
-
-
-# include submodules
-include "proxy.pxi"      # Proxy handling (element backpointers/memory/etc.)
-include "xmlerror.pxi"   # error and log handling
-include "nsclasses.pxi"  # Namespace implementation and registry
-include "docloader.pxi"  # Support for custom document loaders
-include "parser.pxi"     # XML Parser
-include "xmlid.pxi"      # XMLID and IDDict
-include "extensions.pxi" # XPath/XSLT extension functions
-include "xpath.pxi"      # XPath evaluation
-include "xslt.pxi"       # XSL transformations
-
-
-################################################################################
-# Validation
-
-cdef class _Validator:
-    "Base class for XML validators."
-    cdef _ErrorLog _error_log
-    def __init__(self):
-        self._error_log = _ErrorLog()
-        
-    def validate(self, etree):
-        """Validate the document using this schema.
-
-        Returns true if document is valid, false if not."""
-        return self(etree)
-
-    def assertValid(self, etree):
-        "Raises DocumentInvalid if the document does not comply with the schema."
-        if not self(etree):
-            raise DocumentInvalid, "Document does not comply with schema"
-
-    def assert_(self, etree):
-        "Raises AssertionError if the document does not comply with the schema."
-        if not self(etree):
-            raise AssertionError, "Document does not comply with schema"
-
-    property error_log:
-        def __get__(self):
-            return self._error_log.copy()
-
-include "relaxng.pxi"   # RelaxNG
-include "xmlschema.pxi" # XMLSchema
-
-
-################################################################################
-# Private helper functions
 
 cdef _Document _documentOrRaise(object input):
     cdef _Document doc

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx       (original)
+++ lxml/trunk/src/lxml/etree.pyx       Fri Apr 28 21:56:36 2006
@@ -2,9 +2,6 @@
 from tree cimport xmlDoc, xmlNode, xmlAttr, xmlNs, _isElement
 from python cimport isinstance, issubclass, hasattr, callable
 from python cimport iter, str, _cstr
-cimport xpath
-cimport xslt
-cimport xmlerror
 cimport xinclude
 cimport c14n
 cimport cstd
@@ -191,27 +188,6 @@
         if node_ns_utf is not None:
             self._setNodeNs(c_node, node_ns_utf)
 
-cdef _Document _parseDocument(source, parser):
-    cdef xmlDoc* c_doc
-    filename = _getFilenameForFile(source)
-    # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...)
-    if not filename and hasattr(source, 'read'):
-        return _parseMemoryDocument(source.read(), parser)
-
-    # Otherwise parse the file directly from the filesystem
-    if filename is None:
-        filename = source
-    # open filename
-    c_doc = _parseDocFromFile(_utf8(filename), parser)
-    return _documentFactory(c_doc, parser)
-
-cdef _Document _parseMemoryDocument(text, parser):
-    cdef xmlDoc* c_doc
-    if python.PyUnicode_Check(text):
-        text = _stripDeclaration(_utf8(text))
-    c_doc = _parseDoc(text, parser)
-    return _documentFactory(c_doc, parser)
-
 cdef _Document _documentFactory(xmlDoc* c_doc, parser):
     cdef _Document result
     result = _Document()
@@ -221,15 +197,6 @@
         parser = __DEFAULT_PARSER
     result._parser = parser.copy()
     return result
-
-# to help with debugging
-cdef void displayNode(xmlNode* c_node, indent):
-    cdef xmlNode* c_child
-    print indent * ' ', <int>c_node
-    c_child = c_node.children
-    while c_child is not NULL:
-        displayNode(c_child, indent + 1)
-        c_child = c_child.next
         
 cdef class _NodeBase:
     """Base class to reference a document object and a libxml node.
@@ -1353,7 +1320,8 @@
 
 # include submodules
 include "proxy.pxi"      # Proxy handling (element backpointers/memory/etc.)
-include "xmlerror.pxi"   # error and log handling
+include "apihelpers.pxi" # Private helper functions
+include "xmlerror.pxi"   # Error and log handling
 include "nsclasses.pxi"  # Namespace implementation and registry
 include "docloader.pxi"  # Support for custom document loaders
 include "parser.pxi"     # XML Parser
@@ -1394,391 +1362,3 @@
 
 include "relaxng.pxi"   # RelaxNG
 include "xmlschema.pxi" # XMLSchema
-
-
-################################################################################
-# Private helper functions
-
-cdef _Document _documentOrRaise(object input):
-    cdef _Document doc
-    doc = _documentOf(input)
-    if doc is None:
-        raise TypeError, "Invalid input object: %s" % type(input)
-    else:
-        return doc
-
-cdef _Document _documentOf(object input):
-    # call this to get the document of a
-    # _Document, _ElementTree or _NodeBase object
-    if isinstance(input, _ElementTree):
-        return (<_ElementTree>input)._doc
-    elif isinstance(input, _NodeBase):
-        return (<_NodeBase>input)._doc
-    elif isinstance(input, _Document):
-        return <_Document>input
-    else:
-        return None
-
-cdef _NodeBase _rootNodeOf(object input):
-    # call this to get the root node of a
-    # _Document, _ElementTree or _NodeBase object
-    if isinstance(input, _ElementTree):
-        return (<_ElementTree>input)._context_node
-    elif isinstance(input, _NodeBase):
-        return <_NodeBase>input
-    elif isinstance(input, _Document):
-        return (<_Document>input).getroot()
-    else:
-        return None
-
-cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node):
-    # build a temporary document that has the given node as root node
-    # note that copy and original must not be modified during its lifetime!!
-    # always call _destroyFakeDoc() after use!
-    cdef xmlNode* c_child
-    cdef xmlNode* c_root
-    cdef xmlDoc*  c_doc
-    c_root = tree.xmlDocGetRootElement(c_base_doc)
-    if c_root == c_node:
-        # already the root node
-        return c_base_doc
-
-    c_doc  = tree.xmlCopyDoc(c_base_doc, 0)        # non recursive!
-    c_root = tree.xmlDocCopyNode(c_node, c_doc, 2) # non recursive!
-
-    c_root.children = c_node.children
-    c_root.last = c_node.last
-    c_root.next = c_root.prev = c_root.parent = NULL
-
-    # store original node
-    c_root._private = c_node
-
-    # divert parent pointers of children
-    c_child = c_root.children
-    while c_child is not NULL:
-        c_child.parent = c_root
-        c_child = c_child.next
-
-    c_doc.children = c_root
-    return c_doc
-
-cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc):
-    # delete a temporary document
-    cdef xmlNode* c_child
-    cdef xmlNode* c_parent
-    cdef xmlNode* c_root
-    if c_doc != c_base_doc:
-        c_root = tree.xmlDocGetRootElement(c_doc)
-
-        # restore parent pointers of children
-        c_parent = <xmlNode*>c_root._private
-        c_child = c_root.children
-        while c_child is not NULL:
-            c_child.parent = c_parent
-            c_child = c_child.next
-
-        # prevent recursive removal of children
-        c_root.children = c_root.last = c_root._private = NULL
-        tree.xmlFreeDoc(c_doc)
-
-cdef object _attributeValue(xmlNode* c_element, xmlNode* c_attrib_node):
-    cdef char* value
-    if c_attrib_node.ns is NULL or c_attrib_node.ns.href is NULL:
-        value = tree.xmlGetNoNsProp(c_element, c_attrib_node.name)
-    else:
-        value = tree.xmlGetNsProp(c_element, c_attrib_node.name,
-                                  c_attrib_node.ns.href)
-    return funicode(value)
-
-cdef _dumpToFile(f, xmlDoc* c_doc, xmlNode* c_node):
-    cdef python.PyObject* o
-    cdef tree.xmlOutputBuffer* c_buffer
-    
-    if not python.PyFile_Check(f):
-        raise ValueError, "Not a file"
-    o = <python.PyObject*>f
-    c_buffer = tree.xmlOutputBufferCreateFile(python.PyFile_AsFile(o), NULL)
-    tree.xmlNodeDumpOutput(c_buffer, c_doc, c_node, 0, 0, NULL)
-    # dump next node if it's a text node
-    _dumpNextNode(c_buffer, c_doc, c_node, NULL)
-    tree.xmlOutputBufferWriteString(c_buffer, '\n')
-    tree.xmlOutputBufferFlush(c_buffer)
-
-cdef _dumpNextNode(tree.xmlOutputBuffer* c_buffer, xmlDoc* c_doc,
-                   xmlNode* c_node, char* encoding):
-    cdef xmlNode* c_next
-    c_next = c_node.next
-    if c_next is not NULL and c_next.type == tree.XML_TEXT_NODE:
-        tree.xmlNodeDumpOutput(c_buffer, c_doc, c_next, 0, 0, encoding)
-
-cdef object _stripDeclaration(object xml_string):
-    xml_string = xml_string.strip()
-    if xml_string[:5] == '<?xml':
-        i = xml_string.find('?>')
-        if i != -1:
-            if xml_string[i+2:i+3] == '\n':
-                i = i+1
-            xml_string = xml_string[i + 2:]
-    return xml_string
-
-cdef _collectText(xmlNode* c_node):
-    """Collect all text nodes and return them as a unicode string.
-
-    Start collecting at c_node.
-    
-    If there was no text to collect, return None
-    """
-    cdef int scount
-    cdef char* text
-    cdef xmlNode* c_node_cur
-    # check for multiple text nodes
-    scount = 0
-    text = NULL
-    c_node_cur = c_node
-    while c_node_cur is not NULL and c_node_cur.type == tree.XML_TEXT_NODE:
-        if c_node_cur.content[0] != c'\0':
-            text = c_node_cur.content
-            scount = scount + 1
-        c_node_cur = c_node_cur.next
-
-    # handle two most common cases first
-    if text is NULL:
-        return None
-    if scount == 1:
-        return funicode(text)
-
-    # the rest is not performance critical anymore
-    result = ''
-    while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
-        result = result + c_node.content
-        c_node = c_node.next
-    return funicode(result)
-
-cdef _removeText(xmlNode* c_node):
-    """Remove all text nodes.
-
-    Start removing at c_node.
-    """
-    cdef xmlNode* c_next
-    while c_node is not NULL and c_node.type == tree.XML_TEXT_NODE:
-        c_next = c_node.next
-        tree.xmlUnlinkNode(c_node)
-        # XXX cannot safely free in case of direct text node proxies..
-        tree.xmlFreeNode(c_node)
-        c_node = c_next
-
-cdef xmlNode* _findChild(xmlNode* c_node, int index):
-    if index < 0:
-        return _findChildBackwards(c_node, -index - 1)
-    else:
-        return _findChildForwards(c_node, index)
-    
-cdef xmlNode* _findChildForwards(xmlNode* c_node, int index):
-    """Return child element of c_node with index, or return NULL if not found.
-    """
-    cdef xmlNode* c_child
-    cdef int c
-    c_child = c_node.children
-    c = 0
-    while c_child is not NULL:
-        if _isElement(c_child):
-            if c == index:
-                return c_child
-            c = c + 1
-        c_child = c_child.next
-    else:
-        return NULL
-
-cdef xmlNode* _findChildBackwards(xmlNode* c_node, int index):
-    """Return child element of c_node with index, or return NULL if not found.
-    Search from the end.
-    """
-    cdef xmlNode* c_child
-    cdef int c
-    c_child = c_node.last
-    c = 0
-    while c_child is not NULL:
-        if _isElement(c_child):
-            if c == index:
-                return c_child
-            c = c + 1
-        c_child = c_child.prev
-    else:
-        return NULL
-    
-cdef xmlNode* _nextElement(xmlNode* c_node):
-    """Given a node, find the next sibling that is an element.
-    """
-    c_node = c_node.next
-    while c_node is not NULL:
-        if _isElement(c_node):
-            return c_node
-        c_node = c_node.next
-    return NULL
-
-cdef xmlNode* _previousElement(xmlNode* c_node):
-    """Given a node, find the next sibling that is an element.
-    """
-    c_node = c_node.prev
-    while c_node is not NULL:
-        if _isElement(c_node):
-            return c_node
-        c_node = c_node.prev
-    return NULL
-
-cdef void _removeNode(xmlNode* c_node):
-    """Unlink and free a node and subnodes if possible.
-    """
-    tree.xmlUnlinkNode(c_node)
-    attemptDeallocation(c_node)
-
-cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target):
-    cdef xmlNode* c_next
-    # tail support: look for any text nodes trailing this node and 
-    # move them too
-    while c_tail is not NULL and c_tail.type == tree.XML_TEXT_NODE:
-        c_next = c_tail.next
-        tree.xmlUnlinkNode(c_tail)
-        tree.xmlAddNextSibling(c_target, c_tail)
-        c_target = c_tail
-        c_tail = c_next
-
-### see etree.h:
-## cdef int _isElement(xmlNode* c_node):
-##     return (c_node.type == tree.XML_ELEMENT_NODE or
-##             c_node.type == tree.XML_COMMENT_NODE)
-
-cdef xmlNode* _deleteSlice(xmlNode* c_node, int start, int stop):
-    """Delete slice, starting with c_node, start counting at start, end at stop.
-    """
-    cdef xmlNode* c_next
-    cdef int c
-    if c_node is NULL:
-        return NULL
-    # now start deleting nodes
-    c = start
-    while c_node is not NULL and c < stop:
-        c_next = c_node.next
-        if _isElement(c_node):
-            _removeText(c_node.next)
-            c_next = c_node.next
-            _removeNode(c_node)
-            c = c + 1
-        c_node = c_next
-    return c_node
-
-cdef int isutf8(char* s):
-    cdef char c
-    c = s[0]
-    while c != c'\0':
-        if c & 0x80:
-            return 1
-        s = s + 1
-        c = s[0]
-    return 0
-
-cdef object funicode(char* s):
-    if isutf8(s):
-        return python.PyUnicode_DecodeUTF8(s, tree.strlen(s), NULL)
-    return python.PyString_FromString(s)
-
-cdef object _utf8(object s):
-    if python.PyString_Check(s):
-        assert not isutf8(_cstr(s)), "All strings must be Unicode or ASCII"
-        return s
-    elif python.PyUnicode_Check(s):
-        return python.PyUnicode_AsUTF8String(s)
-    else:
-        raise TypeError, "Argument must be string or unicode."
-
-cdef _getNsTag(tag):
-    """Given a tag, find namespace URI and tag name.
-    Return None for NS uri if no namespace URI available.
-    """
-    cdef char* c_tag
-    cdef char* c_pos
-    cdef int nslen
-    if isinstance(tag, QName):
-        tag = (<QName>tag).text
-    tag = _utf8(tag)
-    c_tag = _cstr(tag)
-    if c_tag[0] == c'{':
-        c_pos = tree.xmlStrchr(c_tag+1, c'}')
-        if c_pos is NULL:
-            raise ValueError, "Invalid tag name"
-        nslen = c_pos - c_tag - 1
-        ns  = python.PyString_FromStringAndSize(c_tag+1, nslen)
-        tag = python.PyString_FromString(c_pos+1)
-    else:
-        ns = None
-    return ns, tag
-    
-cdef object _namespacedName(xmlNode* c_node):
-    cdef char* href
-    cdef char* name
-    name = c_node.name
-    if c_node.ns is NULL or c_node.ns.href is NULL:
-        return funicode(name)
-    else:
-        href = c_node.ns.href
-        s = python.PyString_FromFormat("{%s}%s", href, name)
-        if isutf8(href) or isutf8(name):
-            return python.PyUnicode_FromEncodedObject(s, 'UTF-8', NULL)
-        else:
-            return s
-
-cdef _getFilenameForFile(source):
-    """Given a Python File or Gzip object, give filename back.
-
-    Returns None if not a file object.
-    """
-    # file instances have a name attribute
-    if hasattr(source, 'name'):
-        return source.name
-    # gzip file instances have a filename attribute
-    if hasattr(source, 'filename'):
-        return source.filename
-    return None
-
-cdef void changeDocumentBelow(_NodeBase node, _Document doc, int recursive):
-    """For a node and all nodes below, change document.
-
-    A node can change document in certain operations as an XML
-    subtree can move. This updates all possible proxies in the
-    tree below (including the current node). It also reconciliates
-    namespaces so they're correct inside the new environment.
-    """
-    if recursive:
-        changeDocumentBelowHelper(node._c_node, doc)
-    tree.xmlReconciliateNs(doc._c_doc, node._c_node)
-    
-cdef void changeDocumentBelowHelper(xmlNode* c_node, _Document doc):
-    cdef ProxyRef* ref
-    cdef xmlNode* c_current
-    cdef xmlAttr* c_attr_current
-    cdef _NodeBase proxy
-
-    if c_node is NULL:
-        return
-    # different _c_doc
-    c_node.doc = doc._c_doc
-    
-    if c_node._private is not NULL:
-        ref = <ProxyRef*>c_node._private
-        while ref is not NULL:
-            proxy = <_NodeBase>ref.proxy
-            proxy._doc = doc
-            ref = ref.next
-
-    # adjust all children
-    c_current = c_node.children
-    while c_current is not NULL:
-        changeDocumentBelowHelper(c_current, doc)
-        c_current = c_current.next
-        
-    # adjust all attributes
-    c_attr_current = c_node.properties
-    while c_attr_current is not NULL:
-        changeDocumentBelowHelper(c_current, doc)
-        c_attr_current = c_attr_current.next
-

Modified: lxml/trunk/src/lxml/extensions.pxi
==============================================================================
--- lxml/trunk/src/lxml/extensions.pxi  (original)
+++ lxml/trunk/src/lxml/extensions.pxi  Fri Apr 28 21:56:36 2006
@@ -1,5 +1,7 @@
 # supports for extension functions in XPath and XSLT
 
+cimport xpath
+
 class XPathError(LxmlError):
     pass
 

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi      (original)
+++ lxml/trunk/src/lxml/parser.pxi      Fri Apr 28 21:56:36 2006
@@ -465,3 +465,29 @@
     result = tree.xmlNewDoc("1.0")
     __GLOBAL_PARSER_CONTEXT._initDocDict(result)
     return result
+
+############################################################
+## API level helper functions for _Document creation
+############################################################
+
+cdef _Document _parseDocument(source, parser):
+    cdef xmlDoc* c_doc
+    filename = _getFilenameForFile(source)
+    # Support for unamed file-like object (StringIO, urlgrabber.urlopen, ...)
+    if not filename and hasattr(source, 'read'):
+        return _parseMemoryDocument(source.read(), parser)
+
+    # Otherwise parse the file directly from the filesystem
+    if filename is None:
+        filename = source
+    # open filename
+    c_doc = _parseDocFromFile(_utf8(filename), parser)
+    return _documentFactory(c_doc, parser)
+
+cdef _Document _parseMemoryDocument(text, parser):
+    cdef xmlDoc* c_doc
+    if python.PyUnicode_Check(text):
+        text = _stripDeclaration(_utf8(text))
+    c_doc = _parseDoc(text, parser)
+    return _documentFactory(c_doc, parser)
+

Modified: lxml/trunk/src/lxml/xmlerror.pxi
==============================================================================
--- lxml/trunk/src/lxml/xmlerror.pxi    (original)
+++ lxml/trunk/src/lxml/xmlerror.pxi    Fri Apr 28 21:56:36 2006
@@ -1,5 +1,6 @@
-################################################################################
-# DEBUG setup
+# DEBUG and error logging
+
+cimport xmlerror
 
 # module level API functions
 

Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi       (original)
+++ lxml/trunk/src/lxml/xpath.pxi       Fri Apr 28 21:56:36 2006
@@ -1,4 +1,4 @@
-# XSLT and XPath classes, supports for extension functions
+# XPath evaluation
 
 class XPathContextError(XPathError):
     pass

Modified: lxml/trunk/src/lxml/xslt.pxi
==============================================================================
--- lxml/trunk/src/lxml/xslt.pxi        (original)
+++ lxml/trunk/src/lxml/xslt.pxi        Fri Apr 28 21:56:36 2006
@@ -1,5 +1,7 @@
 # XSLT and XPath classes, supports for extension functions
 
+cimport xslt
+
 class XSLTError(LxmlError):
     pass


<Prev in Thread] Current Thread [Next in Thread>