logo       
Google Custom Search
    AddThis Social Bookmark Button
-->

r28682 - lxml/trunk/src/lxml: msg#00147

Subject: r28682 - lxml/trunk/src/lxml
Author: scoder
Date: Sun Jun 11 20:32:53 2006
New Revision: 28682

Modified:
   lxml/trunk/src/lxml/etree.pyx
   lxml/trunk/src/lxml/parser.pxi
   lxml/trunk/src/lxml/xpath.pxi
Log:
major cleanup and refactoring in _ParserContext, make clear which methods are 
public, use single code path for setting up thread parser dictionary

Modified: lxml/trunk/src/lxml/etree.pyx
==============================================================================
--- lxml/trunk/src/lxml/etree.pyx       (original)
+++ lxml/trunk/src/lxml/etree.pyx       Sun Jun 11 20:32:53 2006
@@ -306,7 +306,7 @@
     result._c_doc = c_doc
     result._ns_counter = 0
     if parser is None:
-        parser = __GLOBAL_PARSER_CONTEXT._getDefaultParser()
+        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     result._parser = parser.copy()
     return result
 

Modified: lxml/trunk/src/lxml/parser.pxi
==============================================================================
--- lxml/trunk/src/lxml/parser.pxi      (original)
+++ lxml/trunk/src/lxml/parser.pxi      Sun Jun 11 20:32:53 2006
@@ -15,15 +15,24 @@
     LXML_HTML_PARSER
 
 cdef class _ParserContext:
-    """Global parser context to share the string dictionary.
-    """
+    # Global parser context to share the string dictionary.
+    #
+    # This class is a singleton!
+    #
+    # It creates _ParserContext objects for each thread to keep thread state,
+    # but those must never be used directly.  Always stick to using the static
+    # __GLOBAL_PARSER_CONTEXT as defined below the class.
+    #
+
     cdef xmlDict* _c_dict
     cdef _BaseParser _default_parser
     def __dealloc__(self):
         if self._c_dict is not NULL:
             xmlparser.xmlDictFree(self._c_dict)
 
-    cdef void _initMainParserContext(self):
+    cdef void initMainParserContext(self):
+        """Put the global context into the thread dictionary of the main
+        thread.  To be called once and only in the main thread."""
         cdef python.PyObject* thread_dict
         cdef python.PyObject* result
         thread_dict = python.PyThreadState_GetDict()
@@ -31,9 +40,10 @@
             python.PyDict_SetItem(<object>thread_dict, "_ParserContext", self)
 
     cdef _ParserContext _findThreadParserContext(self):
-        "Find the _ParserContext for the current thread"
+        "Find (or create) the _ParserContext object for the current thread"
         cdef python.PyObject* thread_dict
         cdef python.PyObject* result
+        cdef _ParserContext context
         thread_dict = python.PyThreadState_GetDict()
         if thread_dict is NULL:
             return self
@@ -45,12 +55,14 @@
         python.PyDict_SetItem(d, "_ParserContext", context)
         return context
 
-    cdef void _setDefaultParser(self, _BaseParser parser):
+    cdef void setDefaultParser(self, _BaseParser parser):
+        "Set the default parser for the current thread"
         cdef _ParserContext context
         context = self._findThreadParserContext()
         context._default_parser = parser
 
-    cdef _BaseParser _getDefaultParser(self):
+    cdef _BaseParser getDefaultParser(self):
+        "Return (or create) the default parser of the current thread"
         cdef _ParserContext context
         context = self._findThreadParserContext()
         if context._default_parser is None:
@@ -59,52 +71,52 @@
             context._default_parser = self._default_parser.copy()
         return context._default_parser
 
-    cdef void _initParserDict(self, xmlParserCtxt* pctxt):
-        "Assure we always use the same string dictionary."
+    cdef xmlDict* _getThreadDict(self, xmlDict* default):
+        "Return the thread-local dict or create a new one if necessary."
         cdef _ParserContext context
         context = self._findThreadParserContext()
-        if context._c_dict is NULL or context._c_dict is pctxt.dict:
+        if context._c_dict is NULL:
+            # thread dict not yet set up => use default or create a new one
+            if default is not NULL:
+                context._c_dict = default
+                xmlparser.xmlDictReference(default)
+                return default
+            if self._c_dict is NULL:
+                self._c_dict = xmlparser.xmlDictCreate()
+            if context is not self:
+                context._c_dict = xmlparser.xmlDictCreateSub(self._c_dict)
+        return context._c_dict
+
+    cdef void _initThreadDictRef(self, xmlDict** c_dict_ref):
+        cdef xmlDict* c_dict
+        cdef xmlDict* c_thread_dict
+        c_dict = c_dict_ref[0]
+        c_thread_dict = self._getThreadDict(c_dict)
+        if c_dict is c_thread_dict:
             return
-        if pctxt.dict is not NULL:
-            xmlparser.xmlDictFree(pctxt.dict)
-        pctxt.dict = context._c_dict
-        xmlparser.xmlDictReference(pctxt.dict)
+        if c_dict is not NULL:
+            xmlparser.xmlDictFree(c_dict)
+        c_dict_ref[0] = c_thread_dict
+        xmlparser.xmlDictReference(c_thread_dict)
 
-    cdef void _initXPathParserDict(self, xpath.xmlXPathContext* pctxt):
+    cdef void initParserDict(self, xmlParserCtxt* pctxt):
         "Assure we always use the same string dictionary."
-        cdef _ParserContext context
-        context = self._findThreadParserContext()
-        if context._c_dict is NULL or context._c_dict is pctxt.dict:
-            return
-        if pctxt.dict is not NULL:
-            xmlparser.xmlDictFree(pctxt.dict)
-        pctxt.dict = context._c_dict
-        xmlparser.xmlDictReference(pctxt.dict)
+        self._initThreadDictRef(&pctxt.dict)
+
+    cdef void initXPathParserDict(self, xpath.xmlXPathContext* pctxt):
+        "Assure we always use the same string dictionary."
+        self._initThreadDictRef(&pctxt.dict)
 
-    cdef void _initDocDict(self, xmlDoc* result):
+    cdef void initDocDict(self, xmlDoc* result):
         "Store dict of last object parsed if no shared dict yet"
-        cdef _ParserContext context
-        if result is NULL:
-            return
-        context = self._findThreadParserContext()
-        if context._c_dict is NULL:
-            #print "storing shared dict"
-            if result.dict is NULL:
-                if self._c_dict is NULL:
-                    result.dict = xmlparser.xmlDictCreate()
-                else:
-                    result.dict = xmlparser.xmlDictCreateSub(self._c_dict)
-            context._c_dict = result.dict
-            xmlparser.xmlDictReference(context._c_dict)
-        elif result.dict != context._c_dict:
-            if result.dict is not NULL:
-                xmlparser.xmlDictFree(result.dict)
-            result.dict = context._c_dict
-            xmlparser.xmlDictReference(result.dict)
+        # XXX We also free the result dict here if there already was one.
+        # This case should only occur for new documents with empty dicts,
+        # otherwise we'd free data that's in use => segfault
+        self._initThreadDictRef(&result.dict)
 
 cdef _ParserContext __GLOBAL_PARSER_CONTEXT
 __GLOBAL_PARSER_CONTEXT = _ParserContext()
-__GLOBAL_PARSER_CONTEXT._initMainParserContext()
+__GLOBAL_PARSER_CONTEXT.initMainParserContext()
 
 ############################################################
 ## support for Python unicode I/O
@@ -391,7 +403,7 @@
         self._error_log.connect()
         try:
             pctxt = self._parser_ctxt
-            __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
 
             c_text = python.PyUnicode_AS_DATA(utext)
             state = python.PyEval_SaveThread()
@@ -425,7 +437,7 @@
         self._error_log.connect()
         try:
             pctxt = self._parser_ctxt
-            __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
 
             state = python.PyEval_SaveThread()
             if self._parser_type == LXML_HTML_PARSER:
@@ -452,7 +464,7 @@
         self._error_log.connect()
         try:
             pctxt = self._parser_ctxt
-            __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
 
             state = python.PyEval_SaveThread()
             if self._parser_type == LXML_HTML_PARSER:
@@ -483,7 +495,7 @@
         self._error_log.connect()
         try:
             pctxt = self._parser_ctxt
-            __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+            __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
             file_context = _FileParserContext(filelike, self._context, filename)
             result = file_context._readDoc(
                 pctxt, self._parse_options, self._parser_type)
@@ -519,7 +531,7 @@
 
     if result is not NULL:
         if ctxt.wellFormed or recover:
-            __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+            __GLOBAL_PARSER_CONTEXT.initDocDict(result)
         else:
             # free broken document
             tree.xmlFreeDoc(result)
@@ -607,7 +619,7 @@
     pctxt = xmlparser.xmlNewParserCtxt()
     if pctxt is NULL:
         return NULL
-    __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+    __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
     pctxt._private = <python.PyObject*>context
     c_doc = xmlparser.xmlCtxtReadDoc(
         pctxt, c_text, NULL, NULL, options)
@@ -627,7 +639,7 @@
     pctxt = xmlparser.xmlNewParserCtxt()
     if pctxt is NULL:
         return NULL
-    __GLOBAL_PARSER_CONTEXT._initParserDict(pctxt)
+    __GLOBAL_PARSER_CONTEXT.initParserDict(pctxt)
     pctxt._private = <python.PyObject*>context
     c_doc = xmlparser.xmlCtxtReadFile(
         pctxt, c_filename, NULL, options)
@@ -642,7 +654,7 @@
 cdef XMLParser __DEFAULT_XML_PARSER
 __DEFAULT_XML_PARSER = XMLParser()
 
-__GLOBAL_PARSER_CONTEXT._setDefaultParser(__DEFAULT_XML_PARSER)
+__GLOBAL_PARSER_CONTEXT.setDefaultParser(__DEFAULT_XML_PARSER)
 
 def set_default_parser(_BaseParser parser=None):
     """Set a default parser for the current thread.  This parser is used
@@ -656,10 +668,10 @@
     """
     if parser is None:
         parser = __DEFAULT_XML_PARSER
-    __GLOBAL_PARSER_CONTEXT._setDefaultParser(parser)
+    __GLOBAL_PARSER_CONTEXT.setDefaultParser(parser)
 
 def get_default_parser():
-    return __GLOBAL_PARSER_CONTEXT._getDefaultParser()
+    return __GLOBAL_PARSER_CONTEXT.getDefaultParser()
 
 ############################################################
 ## HTML parser
@@ -708,7 +720,7 @@
     cdef char* c_text
     cdef Py_ssize_t c_len
     if parser is None:
-        parser = __GLOBAL_PARSER_CONTEXT._getDefaultParser()
+        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     if not filename:
         c_filename = NULL
     else:
@@ -722,27 +734,27 @@
 
 cdef xmlDoc* _parseDocFromFile(filename, _BaseParser parser) except NULL:
     if parser is None:
-        parser = __GLOBAL_PARSER_CONTEXT._getDefaultParser()
+        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename))
 
 cdef xmlDoc* _parseDocFromFilelike(source, filename,
                                    _BaseParser parser) except NULL:
     cdef char* c_filename
     if parser is None:
-        parser = __GLOBAL_PARSER_CONTEXT._getDefaultParser()
+        parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     return (<_BaseParser>parser)._parseDocFromFilelike(source, filename)
 
 cdef xmlDoc* _newDoc():
     cdef xmlDoc* result
     result = tree.xmlNewDoc("1.0")
-    __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+    __GLOBAL_PARSER_CONTEXT.initDocDict(result)
     return result
 
 cdef xmlDoc* _copyDoc(xmlDoc* c_doc, int recursive):
     cdef xmlDoc* result
     result = tree.xmlCopyDoc(c_doc, recursive)
     _bugFixURL(c_doc, result)
-    __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+    __GLOBAL_PARSER_CONTEXT.initDocDict(result)
     return result
 
 cdef xmlDoc* _copyDocRoot(xmlDoc* c_doc, xmlNode* c_new_root):
@@ -751,7 +763,7 @@
     cdef xmlNode* c_node
     result = tree.xmlCopyDoc(c_doc, 0) # non recursive
     _bugFixURL(c_doc, result)
-    __GLOBAL_PARSER_CONTEXT._initDocDict(result)
+    __GLOBAL_PARSER_CONTEXT.initDocDict(result)
     c_node = tree.xmlDocCopyNode(c_new_root, result, 1) # recursive
     tree.xmlDocSetRootElement(result, c_node)
     _copyTail(c_new_root.next, c_node)

Modified: lxml/trunk/src/lxml/xpath.pxi
==============================================================================
--- lxml/trunk/src/lxml/xpath.pxi       (original)
+++ lxml/trunk/src/lxml/xpath.pxi       Sun Jun 11 20:32:53 2006
@@ -50,7 +50,7 @@
             self._xpathCtxt, _cstr(name_utf), _wrapXPathObject(value))
 
 cdef void _setupDict(xpath.xmlXPathContext* xpathCtxt):
-    __GLOBAL_PARSER_CONTEXT._initXPathParserDict(xpathCtxt)
+    __GLOBAL_PARSER_CONTEXT.initXPathParserDict(xpathCtxt)
 
 cdef class _XPathEvaluatorBase:
     cdef xpath.xmlXPathContext* _xpathCtxt


<Prev in Thread] Current Thread [Next in Thread>