$include_dir="/home/hyper-archives/boost-commit/include"; include("$include_dir/msg-header.inc") ?>
From: grafikrobot_at_[hidden]
Date: 2008-05-22 16:04:59
Author: grafik
Date: 2008-05-22 16:04:59 EDT (Thu, 22 May 2008)
New Revision: 45667
URL: http://svn.boost.org/trac/boost/changeset/45667
Log:
Switch to using pulldom to be able to handle the very-large bjam XML log files.
Text files modified: 
   trunk/tools/regression/src/process_jam_log.py |   317 ++++++++++++++++++++++----------------- 
   1 files changed, 180 insertions(+), 137 deletions(-)
Modified: trunk/tools/regression/src/process_jam_log.py
==============================================================================
--- trunk/tools/regression/src/process_jam_log.py	(original)
+++ trunk/tools/regression/src/process_jam_log.py	2008-05-22 16:04:59 EDT (Thu, 22 May 2008)
@@ -7,6 +7,7 @@
 import optparse
 import time
 import xml.dom.minidom
+import xml.dom.pulldom
 from xml.sax.saxutils import unescape, escape
 import os.path
 
@@ -77,10 +78,37 @@
     
     def add_log(self):
         if self.input[0]:
-            bjam_log = xml.dom.minidom.parse(self.input[0])
+            bjam_xml = self.input[0]
         else:
-            bjam_log = xml.dom.minidom.parse(self.input[1])
-        self.x(bjam_log.documentElement)
+            bjam_xml = self.input[1]
+        events = xml.dom.pulldom.parse(bjam_xml)
+        context = []
+        test_run = self.results.documentElement
+        for (event,node) in events:
+            if event == xml.dom.pulldom.START_ELEMENT:
+                context.append(node)
+                if node.nodeType == xml.dom.Node.ELEMENT_NODE:
+                    x_f = self.x_name_(*context)
+                    if x_f:
+                        events.expandNode(node)
+                        # expanding eats the end element, hence walking us out one level
+                        context.pop()
+                        # call the translator, and add returned items to the result
+                        items = (x_f[1])(node)
+                        if items:
+                            for item in items:
+                                if item:
+                                    test_run.appendChild(self.results.createTextNode("\n"))
+                                    test_run.appendChild(item)
+            elif event == xml.dom.pulldom.END_ELEMENT:
+                context.pop()
+        #~ Add the log items now that we've collected all of them.
+        items = self.log.values()
+        if items:
+            for item in items:
+                if item:
+                    test_run.appendChild(self.results.createTextNode("\n"))
+                    test_run.appendChild(item)
     
     def gen_output(self):
         if self.output:
@@ -93,6 +121,24 @@
     def tostring(self):
         return self.results.toxml('utf-8')
     
+    def x_name_(self, *context, **kwargs):
+        node = None
+        names = [ ]
+        for c in context:
+            if c:
+                if not isinstance(c,xml.dom.Node):
+                    suffix = '_'+c.replace('-','_').replace('#','_')
+                else:
+                    suffix = '_'+c.nodeName.replace('-','_').replace('#','_')
+                    node = c
+                names.append('x')
+                names = map(lambda x: x+suffix,names)
+        if node:
+            for name in names:
+                if hasattr(self,name):
+                    return (name,getattr(self,name))
+        return None
+    
     def x(self, *context, **kwargs):
         node = None
         names = [ ]
@@ -113,26 +159,10 @@
                     assert False, 'Unknown node type %s'%(name)
         return None
     
-    #~ The single top-level build element...
-    def x_build( self, node ):
-        test_run = self.results.documentElement
-        #~ Iterate over the sub-sections in a specific order to build up the
-        #~ cross-reference information and the XML output.
-        for type in ('timestamp','comment','test','targets','action'):
-            items = self.x(node,type)
-            #~ Any items generated by the processing are inteserted into the results.
-            if items:
-                for item in items:
-                    if item:
-                        test_run.appendChild(self.results.createTextNode("\n"))
-                        test_run.appendChild(item)
-        return None
-    
     #~ The timestamp goes to the corresponding attribute in the result.
     def x_build_timestamp( self, node ):
         test_run = self.results.documentElement
-        timestamp = self.get_child(self.get_child(node,tag='timestamp'),tag='#cdata-section').data.strip()
-        test_run.setAttribute('timestamp',timestamp)
+        test_run.setAttribute('timestamp',self.get_data(node).strip())
         return None
     
     #~ Comment file becomes a comment node.
@@ -150,122 +180,122 @@
     #~ Tests are remembered for future reference.
     def x_build_test( self, node ):
         test_run = self.results.documentElement
-        test_node = self.get_child(node,tag='test')
-        while test_node:
-            test_name = test_node.getAttribute('name')
-            self.test[test_name] = {
-                'library' : '/'.join(test_name.split('/')[0:-1]),
-                'test-name' : test_name.split('/')[-1],
-                'test-type' : test_node.getAttribute('type').lower(),
-                'test-program' : self.get_child_data(test_node,tag='source').strip(),
-                'target' : self.get_child_data(test_node,tag='target').strip(),
-                'info' : self.get_child_data(test_node,tag='info',strip=True)
-                }
-            #~ Add a lookup for the test given the test target.
-            self.target_to_test[self.test[test_name]['target']] = test_name
-            #~ print "--- %s\n => %s" %(self.test[test_name]['target'],test_name)
-            test_node = self.get_sibling(test_node.nextSibling,tag='test')
+        test_node = node
+        test_name = test_node.getAttribute('name')
+        self.test[test_name] = {
+            'library' : '/'.join(test_name.split('/')[0:-1]),
+            'test-name' : test_name.split('/')[-1],
+            'test-type' : test_node.getAttribute('type').lower(),
+            'test-program' : self.get_child_data(test_node,tag='source',strip=True),
+            'target' : self.get_child_data(test_node,tag='target',strip=True),
+            'info' : self.get_child_data(test_node,tag='info',strip=True)
+            }
+        #~ Add a lookup for the test given the test target.
+        self.target_to_test[self.test[test_name]['target']] = test_name
+        #~ print "--- %s\n => %s" %(self.test[test_name]['target'],test_name)
         return None
     
     #~ Process the target dependency DAG into an ancestry tree so we can look up
     #~ which top-level library and test targets specific build actions correspond to.
-    def x_build_targets( self, node ):
+    def x_build_targets_target( self, node ):
         test_run = self.results.documentElement
-        target_node = self.get_child(self.get_child(node,tag='targets'),tag='target')
-        while target_node:
-            name = self.get_child_data(target_node,tag='name').strip()
-            path = self.get_child_data(target_node,tag='path').strip()
-            jam_target = self.get_child_data(target_node,tag='jam-target').strip()
-            #~ Map for jam targets to virtual targets.
-            self.target[jam_target] = {
-                'name' : name,
-                'path' : path
-                }
-            #~ Create the ancestry.
-            dep_node = self.get_child(self.get_child(target_node,tag='dependencies'),tag='dependency')
-            while dep_node:
-                child = self.get_data(dep_node).strip()
-                child_jam_target = '<p%s>%s' % (path,child.split('//',1)[1])
-                self.parent[child_jam_target] = jam_target
-                #~ print "--- %s\n  ^ %s" %(jam_target,child_jam_target)
-                dep_node = self.get_sibling(dep_node.nextSibling,tag='dependency')
-            target_node = self.get_sibling(target_node.nextSibling,tag='target')
+        target_node = node
+        name = self.get_child_data(target_node,tag='name',strip=True)
+        path = self.get_child_data(target_node,tag='path',strip=True)
+        jam_target = self.get_child_data(target_node,tag='jam-target',strip=True)
+        #~ print "--- target :: %s" %(name)
+        #~ Map for jam targets to virtual targets.
+        self.target[jam_target] = {
+            'name' : name,
+            'path' : path
+            }
+        #~ Create the ancestry.
+        dep_node = self.get_child(self.get_child(target_node,tag='dependencies'),tag='dependency')
+        while dep_node:
+            child = self.get_data(dep_node,strip=True)
+            child_jam_target = '<p%s>%s' % (path,child.split('//',1)[1])
+            self.parent[child_jam_target] = jam_target
+            #~ print "--- %s\n  ^ %s" %(jam_target,child_jam_target)
+            dep_node = self.get_sibling(dep_node.nextSibling,tag='dependency')
         return None
     
     #~ Given a build action log, process into the corresponding test log and
     #~ specific test log sub-part.
     def x_build_action( self, node ):
         test_run = self.results.documentElement
-        action_node = self.get_child(node,tag='action')
-        while action_node:
-            name = self.get_child(action_node,tag='name')
-            if name:
-                name = self.get_data(name)
-                #~ Based on the action, we decide what sub-section the log
-                #~ should go into.
-                action_type = None
-                if re.match('[^%]+%[^.]+[.](compile)',name):
-                    action_type = 'compile'
-                elif re.match('[^%]+%[^.]+[.](link|archive)',name):
-                    action_type = 'link'
-                elif re.match('[^%]+%testing[.](capture-output)',name):
-                    action_type = 'run'
-                elif re.match('[^%]+%testing[.](expect-failure|expect-success)',name):
-                    action_type = 'result'
-                #~ print "+   [%s] %s %s :: %s" %(action_type,name,'','')
-                if action_type:
-                    #~ Get the corresponding test.
-                    (target,test) = self.get_test(action_node,type=action_type)
-                    #~ And the log node, which we will add the results to.
-                    log = self.get_log(action_node,test)
-                    #~ print "--- [%s] %s %s :: %s" %(action_type,name,target,test)
-                    #~ Collect some basic info about the action.
-                    result_data = "%(info)s\n\n%(command)s\n%(output)s\n" % {
-                        'command' : self.get_action_command(action_node,action_type),
-                        'output' : self.get_action_output(action_node,action_type),
-                        'info' : self.get_action_info(action_node,action_type)
-                        }
-                    #~ For the test result status we find the appropriate node
-                    #~ based on the type of test. Then adjust the result status
-                    #~ acorrdingly. This makes the result status reflect the
-                    #~ expectation as the result pages post processing does not
-                    #~ account for this inversion.
-                    action_tag = action_type
-                    if action_type == 'result':
-                        if re.match(r'^compile',test['test-type']):
-                            action_tag = 'compile'
-                        elif re.match(r'^link',test['test-type']):
-                            action_tag = 'link'
-                        elif re.match(r'^run',test['test-type']):
-                            action_tag = 'run'
-                    #~ The result sub-part we will add this result to.
-                    result_node = self.get_child(log,tag=action_tag)
-                    if action_node.getAttribute('status') == '0':
-                        action_result = 'succeed'
-                    else:
-                        action_result = 'fail'
-                    if not result_node:
-                        #~ If we don't have one already, create it and add the result.
-                        result_node = self.new_text(action_tag,result_data,
-                            result = action_result,
-                            timestamp = action_node.getAttribute('start'))
-                        log.appendChild(self.results.createTextNode("\n"))
-                        log.appendChild(result_node)
+        action_node = node
+        name = self.get_child(action_node,tag='name')
+        if name:
+            name = self.get_data(name)
+            #~ Based on the action, we decide what sub-section the log
+            #~ should go into.
+            action_type = None
+            if re.match('[^%]+%[^.]+[.](compile)',name):
+                action_type = 'compile'
+            elif re.match('[^%]+%[^.]+[.](link|archive)',name):
+                action_type = 'link'
+            elif re.match('[^%]+%testing[.](capture-output)',name):
+                action_type = 'run'
+            elif re.match('[^%]+%testing[.](expect-failure|expect-success)',name):
+                action_type = 'result'
+            #~ print "+   [%s] %s %s :: %s" %(action_type,name,'','')
+            if action_type:
+                #~ Get the corresponding test.
+                (target,test) = self.get_test(action_node,type=action_type)
+                #~ Skip actions that have no corresponding test as they are
+                #~ regular build actions and don't need to show up in the
+                #~ regression results.
+                if not test:
+                    return None
+                #~ And the log node, which we will add the results to.
+                log = self.get_log(action_node,test)
+                #~ print "--- [%s] %s %s :: %s" %(action_type,name,target,test)
+                #~ Collect some basic info about the action.
+                result_data = "%(info)s\n\n%(command)s\n%(output)s\n" % {
+                    'command' : self.get_action_command(action_node,action_type),
+                    'output' : self.get_action_output(action_node,action_type),
+                    'info' : self.get_action_info(action_node,action_type)
+                    }
+                #~ For the test result status we find the appropriate node
+                #~ based on the type of test. Then adjust the result status
+                #~ accordingly. This makes the result status reflect the
+                #~ expectation as the result pages post processing does not
+                #~ account for this inversion.
+                action_tag = action_type
+                if action_type == 'result':
+                    if re.match(r'^compile',test['test-type']):
+                        action_tag = 'compile'
+                    elif re.match(r'^link',test['test-type']):
+                        action_tag = 'link'
+                    elif re.match(r'^run',test['test-type']):
+                        action_tag = 'run'
+                #~ The result sub-part we will add this result to.
+                result_node = self.get_child(log,tag=action_tag)
+                if action_node.getAttribute('status') == '0':
+                    action_result = 'succeed'
+                else:
+                    action_result = 'fail'
+                if not result_node:
+                    #~ If we don't have one already, create it and add the result.
+                    result_node = self.new_text(action_tag,result_data,
+                        result = action_result,
+                        timestamp = action_node.getAttribute('start'))
+                    log.appendChild(self.results.createTextNode("\n"))
+                    log.appendChild(result_node)
+                else:
+                    #~ For an existing result node we set the status to fail
+                    #~ when any of the individual actions fail, except for result
+                    #~ status.
+                    if action_type != 'result':
+                        result = result_node.getAttribute('result')
+                        if action_node.getAttribute('status') != '0':
+                            result = 'fail'
                     else:
-                        #~ For an existing result node we set the status to fail
-                        #~ when any of the individual actions fail, except for result
-                        #~ status.
-                        if action_type != 'result':
-                            result = result_node.getAttribute('result')
-                            if action_node.getAttribute('status') != '0':
-                                result = 'fail'
-                        else:
-                            result = action_result
-                        result_node.setAttribute('result',result)
-                        result_node.appendChild(self.results.createTextNode("\n"))
-                        result_node.appendChild(self.results.createTextNode(result_data))
-            action_node = self.get_sibling(action_node.nextSibling,tag='action')
-        return self.log.values()
+                        result = action_result
+                    result_node.setAttribute('result',result)
+                    result_node.appendChild(self.results.createTextNode("\n"))
+                    result_node.appendChild(self.results.createTextNode(result_data))
+        return None
     
     #~ The command executed for the action. For run actions we omit the command
     #~ as it's just noise.
@@ -323,7 +353,11 @@
                     }
             test = self.test[lib]
         else:
-            test = self.test[self.target_to_test[self.target[target]['name']]]
+            target_name_ = self.target[target]['name']
+            if self.target_to_test.has_key(target_name_):
+                test = self.test[self.target_to_test[target_name_]]
+            else:
+                test = None
         return (base,test)
     
     #~ Find, or create, the test-log node to add results to.
@@ -357,10 +391,12 @@
     
     #~ XML utilities...
     
-    def get_sibling( self, sibling, tag = None, id = None, name = None ):
+    def get_sibling( self, sibling, tag = None, id = None, name = None, type = None ):
         n = sibling
         while n:
             found = True
+            if type and found:
+                found = found and type == n.nodeType
             if tag and found:
                 found = found and tag == n.nodeName
             if (id or name) and found:
@@ -377,23 +413,30 @@
             n = n.nextSibling
         return None
     
-    def get_child( self, root, tag = None, id = None, name = None ):
-        return self.get_sibling(root.firstChild,tag=tag,id=id,name=name)
+    def get_child( self, root, tag = None, id = None, name = None, type = None ):
+        return self.get_sibling(root.firstChild,tag=tag,id=id,name=name,type=type)
     
     def get_data( self, node, strip = False, default = None ):
         data = None
         if node:
-            if not data:
-                data = self.get_child(node,tag='#text')
-            if not data:
-                data = self.get_child(node,tag='#cdata-section')
-            if data:
-                if not strip:
-                    data = data.data
-                else:
-                    data = data.data.strip()
+            data_node = None
+            if not data_node:
+                data_node = self.get_child(node,tag='#text')
+            if not data_node:
+                data_node = self.get_child(node,tag='#cdata-section')
+            data = ""
+            while data_node:
+                data += data_node.data
+                data_node = data_node.nextSibling
+                if data_node:
+                    if data_node.nodeName != '#text' \
+                        and data_node.nodeName != '#cdata-section':
+                        data_node = None
         if not data:
             data = default
+        else:
+            if strip:
+                data = data.strip()
         return data
     
     def get_child_data( self, root, tag = None, id = None, name = None, strip = False, default = None ):