Source code for fastr.utils.compare

# Copyright 2011-2014 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Module to compare various fastr specific things such as a execution directory
or a reference directory.
"""

import os

import fastr
from fastr import Network, SourceNode
from fastr.execution.job import Job
from fastr.execution.networkanalyzer import DefaultNetworkAnalyzer
from fastr.utils.iohelpers import load_gpickle


[docs]def compare_execution_dir(path1, path2): # Compare network dumps network_file1 = os.path.join(path1, Network.NETWORK_DUMP_FILE_NAME) network_file2 = os.path.join(path2, Network.NETWORK_DUMP_FILE_NAME) network1 = Network.loadf(network_file1) network2 = Network.loadf(network_file2) fastr.log.debug('Network1 filename: {}'.format(network1.filename)) fastr.log.debug('Network2 filename: {}'.format(network2.filename)) if network1 != network2: yield "Networks dumps are not equal!" else: del network2 # Get the order of the Nodes analyzer = DefaultNetworkAnalyzer() execution_order = analyzer.analyze_network( network1, (None, network1.nodelist.values()) ) fastr.log.debug('Execution order: "{}"'.format(execution_order)) # Compare node outputs in execution order for node in execution_order: fastr.log.debug('Checking node {}'.format(node.id)) # Get the sample present node_dir1 = os.path.join(path1, node.id) node_dir2 = os.path.join(path2, node.id) if isinstance(node, SourceNode): # Possible source nodes do not exist if not os.path.isdir(node_dir1): if not os.path.isdir(node_dir2): # Non-existing in both continue else: yield("Node '{}' does not have output for result 2") continue elif not os.path.isdir(node_dir2): yield("Node '{}' does not have output for result 1") continue samples1 = set(os.listdir(node_dir1)) samples2 = set(os.listdir(node_dir2)) # Compare the samples if samples1 != samples2: yield ("Node '{}' contains different samples, set 1 exclusively" " contains {} and set 2 exclusively contains {}").format( node.id, samples1.difference(samples2), samples2.difference(samples1) ) # Inspect the individual samples that are in both sets for sample in sorted(samples1.intersection(samples2)): fastr.log.debug('Checking sample {}'.format(sample)) result1 = os.path.join(node_dir1, sample, Job.RESULT_DUMP) result2 = os.path.join(node_dir2, sample, Job.RESULT_DUMP) job1 = load_gpickle(result1) job2 = load_gpickle(result2) # Compare output data fastr.log.debug('Job1 data: {}'.format(job1.output_data)) fastr.log.debug('Job2 data: {}'.format(job2.output_data)) if job1.output_data != job2.output_data: yield "Output data for job {} does not match".format(job1.jobid)