Source code for fastr.utils.compare

# Copyright 2011-2014 Biomedical Imaging Group Rotterdam, Departments of
# Medical Informatics and Radiology, Erasmus MC, Rotterdam, The Netherlands
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""
Module to compare various fastr-specific things, such as an execution
directory or a reference directory.
"""

import os

import fastr
from fastr.execution.job import Job
from fastr.execution.networkanalyzer import DefaultNetworkAnalyzer
from fastr.execution.networkchunker import DefaultNetworkChunker
from fastr.helpers.iohelpers import load_gpickle


def compare_set(set1, set2, path, sub_compare_func, f_args=None, f_kwargs=None):
    """
    Diff two collections of items and run a sub comparison on the shared items

    When the two collections do not contain exactly the same items, a single
    message listing the items exclusive to each side is yielded first. After
    that ``sub_compare_func`` is called for every item present in both
    collections (in sorted order) and everything it produces is passed on.

    :param Iterable set1: first set of items
    :param Iterable set2: second set of items
    :param str path: identifier of the data location (used in the message)
    :param sub_compare_func: function called for each shared item as
                             ``sub_compare_func(item, *f_args, **f_kwargs)``,
                             must return an iterable of differences
    :param f_args: extra positional arguments for sub_compare_func
    :param f_kwargs: extra keyword arguments for sub_compare_func
    :return: generator that iterates over the differences
    :rtype: generator
    """
    # Normalise the inputs so set algebra can be used on them
    items1 = set1 if isinstance(set1, set) else set(set1)
    items2 = set2 if isinstance(set2, set) else set(set2)
    extra_args = () if f_args is None else f_args
    extra_kwargs = {} if f_kwargs is None else f_kwargs

    if items1 != items2:
        only_in_first = items1.difference(items2)
        only_in_second = items2.difference(items1)
        message = ("{} contains different keys, set 1 exclusively"
                   " contains {} and set 2 exclusively contains {}")
        yield message.format(path, only_in_first, only_in_second)

    # Only items available on both sides can be compared in more detail
    shared_items = sorted(items1.intersection(items2))
    for shared_item in shared_items:
        for difference in sub_compare_func(shared_item, *extra_args, **extra_kwargs):
            yield difference
def compare_execution_dir(path1, path2):
    """
    Compare two execution directories and report the differences

    The network dump found in each directory is loaded and the two networks
    are compared. The first network is then chunked and analyzed to get the
    execution order of its nodes. For every node in that order, the entries
    of the node directory under ``path1`` and ``path2`` are compared using
    :func:`compare_set` with :func:`compare_job_dirs` as the sub comparison.

    :param str path1: first execution directory ("result 1" in messages)
    :param str path2: second execution directory ("result 2" in messages)
    :return: generator that iterates over the differences
    :rtype: generator
    """
    # Compare network dumps
    network_cls = fastr.planning.network.Network
    network_file1 = os.path.join(path1, network_cls.NETWORK_DUMP_FILE_NAME)
    network_file2 = os.path.join(path2, network_cls.NETWORK_DUMP_FILE_NAME)

    fastr.log.info('Loading network 1: {}'.format(network_file1))
    network1 = network_cls.loadf(network_file1)
    fastr.log.info('Loading network 2: {}'.format(network_file2))
    network2 = network_cls.loadf(network_file2)

    fastr.log.debug('Network1 filename: {}'.format(network1.filename))
    fastr.log.debug('Network2 filename: {}'.format(network2.filename))

    if network1 != network2:
        yield "Networks dumps are not equal!"
    else:
        # Only network1 is used from here on
        del network2

    # Get the order of the Nodes
    execution_order = []

    # Create execution objects
    chuncker = DefaultNetworkChunker()
    analyzer = DefaultNetworkAnalyzer()

    # Chunk the Network in executable blocks and let the analyzer determine
    # the execution order within each chunk
    for chunk in chuncker.chunck_network(network1):
        execution_order.extend(analyzer.analyze_network(None, chunk))

    fastr.log.debug('Execution order: "{}"'.format(execution_order))

    # Compare node outputs in execution order
    for node in execution_order:
        fastr.log.debug('Checking node {}'.format(node.id))

        # Get the sample present
        node_dir1 = os.path.join(path1, node.id)
        node_dir2 = os.path.join(path2, node.id)

        if isinstance(node, fastr.SourceNode):
            # Possible source nodes do not exist
            has_dir1 = os.path.isdir(node_dir1)
            has_dir2 = os.path.isdir(node_dir2)

            if not has_dir1 and not has_dir2:
                # Non-existing in both
                continue

            if not has_dir1:
                # The directory under path1 is the one missing, so result 1
                # lacks the output; the node id is filled into the message
                yield "NodeRun '{}' does not have output for result 1".format(node.id)
                continue

            if not has_dir2:
                yield "NodeRun '{}' does not have output for result 2".format(node.id)
                continue

        # NOTE: for nodes other than source nodes a missing directory is not
        # checked for here, os.listdir is called on both paths directly
        for diff in compare_set(
                os.listdir(node_dir1),
                os.listdir(node_dir2),
                node.id,
                compare_job_dirs,
                (node, node_dir1, node_dir2)):
            yield diff
def compare_job_dirs(sample, node, node_dir1, node_dir2):
    """
    Compare the result dumps of one sample in two node directories

    The result dump (``Job.RESULT_DUMP``) is looked up in the ``sample``
    sub-directory of both node directories. If only one of the two exists
    this is reported; if both exist they are loaded and their output data
    keys are compared, with :func:`compare_job_output_data` being applied
    to every output present in both jobs. If neither exists nothing is
    reported.

    :param sample: name of the sample directory inside both node directories
    :param node: node the sample belongs to (not used by this function)
    :param str node_dir1: directory of the node in the first result
    :param str node_dir2: directory of the node in the second result
    :return: generator that iterates over the differences
    :rtype: generator
    """
    fastr.log.debug('Checking sample {}'.format(sample))

    dump1 = os.path.join(node_dir1, sample, Job.RESULT_DUMP)
    dump2 = os.path.join(node_dir2, sample, Job.RESULT_DUMP)
    dump1_found = os.path.exists(dump1)
    dump2_found = os.path.exists(dump2)

    if dump1_found != dump2_found:
        # Exactly one of the dumps is missing, report which one
        if dump1_found:
            present, absent = dump1, dump2
        else:
            present, absent = dump2, dump1
        yield "Ouput data for {} exists, but {} does not exist".format(
            present,
            absent,
        )
        return

    if not dump1_found:
        # Neither dump exists, there is nothing to compare
        return

    first_job = load_gpickle(dump1)
    second_job = load_gpickle(dump2)
    location = '{}/{}'.format(first_job.node_id, first_job.sample_id)

    differences = compare_set(
        list(first_job.output_data.keys()),
        list(second_job.output_data.keys()),
        location,
        compare_job_output_data,
        (first_job, second_job),
    )
    for difference in differences:
        yield difference
def compare_job_output_data(output, job1, job2):
    """
    Compare the data of a single output between two jobs

    If the data of the first job is a list, both are compared directly with
    :func:`compare_value_list`. Otherwise the data is treated as a mapping:
    the keys are compared with :func:`compare_set` and every key present in
    both is compared with :func:`compare_value_dict_item`.

    :param output: key of the output in the ``output_data`` of the jobs
    :param job1: first job, must offer ``output_data``, ``id``, ``node_id``
                 and ``sample_id``
    :param job2: second job, must offer ``output_data``
    :return: generator that iterates over the differences
    :rtype: generator
    """
    values1 = job1.output_data[output]
    values2 = job2.output_data[output]

    if isinstance(values1, list):
        differences = compare_value_list(
            values1,
            values2,
            '{}/{}'.format(job1.id, output),
        )
    else:
        differences = compare_set(
            list(values1.keys()),
            list(values2.keys()),
            '{}/{}'.format(job1.node_id, job1.sample_id),
            compare_value_dict_item,
            (values1, values2, '{}/{}'.format(job1.id, output)),
        )

    for difference in differences:
        yield difference
def compare_value_dict_item(key, data1, data2, path):
    """
    Compare the values stored under one key in two mappings

    When ``key`` is not None, the entry for ``key`` is taken from both
    ``data1`` and ``data2`` and the key is appended to ``path``. When it is
    None the data and path are used unchanged. The resulting values are
    compared with :func:`compare_value_list`.

    :param key: key to select in both data objects, or None to use them as is
    :param data1: data of the first job
    :param data2: data of the second job
    :param str path: identifier of the data location
    :return: generator that iterates over the differences
    :rtype: generator
    """
    if key is None:
        values1, values2, item_path = data1, data2, path
    else:
        values1 = data1[key]
        values2 = data2[key]
        item_path = '{}/{}'.format(path, key)

    for difference in compare_value_list(values1, values2, item_path):
        yield difference
def compare_value_list(data1, data2, path, key=None):
    """
    Compare two sequences of values item by item

    If the sequences differ in length only that is reported (as a difference
    in cardinality). Otherwise one message is yielded for every index at
    which the two items are not equal.

    :param data1: first sequence, or a container holding it under ``key``
    :param data2: second sequence, or a container holding it under ``key``
    :param str path: identifier of the data location (used in the messages)
    :param key: if not None, the sequences are first looked up under this
                key in ``data1`` and ``data2``
    :return: generator that iterates over the differences
    :rtype: generator
    """
    if key is None:
        values1, values2 = data1, data2
    else:
        values1 = data1[key]
        values2 = data2[key]

    fastr.log.debug('Job1 data: {}'.format(values1))
    fastr.log.debug('Job2 data: {}'.format(values2))

    if len(values1) != len(values2):
        # Items can not be paired up when the lengths differ
        yield "Cardinality for {} differs ({} vs {})".format(
            path,
            len(values1),
            len(values2),
        )
        return

    for position, pair in enumerate(zip(values1, values2)):
        left, right = pair
        if left != right:
            yield "Output data for path {}, index {} does not match ({} vs {})".format(
                path,
                position,
                left,
                right
            )