# Source code for jobs.tools.comp_nc

#!/usr/bin/env python
# -*- coding: utf-8 -*-

# Compare two netCDF-files

# Author: dao, david.ochsner@empa.ch

import argparse
import netCDF4 as nc
import numpy as np

# Terminal escape sequences that switch the foreground color, keyed by the
# color name used to look them up.
colors = {
    "green": '\033[32m',
    "red": '\033[31m',
    "yellow": '\033[33m',
}


def ccprint(text, color=None, verbose=True):
    """Print ``text``, optionally colored, but only if ``verbose`` is set

    Parameters
    ----------
    text : str
        Message to print
    color : str
        Name of the color to print in; has to be a key of the module-level
        ``colors`` table ('red', 'green' or 'yellow'). With 'None' the text
        is printed as-is, without any color codes.
    verbose : bool
        If 'False' nothing is printed and 'color' is not looked at

    Raises
    ------
    ValueError
        If 'color' is not 'None' and not a key of ``colors``
    """
    if verbose:
        if color is None:
            print(text)
        else:
            try:
                prefix = colors[color]
            except KeyError:
                raise ValueError("Unrecognized color")
            # The trailing '\033[0m' resets the terminal attributes so that
            # output printed afterwards is not colored as well.
            print(prefix + text + '\033[0m')


def import_data(path):
    """Open the netCDF file at ``path`` as a netCDF4-Dataset

    Parameters
    ----------
    path : str
        Location of the .nc file

    Returns
    -------
    netCDF4.Dataset
        The object created by ``nc.Dataset(path)``. It is not closed
        here; that is left to the caller.
    """
    dataset = nc.Dataset(path)
    return dataset


def _is_numeric_dtype(dtype):
    """Return True if ``dtype`` is a floating point or integer type."""
    # np.issubdtype replaces the lookup in np.sctypes, which was removed in
    # NumPy 2.0. Unlike np.sctypes['int'] it also accepts unsigned integers.
    return (np.issubdtype(dtype, np.floating)
            or np.issubdtype(dtype, np.integer))


def datasets_equal(dataset1, dataset2, variables, verbose=True):
    """Compare the contents of dataset1 and dataset2

    Compare with numpy.allclose whether the two datasets are equal. No check
    for equality (of the values or bitwise of the files) is performed, as
    numerical errors can produce slightly different files for essentially
    identical computations. Rather, the values are compared to absolute and
    relative tolerances, check np.isclose documentation for more detail.

    If variables is not empty, only the provided variables are compared.

    A variable counts as different if its dtype or its shape differs
    between the datasets, or if its values are not close to each other.
    Only floating point and integer variables have their values compared;
    variables of any other type are reported and skipped.

    Parameters
    ----------
    dataset1 : netCDF4.Dataset
    dataset2 : netCDF4.Dataset
    variables : list of str
        List of the variables to be compared. If it is empty, all variables
        are compared.
    verbose : bool
        If True, results will be printed to stdout.

    Returns
    -------
    bool
        True if the datasets, or if provided the selected variables, are
        equal, False otherwise. If variables is empty and the datasets
        don't contain the same set of variables, False is returned without
        comparing any values.

    Raises
    ------
    AssertionError
        If variables is not empty and one of the datasets doesn't contain
        all of the requested variables.
    """
    names1 = set(dataset1.variables.keys())
    names2 = set(dataset2.variables.keys())

    if not variables:
        if names1 != names2:
            ccprint("Files don't contain the same variables.", "red", verbose)
            ccprint("The following variables are in only "
                    "one of the files:", None, verbose)
            ccprint(names1.symmetric_difference(names2), None, verbose)
            ccprint("The common variables are:", None, verbose)
            ccprint(names1.intersection(names2), None, verbose)
            return False
        variables = names1
    else:
        # Raised explicitly instead of using ``assert`` so that the check
        # is not stripped when Python runs with -O. The exception type is
        # kept so existing callers see the same error as before.
        if not names1.issuperset(variables):
            raise AssertionError(
                "Dataset 1 doesn't contain all variables "
                "that should be compared")
        if not names2.issuperset(variables):
            raise AssertionError(
                "Dataset 2 doesn't contain all variables "
                "that should be compared")

    result = True
    for var in variables:
        var1 = dataset1[var]
        var2 = dataset2[var]

        if not var1.dtype == var2.dtype:
            ccprint("{} has different types.".format(var), "red", verbose)
            result = False

        # Both sides have to be numeric: np.allclose can't compare a
        # numeric variable against e.g. a string variable.
        if not (_is_numeric_dtype(var1.dtype)
                and _is_numeric_dtype(var2.dtype)):
            ccprint(
                "{} is not a numeric type "
                "and not compared.".format(var), None, verbose)
            continue

        # np.allclose broadcasts its arguments, so differently shaped
        # variables would either raise or be wrongly reported as equal.
        if np.shape(var1) != np.shape(var2):
            ccprint("{} has different shapes.".format(var), "red", verbose)
            result = False
            continue

        if np.allclose(var1, var2):
            ccprint("{} is equal.".format(var), None, verbose)
        else:
            ccprint("{} is not equal".format(var), "red", verbose)
            result = False

    return result
if __name__ == '__main__':
    # Command line entry point: compare the two netCDF files given as
    # arguments and print whether they are equal.

    # The text has to be passed as ``description``: the first positional
    # parameter of ArgumentParser is ``prog``, so passing it positionally
    # would make the sentence show up as the program name in the usage line.
    parser = argparse.ArgumentParser(
        description="Compare two netCDF files.")
    parser.add_argument("Dataset1", type=str,
                        help="Path to the first dataset.")
    parser.add_argument("Dataset2", type=str,
                        help="Path to the second dataset.")
    parser.add_argument("-v", "--variables", nargs='*', default=[],
                        dest="variables",
                        help="Variables to be compared. If "
                        "none are given, all variables in the files are "
                        "compared.")
    args = parser.parse_args()

    # Open both files in a with-statement so that they are closed again
    # once the comparison is done, even if it raises.
    with import_data(args.Dataset1) as data1, \
            import_data(args.Dataset2) as data2:
        files_equal = datasets_equal(data1, data2, args.variables)

    if files_equal:
        ccprint("Provided files are equal", color="green")
    else:
        ccprint("Provided files are not equal", color="red")