Source code for gunittest.checkers

"""
GRASS Python testing framework checkers

Copyright (C) 2014 by the GRASS Development Team
This program is free software under the GNU General Public
License (>=v2). Read the file COPYING that comes with GRASS GIS
for details.

:authors: Vaclav Petras, Soeren Gebbert
"""

import os
import sys
import re
import doctest
import hashlib

from grass.script.utils import encode

try:
    from grass.script.core import KeyValue
except (ImportError, AttributeError):
    # TODO: we are silent about the error and use a object with different
    # interface, should be replaced by central keyvalue module
    # this can happen when translations are not available
    # TODO: grass should survive and give better errors when translations are not
    # available
    # even the lazy loading after first _ call would be interesting
    # File "...grass/script/core.py", line 40, in <module>
    # AttributeError: 'NoneType' object has no attribute 'endswith'
    KeyValue = dict

# alternative term to check(er(s)) would be compare


[docs]def unify_projection(dic):
    """Unifies names of projections.

    Some projections are referred using different names like
    'Universal Transverse Mercator' and 'Universe Transverse Mercator'.
    This function replaces synonyms by a unified name.

    Example of common typo in UTM replaced by correct spelling::

        >>> unify_projection({"name": ["Universe Transverse Mercator"]})
        {'name': ['Universal Transverse Mercator']}

    :param dic: The dictionary containing information about projection

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of list, each list contains all the
    # possible name for a projection system
    lookup = [["Universal Transverse Mercator", "Universe Transverse Mercator"]]
    dic = dict(dic)
    for item in lookup:
        for n in range(len(dic["name"])):
            if dic["name"][n] in item:
                dic["name"][n] = item[0]
    return dic


[docs]def unify_units(dic):
    """Unifies names of units.

    Some units have different spelling although they are the same units.
    This functions replaces different spelling options by unified one.

    Example of British English spelling replaced by US English spelling::

        >>> unify_units({"units": ["metres"], "unit": ["metre"]})  # doctest: +SKIP
        {'units': ['meters'], 'unit': ['meter']}

    :param dic: The dictionary containing information about units

    :return: The dictionary with the new values if needed or a copy of old one
    """
    # the lookup variable is a list of list, each list contains all the
    # possible name for a units
    lookup = [
        ["meter", "metre"],
        ["meters", "metres"],
        ["Meter", "Metre"],
        ["Meters", "Metres"],
        ["kilometer", "kilometre"],
        ["kilometers", "kilometres"],
        ["Kilometer", "Kilometre"],
        ["Kilometers", "Kilometres"],
    ]
    dic = dict(dic)
    for item in lookup:
        if not isinstance(dic["unit"], str):
            for n in range(len(dic["unit"])):
                if dic["unit"][n] in item:
                    dic["unit"][n] = item[0]
        else:
            if dic["unit"] in item:
                dic["unit"] = item[0]
        if not isinstance(dic["units"], str):
            for n in range(len(dic["units"])):
                if dic["units"][n] in item:
                    dic["units"][n] = item[0]
        else:
            if dic["units"] in item:
                dic["units"] = item[0]
    return dic


[docs]def value_from_string(value):
    """Create value of a most fitting type from a string.

    Type conversions are applied in order ``int``, ``float``, ``string``
    where string is no conversion.

    >>> value_from_string("1")
    1
    >>> value_from_string("5.6")
    5.6
    >>> value_from_string("  5.6\t  ")
    5.6
    >>> value_from_string("hello")
    'hello'
    """
    not_float = False
    not_int = False
    # Convert values into correct types
    # We first try integer then float because
    # int('1.0') is ValueError (although int(1.0) is not)
    # while float('1') is not
    try:
        value_converted = int(value)
    except ValueError:
        not_int = True
    if not_int:
        try:
            value_converted = float(value)
        except ValueError:
            not_float = True
    # strip strings from whitespace (expecting spaces and tabs)
    if not_int and not_float:
        value_converted = value.strip()
    return value_converted


# TODO: what is the default separator?
[docs]def text_to_keyvalue(
    text,
    sep=":",
    val_sep=",",
    functions=None,
    skip_invalid=False,
    skip_empty=False,
    from_string=value_from_string,
):
    """Convert test to key-value pairs (dictionary-like KeyValue object).

    Converts a key-value text file, where entries are separated
    by newlines and the key and value are separated by `sep`,
    into a key-value dictionary and discovers/uses the correct
    data types (float, int or string) for values.

    Besides key-value pairs it also parses values itself. Value is created
    with the best fitting type using `value_from_string()` function by default.
    When val_sep is present in value part, the resulting value is
    a list of values.

    :param text: string to convert
    :param sep: character that separates the keys and values
    :param val_sep: character that separates the values of a single key
    :param functions: list of functions to apply on the resulting dictionary
    :param skip_invalid: skip all lines which does not contain separator
    :param skip_empty: skip empty lines
    :param from_string: a function used to convert strings to values,
        use ``lambda x: x`` for no conversion

    :return: a dictionary representation of text
    :return type: grass.script.core.KeyValue or dict

    And example of converting text with text, floats, integers and list
    to a dictionary::

        >>> sorted(
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d : hello,8,0.1'''
        ...     ).items()
        ... )  # sorted items from the dictionary
        [('a', 'Hello'), ('b', 1.0), ('c', [1, 2, 3, 4, 5]), ('d', ['hello', 8, 0.1])]

    .. warning::
        And empty string is a valid input because empty dictionary is a valid
        dictionary. You need to test this separately according
        to the circumstances.
    """
    # splitting according to universal newlines approach
    # TODO: add also general split with vsep
    text = text.splitlines()
    kvdict = KeyValue()
    functions = [] if functions is None else functions

    for line in text:
        if line.find(sep) >= 0:
            key, value = line.split(sep, 1)
            key = key.strip()
            value = value.strip()
            # this strip may not be necessary, we strip each item in list
            # and also if there is only one value
        else:
            # lines with no separator (empty or invalid)
            if not line:
                if not skip_empty:
                    # TODO: here should go _ for translation
                    # TODO: the error message is not really informative
                    # in case of skipping lines we may get here with no key
                    msg = "Empty line in the parsed text."
                    if kvdict:
                        # key is the one from previous line
                        msg = (
                            "Empty line in the parsed text."
                            " Previous line's key is <%s>"
                        ) % key
                    raise ValueError(msg)
            else:
                # line contains something but not separator
                if not skip_invalid:
                    # TODO: here should go _ for translation
                    raise ValueError(
                        ("Line <{l}> does not contain" " separator <{s}>.").format(
                            l=line, s=sep
                        )
                    )
            # if we get here we are silently ignoring the line
            # because it is invalid (does not contain key-value separator) or
            # because it is empty
            continue
        if value.find(val_sep) >= 0:
            # lists
            values = value.split(val_sep)
            value_list = []
            for value in values:
                value_converted = from_string(value)
                value_list.append(value_converted)
            kvdict[key] = value_list
        else:
            # single values
            kvdict[key] = from_string(value)
    for function in functions:
        kvdict = function(kvdict)
    return kvdict


# TODO: decide if there should be some default for precision
# TODO: define standard precisions for DCELL, FCELL, CELL, mm, ft, cm, ...
# TODO: decide if None is valid, and use some default or no compare
# TODO: is None a valid value for precision?
[docs]def values_equal(value_a, value_b, precision=0.000001):
    """
    >>> values_equal(1.022, 1.02, precision=0.01)
    True
    >>> values_equal([1.2, 5.3, 6.8], [1.1, 5.2, 6.9], precision=0.2)
    True
    >>> values_equal(7, 5, precision=2)
    True
    >>> values_equal(1, 5.9, precision=10)
    True
    >>> values_equal("Hello", "hello")
    False
    """
    # each if body needs to handle only not equal state

    if isinstance(value_a, float) and isinstance(value_b, float):
        # both values are float
        # this could be also changed to is None and raise TypeError
        # in Python 2 None is smaller than anything
        # in Python 3 None < 3 raises TypeError
        precision = float(precision)
        if precision < 0:
            raise ValueError(
                "precision needs to be greater than or equal to zero: {precision} < 0"
            )
        if abs(value_a - value_b) > precision:
            return False

    elif (isinstance(value_a, float) and isinstance(value_b, int)) or (
        isinstance(value_b, float) and isinstance(value_a, int)
    ):
        # on is float the other is int
        # don't accept None
        precision = float(precision)
        # we will apply precision to int-float comparison
        # rather than converting both to integer
        # (as in the original function from grass.script.core)
        if abs(value_a - value_b) > precision:
            return False

    elif (
        isinstance(value_a, int)
        and isinstance(value_b, int)
        and precision
        and int(precision) > 0
    ):
        # both int but precision applies for them
        if abs(value_a - value_b) > precision:
            return False

    elif isinstance(value_a, list) and isinstance(value_b, list):
        if len(value_a) != len(value_b):
            return False
        for i in range(len(value_a)):
            # apply this function for comparison of items in the list
            if not values_equal(value_a[i], value_b[i], precision):
                return False
    else:
        if value_a != value_b:
            return False
    return True


[docs]def keyvalue_equals(
    dict_a, dict_b, precision, def_equal=values_equal, key_equal=None, a_is_subset=False
):
    """Compare two dictionaries.

    .. note::
        Always use keyword arguments for all parameters with defaults.
        It is a good idea to use keyword arguments also for the first
        two parameters.

    An example of key-value texts comparison::

        >>> keyvalue_equals(
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d: hello,8,0.1'''
        ...     ),
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.1
        ... c: 1,22,3,4,5
        ... d: hello,8,0.1'''
        ...     ),
        ...     precision=0.1,
        ... )
        False

    :param dict_a: first dictionary
    :param dict_b: second dictionary
    :param precision: precision with which the floating point values
        are compared (passed to equality functions)
    :param callable def_equal: function used for comparison by default
    :param dict key_equal: dictionary of functions used for comparison
        of specific keys, `def_equal` is used for the rest,
        keys in dictionary are keys in `dict_a` and `dict_b` dictionaries,
        values are the functions used to comapare the given key
    :param a_is_subset: `True` if `dict_a` is a subset of `dict_b`,
        `False` otherwise

    :return: `True` if identical, `False` if different

    Use `diff_keyvalue()` to get information about differeces.
    You can use this function to find out if there is a difference and then
    use `diff_keyvalue()` to determine all the differences between
    dictionaries.
    """
    key_equal = {} if key_equal is None else key_equal

    if not a_is_subset and sorted(dict_a.keys()) != sorted(dict_b.keys()):
        return False
    b_keys = dict_b.keys() if a_is_subset else None

    # iterate over subset or just any if not a_is_subset
    # check for missing keys in superset
    # compare matching keys
    for key in dict_a.keys():
        if a_is_subset and key not in b_keys:
            return False
        equal_fun = key_equal.get(key, def_equal)
        if not equal_fun(dict_a[key], dict_b[key], precision):
            return False
    return True


# TODO: should the return depend on the a_is_subset parameter?
# this function must have the same interface and behavior as keyvalue_equals
[docs]def diff_keyvalue(
    dict_a, dict_b, precision, def_equal=values_equal, key_equal=None, a_is_subset=False
):
    """Determine the difference of two dictionaries.

    The function returns missing keys and different values for common keys::

        >>> a = {"c": 2, "b": 3, "a": 4}
        >>> b = {"c": 1, "b": 3, "d": 5}
        >>> diff_keyvalue(a, b, precision=0)
        (['d'], ['a'], [('c', 2, 1)])

    You can provide only a subset of values in dict_a, in this case
    first item in tuple is an emptu list::

        >>> diff_keyvalue(a, b, a_is_subset=True, precision=0)
        ([], ['a'], [('c', 2, 1)])

    This function behaves the same as `keyvalue_equals()`.

    :returns: A tuple of lists, fist is list of missing keys in dict_a,
        second missing keys in dict_b and third is a list of mismatched
        values as tuples (key, value_from_a, value_from_b)
    :rtype: (list, list, list)

    Comparing to the Python ``difflib`` package this function does not create
    any difference output. It just returns the dictionaries.
    Comparing to the Python ``unittest`` ``assertDictEqual()``,
    this function does not issues error or exception, it just determines
    what it the difference.
    """
    key_equal = {} if key_equal is None else key_equal

    a_keys = dict_a.keys()
    b_keys = dict_b.keys()

    missing_in_a = []
    missing_in_b = []
    mismatched = []

    if not a_is_subset:
        for key in b_keys:
            if key not in a_keys:
                missing_in_a.append(key)

    # iterate over a, so we know that it is in a
    for key in a_keys:
        # check if it is in b
        if key not in b_keys:
            missing_in_b.append(key)
        else:
            equal_fun = key_equal.get(key, def_equal)
            if not equal_fun(dict_a[key], dict_b[key], precision):
                mismatched.append((key, dict_a[key], dict_b[key]))

    return sorted(missing_in_a), sorted(missing_in_b), sorted(mismatched)


[docs]def proj_info_equals(text_a, text_b):
    """Test if two PROJ_INFO texts are equal."""

    def compare_sums(list_a, list_b, precision):
        """Compare difference of sums of two list using precision"""
        # derived from the code in grass.script.core
        if abs(sum(list_a) - sum(list_b)) > precision:
            return False

    sep = ":"
    val_sep = ","
    key_equal = {"+towgs84": compare_sums}
    dict_a = text_to_keyvalue(
        text_a, sep=sep, val_sep=val_sep, functions=[unify_projection]
    )
    dict_b = text_to_keyvalue(
        text_b, sep=sep, val_sep=val_sep, functions=[unify_projection]
    )
    return keyvalue_equals(
        dict_a, dict_b, precision=0.000001, def_equal=values_equal, key_equal=key_equal
    )


[docs]def proj_units_equals(text_a, text_b):
    """Test if two PROJ_UNITS texts are equal."""

    def lowercase_equals(string_a, string_b, precision=None):
        # we don't need a warning for unused precision
        # pylint: disable=W0613
        """Test equality of two strings ignoring their case using ``lower()``.

        Precision is accepted as require by `keyvalue_equals()` but ignored.
        """
        return string_a.lower() == string_b.lower()

    sep = ":"
    val_sep = ","
    key_equal = {"unit": lowercase_equals, "units": lowercase_equals}
    dict_a = text_to_keyvalue(text_a, sep=sep, val_sep=val_sep, functions=[unify_units])
    dict_b = text_to_keyvalue(text_b, sep, val_sep, functions=[unify_units])
    return keyvalue_equals(
        dict_a, dict_b, precision=0.000001, def_equal=values_equal, key_equal=key_equal
    )


# TODO: support also float (with E, e, inf, nan, ...?) and int (###, ##.)
# http://hg.python.org/cpython/file/943d3e289ab4/Lib/decimal.py#l6098
# perhaps a separate function?
# alternative names: looks like, correspond with/to
# TODO: change checking over lines?
# TODO: change parameter order?
# TODO: the behavior with last \n is strange but now using DOTALL and $
[docs]def check_text_ellipsis(reference, actual):
    r"""
    >>> check_text_ellipsis(
    ...     "Vector map <...> contains ... points.",
    ...     "Vector map <bridges> contains 5268 points.",
    ... )
    True
    >>> check_text_ellipsis(
    ...     "user: ...\\nname: elevation", "user: some_user\\nname: elevation"
    ... )
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation", "user: \\nname: elevation")
    False

    The ellipsis is always considered even if it is followed by another
    dots. Consequently, a dot at the end of the sentence with preceding
    ellipsis will work as well as a line filled with undefined number of dots.

    >>> check_text_ellipsis("The result is ....", "The result is 25.")
    True
    >>> check_text_ellipsis("max ..... ...", "max ....... 6")
    True

    However, there is no way how to express that the dot should be in the
    beginning and the ellipsis is at the end of the group of dots.

    >>> check_text_ellipsis("The result is ....", "The result is .25")
    False

    The matching goes over lines (TODO: should this be changed?):
    >>> check_text_ellipsis("a=11\nb=...", "a=11\nb=22\n")
    True

    This function is based on regular expression containing .+ but no other
    regular expression matching will be done.

    >>> check_text_ellipsis("Result: [569] (...)", "Result: 9 (too high)")
    False
    """
    ref_escaped = re.escape(reference)
    exp = re.compile(r"\\\.\\\.\\\.")  # matching escaped ...
    ref_regexp = exp.sub(".+", ref_escaped) + "$"
    if re.match(ref_regexp, actual, re.DOTALL):
        return True
    else:
        return False


[docs]def check_text_ellipsis_doctest(reference, actual):
    """
    >>> check_text_ellipsis_doctest(
    ...     "user: ...\\nname: elevation", "user: some_user\\nname: elevation"
    ... )
    True
    >>> check_text_ellipsis_doctest(
    ...     "user: ...\\nname: elevation", "user: \\nname: elevation"
    ... )
    True

    This function is using doctest's function to check the result, so we
    will discuss here how the underlying function behaves.

    >>> checker = doctest.OutputChecker()
    >>> checker.check_output(
    ...     "user: some_user\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=None,
    ... )
    True
    >>> checker.check_output(
    ...     "user: user1\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    False
    >>> checker.check_output(
    ...     "user: ...\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    True

    The ellipsis matches also an empty string, so the following matches:

    >>> checker.check_output(
    ...     "user: ...\\nname: elevation",
    ...     "user: \\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    True

    It is robust concerning misspelled matching string but does not allow
    ellipsis followed by a dot, e.g. at the end of the sentence:

    >>> checker.check_output(
    ...     "user: ....\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    False
    """
    # this can be also global
    checker = doctest.OutputChecker()
    return checker.check_output(reference, actual, optionflags=doctest.ELLIPSIS)


# optimal size depends on file system and maybe on hasher.block_size
_BUFFER_SIZE = 2**16


# TODO: accept also open file object
[docs]def file_md5(filename):
    """Get MD5 (check) sum of a file."""
    hasher = hashlib.md5()
    with open(filename, "rb") as f:
        buf = f.read(_BUFFER_SIZE)
        while len(buf) > 0:
            hasher.update(buf)
            buf = f.read(_BUFFER_SIZE)
    return hasher.hexdigest()


[docs]def text_file_md5(
    filename, exclude_lines=None, exclude_re=None, prepend_lines=None, append_lines=None
):
    """Get a MD5 (check) sum of a text file.

    Works in the same way as `file_md5()` function but ignores newlines
    characters and excludes lines from the file as well as prepend or
    append them if requested.

    :param exclude_lines: list of strings to be excluded
        (newline characters should not be part of the strings)
    :param exclude_re: regular expression string;
        lines matching this regular expression will not be considered
    :param prepend_lines: list of lines to be prepended to the file
        before computing the sum
    :param append_lines: list of lines  to be appended to the file
        before computing the sum
    """
    hasher = hashlib.md5()
    if exclude_re:
        regexp = re.compile(exclude_re)
    if prepend_lines:
        for line in prepend_lines:
            hasher.update(encode(line))
    with open(filename, "r") as f:
        for line in f:
            # replace platform newlines by standard newline
            if os.linesep != "\n":
                line = line.rstrip(os.linesep) + "\n"
            if exclude_lines and line in exclude_lines:
                continue
            if exclude_re and regexp.match(line):
                continue
            hasher.update(encode(line))
    if append_lines:
        for line in append_lines:
            hasher.update(encode(line))
    return hasher.hexdigest()


[docs]def files_equal_md5(filename_a, filename_b):
    """Check equality of two files according to their MD5 sums"""
    return file_md5(filename_a) == file_md5(filename_b)


[docs]def main():  # pragma: no cover
    """Run the doctest"""
    ret = doctest.testmod()
    return ret.failed


if __name__ == "__main__":  # pragma: no cover
    sys.exit(main())