"""
GRASS Python testing framework checkers
Copyright (C) 2014 by the GRASS Development Team
This program is free software under the GNU General Public
License (>=v2). Read the file COPYING that comes with GRASS GIS
for details.
:authors: Vaclav Petras, Soeren Gebbert
"""
from __future__ import annotations
import doctest
import hashlib
import os
import re
import sys
from typing import TYPE_CHECKING, Any
from grass.script.utils import encode
if TYPE_CHECKING:
from collections.abc import Callable, Mapping
try:
from grass.script.core import KeyValue
except (ImportError, AttributeError):
# TODO: we are silent about the error and use an object with a different
# interface, should be replaced by central keyvalue module
# this can happen when translations are not available
# TODO: grass should survive and give better errors when translations are not
# available
# even the lazy loading after first _ call would be interesting
# File "...grass/script/core.py", line 40, in <module>
# AttributeError: 'NoneType' object has no attribute 'endswith'
KeyValue = dict
# alternative term to check(er(s)) would be compare
def unify_projection(dic):
    """Unifies names of projections.

    Some projections are referred to using different names like
    'Universal Transverse Mercator' and 'Universe Transverse Mercator'.
    This function replaces synonyms by a unified name.

    Example of common typo in UTM replaced by correct spelling::

        >>> unify_projection({"name": ["Universe Transverse Mercator"]})
        {'name': ['Universal Transverse Mercator']}

    The name can also be a plain string, i.e. a single value::

        >>> unify_projection({"name": "Universe Transverse Mercator"})
        {'name': 'Universal Transverse Mercator'}

    :param dic: The dictionary containing information about projection
    :return: A new dictionary with the unified values; neither the input
        dictionary nor the list stored under ``"name"`` is modified
    :raises KeyError: if ``dic`` has no ``"name"`` key
    """
    # the lookup variable is a list of lists, each list contains all the
    # possible names of one projection system, the first one is the unified name
    lookup = [["Universal Transverse Mercator", "Universe Transverse Mercator"]]

    def unify(name):
        """Return the unified spelling of name or name itself if unknown."""
        for names in lookup:
            if name in names:
                return names[0]
        return name

    dic = dict(dic)
    name = dic["name"]
    if isinstance(name, str):
        # a single value is a plain string; iterating over it would
        # compare individual characters and never match
        dic["name"] = unify(name)
    else:
        # build a new list so that the caller's list is not modified
        dic["name"] = [unify(item) for item in name]
    return dic
def unify_units(dic):
    """Unifies names of units.

    Some units have different spelling although they are the same units.
    This function replaces different spelling options by a unified one.

    Example of British English spelling replaced by US English spelling::

        >>> unify_units({"units": ["metres"], "unit": ["metre"]})  # doctest: +SKIP
        {'units': ['meters'], 'unit': ['meter']}

    The values under ``"unit"`` and ``"units"`` can each be either a plain
    string or a sequence of strings.

    :param dic: The dictionary containing information about units
    :return: A new dictionary with the unified values; neither the input
        dictionary nor the lists stored in it are modified
    :raises KeyError: if ``dic`` lacks the ``"unit"`` or the ``"units"`` key
    """
    # the lookup variable is a list of lists, each list contains all the
    # possible names of one unit, the first one is the unified name
    lookup = [
        ["meter", "metre"],
        ["meters", "metres"],
        ["Meter", "Metre"],
        ["Meters", "Metres"],
        ["kilometer", "kilometre"],
        ["kilometers", "kilometres"],
        ["Kilometer", "Kilometre"],
        ["Kilometers", "Kilometres"],
    ]

    def unify(name):
        """Return the unified spelling of name or name itself if unknown."""
        for spellings in lookup:
            if name in spellings:
                return spellings[0]
        return name

    dic = dict(dic)
    for key in ("unit", "units"):
        value = dic[key]
        if isinstance(value, str):
            dic[key] = unify(value)
        else:
            # build a new list so that the caller's list is not modified
            dic[key] = [unify(item) for item in value]
    return dic
def value_from_string(value):
    """Create a value of the most fitting type from a string.

    Conversions are tried in the order ``int``, ``float``; when both fail,
    the string itself is returned with surrounding whitespace removed.

    >>> value_from_string("1")
    1
    >>> value_from_string("5.6")
    5.6
    >>> value_from_string("  5.6  ")
    5.6
    >>> value_from_string("hello")
    'hello'

    :param value: string to convert
    :return: ``int``, ``float`` or stripped ``str``
    """
    # int goes first because int('1.0') is a ValueError (although int(1.0)
    # is not) while float('1') succeeds
    for convert in (int, float):
        try:
            return convert(value)
        except ValueError:
            continue
    # neither a number nor convertible: strip whitespace (spaces and tabs)
    return value.strip()
# TODO: what is the default separator?
def text_to_keyvalue(
    text,
    sep=":",
    val_sep=",",
    functions=None,
    skip_invalid=False,
    skip_empty=False,
    from_string=value_from_string,
):
    """Convert text to key-value pairs (dictionary-like KeyValue object).

    The text is split into lines, each line is split at the first `sep`
    into a key and a value, and both are stripped of surrounding whitespace.
    A value containing `val_sep` becomes a list of items. Every item (or the
    single value) is converted by `from_string`, which by default is
    `value_from_string()` and picks the best fitting type (int, float or
    string).

    :param text: string to convert
    :param sep: character that separates the keys and values
    :param val_sep: character that separates the values of a single key
    :param functions: list of functions applied one after another to the
        resulting dictionary; each receives the dictionary and its return
        value replaces it
    :param skip_invalid: skip all non-empty lines which do not contain
        the separator instead of raising
    :param skip_empty: skip empty lines instead of raising
    :param from_string: a function used to convert strings to values,
        use ``lambda x: x`` for no conversion
    :return: a dictionary representation of text
    :return type: grass.script.core.KeyValue or dict
    :raises ValueError: on an empty line (unless `skip_empty`) or on
        a line without separator (unless `skip_invalid`)

    An example of converting text with text, floats, integers and list
    to a dictionary::

        >>> sorted(
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d : hello,8,0.1'''
        ...     ).items()
        ... )  # sorted items from the dictionary
        [('a', 'Hello'), ('b', 1.0), ('c', [1, 2, 3, 4, 5]), ('d', ['hello', 8, 0.1])]

    .. warning::
        An empty string is a valid input because an empty dictionary is
        a valid dictionary. You need to test this separately according
        to the circumstances.
    """
    result = KeyValue()
    if functions is None:
        functions = []
    # str.splitlines() splits according to the universal newlines approach
    # TODO: add also general split with vsep
    for line in text.splitlines():
        if sep not in line:
            # lines with no separator (empty or invalid)
            if line:
                # line contains something but not separator
                if not skip_invalid:
                    # TODO: here should go _ for translation
                    msg = ("Line <{l}> does not contain separator <{s}>.").format(
                        l=line, s=sep
                    )
                    raise ValueError(msg)
            elif not skip_empty:
                # TODO: here should go _ for translation
                # TODO: the error message is not really informative
                # in case of skipping lines we may get here with no key
                msg = "Empty line in the parsed text."
                if result:
                    # key is the one from the last parsed line
                    msg = (
                        "Empty line in the parsed text."
                        " Previous line's key is <%s>"
                    ) % key
                raise ValueError(msg)
            # the line is silently ignored because it is empty or invalid
            continue
        key, raw_value = (part.strip() for part in line.split(sep, 1))
        if val_sep in raw_value:
            # lists
            result[key] = [from_string(item) for item in raw_value.split(val_sep)]
        else:
            # single values
            result[key] = from_string(raw_value)
    for function in functions:
        result = function(result)
    return result
# TODO: decide if there should be some default for precision
# TODO: define standard precisions for DCELL, FCELL, CELL, mm, ft, cm, ...
# TODO: decide if None is valid, and use some default or no compare
# TODO: is None a valid value for precision?
def values_equal(value_a, value_b, precision: float = 0.000001) -> bool:
    """Compare two values, applying precision to numerical ones.

    Two floats, or a float and an int, are equal when their absolute
    difference does not exceed `precision`. Two ints are compared the same
    way when ``int(precision)`` is greater than zero, otherwise they must be
    identical. Two lists are equal when they have the same length and all
    their items are equal according to this function. Anything else is
    compared using ``!=``.

    >>> values_equal(1.022, 1.02, precision=0.01)
    True
    >>> values_equal([1.2, 5.3, 6.8], [1.1, 5.2, 6.9], precision=0.2)
    True
    >>> values_equal(7, 5, precision=2)
    True
    >>> values_equal(1, 5.9, precision=10)
    True
    >>> values_equal("Hello", "hello")
    False

    :param value_a: first value
    :param value_b: second value
    :param precision: maximal accepted absolute difference of numbers
    :return: `True` if the values are considered equal, `False` otherwise
    :raises ValueError: if two floats are compared with a negative precision
    """
    # each if body needs to handle only not equal state
    if isinstance(value_a, float) and isinstance(value_b, float):
        # both values are float
        # float(None) raises TypeError, so None is not accepted as precision
        precision = float(precision)
        if precision < 0:
            msg = (
                "precision needs to be greater than or equal to zero:"
                f" {precision} < 0"
            )
            raise ValueError(msg)
        if abs(value_a - value_b) > precision:
            return False

    elif (isinstance(value_a, float) and isinstance(value_b, int)) or (
        isinstance(value_b, float) and isinstance(value_a, int)
    ):
        # one is float the other is int
        # don't accept None
        precision = float(precision)
        # we will apply precision to int-float comparison
        # rather than converting both to integer
        # (as in the original function from grass.script.core)
        if abs(value_a - value_b) > precision:
            return False

    elif (
        isinstance(value_a, int)
        and isinstance(value_b, int)
        and precision
        and int(precision) > 0
    ):
        # both int but precision applies for them
        if abs(value_a - value_b) > precision:
            return False

    elif isinstance(value_a, list) and isinstance(value_b, list):
        if len(value_a) != len(value_b):
            return False
        # apply this function for comparison of items in the lists
        for item_a, item_b in zip(value_a, value_b):
            if not values_equal(item_a, item_b, precision):
                return False

    elif value_a != value_b:
        return False
    return True
def keyvalue_equals(
    dict_a: Mapping,
    dict_b: Mapping,
    precision: float,
    def_equal: Callable = values_equal,
    key_equal: Mapping[Any, Callable] | None = None,
    a_is_subset: bool = False,
) -> bool:
    """Compare two dictionaries.

    .. note::
        Always use keyword arguments for all parameters with defaults.
        It is a good idea to use keyword arguments also for the first
        two parameters.

    An example of key-value texts comparison::

        >>> keyvalue_equals(
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.0
        ... c: 1,2,3,4,5
        ... d: hello,8,0.1'''
        ...     ),
        ...     text_to_keyvalue(
        ...         '''a: Hello
        ... b: 1.1
        ... c: 1,22,3,4,5
        ... d: hello,8,0.1'''
        ...     ),
        ...     precision=0.1,
        ... )
        False

    :param dict_a: first dictionary
    :param dict_b: second dictionary
    :param precision: precision with which the floating point values
        are compared (passed to equality functions)
    :param def_equal: function used for comparison by default
    :param key_equal: dictionary of functions used for comparison
        of specific keys, `def_equal` is used for the rest,
        keys in dictionary are keys in `dict_a` and `dict_b` dictionaries,
        values are the functions used to compare the given key
    :param a_is_subset: `True` if `dict_a` is a subset of `dict_b`,
        `False` otherwise
    :return: `True` if identical, `False` if different

    Use `diff_keyvalue()` to get information about differences.
    You can use this function to find out if there is a difference and then
    use `diff_keyvalue()` to determine all the differences between
    dictionaries.
    """
    key_equal = {} if key_equal is None else key_equal

    # sets compare the keys without requiring them to be mutually orderable
    if not a_is_subset and set(dict_a) != set(dict_b):
        return False

    # iterate over subset or just any if not a_is_subset
    for key in dict_a:
        # check for missing keys in superset
        if a_is_subset and key not in dict_b:
            return False
        # compare matching keys
        equal_fun = key_equal.get(key, def_equal)
        if not equal_fun(dict_a[key], dict_b[key], precision):
            return False
    return True
# TODO: should the return depend on the a_is_subset parameter?
# this function must have the same interface and behavior as keyvalue_equals
def diff_keyvalue(
    dict_a, dict_b, precision, def_equal=values_equal, key_equal=None, a_is_subset=False
):
    """Determine the difference of two dictionaries.

    The function returns missing keys and different values for common keys::

        >>> a = {"c": 2, "b": 3, "a": 4}
        >>> b = {"c": 1, "b": 3, "d": 5}
        >>> diff_keyvalue(a, b, precision=0)
        (['d'], ['a'], [('c', 2, 1)])

    You can provide only a subset of values in dict_a, in this case
    the first item in the tuple is an empty list::

        >>> diff_keyvalue(a, b, a_is_subset=True, precision=0)
        ([], ['a'], [('c', 2, 1)])

    The parameters have the same meaning as in `keyvalue_equals()`.

    :returns: A tuple of sorted lists: the first is a list of keys missing
        in dict_a, the second a list of keys missing in dict_b and the third
        is a list of mismatched values as tuples
        (key, value_from_a, value_from_b)
    :rtype: (list, list, list)

    Unlike the Python ``difflib`` package, this function does not create
    any difference output, it only returns the lists described above.
    Unlike the Python ``unittest`` ``assertDictEqual()``, this function
    does not raise an error or exception, it just determines what
    the difference is.
    """
    if key_equal is None:
        key_equal = {}

    keys_a = dict_a.keys()
    keys_b = dict_b.keys()

    # keys of b unknown to a are reported only when a is not a subset
    if a_is_subset:
        only_in_b = []
    else:
        only_in_b = [key for key in keys_b if key not in keys_a]

    only_in_a = []
    differing = []
    # iterating over a, so every key is known to be in a
    for key in keys_a:
        if key not in keys_b:
            only_in_a.append(key)
            continue
        compare = key_equal.get(key, def_equal)
        if not compare(dict_a[key], dict_b[key], precision):
            differing.append((key, dict_a[key], dict_b[key]))

    return sorted(only_in_b), sorted(only_in_a), sorted(differing)
def proj_info_equals(text_a, text_b):
    """Test if two PROJ_INFO texts are equal.

    Both texts are parsed by `text_to_keyvalue()` with ``:`` separating
    keys from values and ``,`` separating items of one value, projection
    names are unified by `unify_projection()` and the resulting dictionaries
    are compared by `keyvalue_equals()` with precision 0.000001.
    Values of the ``+towgs84`` key are compared by their sums.

    :param text_a: first PROJ_INFO text
    :param text_b: second PROJ_INFO text
    :return: `True` if the texts are considered equal, `False` otherwise
    """

    def compare_sums(list_a, list_b, precision):
        """Compare difference of sums of two lists using precision"""
        # derived from the code in grass.script.core
        # an explicit boolean is returned for both outcomes; falling off
        # the end would yield None which callers read as "not equal"
        return abs(sum(list_a) - sum(list_b)) <= precision

    sep = ":"
    val_sep = ","
    key_equal = {"+towgs84": compare_sums}
    dict_a = text_to_keyvalue(
        text_a, sep=sep, val_sep=val_sep, functions=[unify_projection]
    )
    dict_b = text_to_keyvalue(
        text_b, sep=sep, val_sep=val_sep, functions=[unify_projection]
    )
    return keyvalue_equals(
        dict_a, dict_b, precision=0.000001, def_equal=values_equal, key_equal=key_equal
    )
def proj_units_equals(text_a, text_b):
    """Test if two PROJ_UNITS texts are equal.

    Both texts are parsed by `text_to_keyvalue()` with ``:`` separating
    keys from values and ``,`` separating items of one value, unit names
    are unified by `unify_units()` and the resulting dictionaries are
    compared by `keyvalue_equals()` with precision 0.000001.
    Values of the ``unit`` and ``units`` keys are compared ignoring case.

    :param text_a: first PROJ_UNITS text
    :param text_b: second PROJ_UNITS text
    :return: `True` if the texts are considered equal, `False` otherwise
    """

    def lowercase_equals(string_a, string_b, precision=None):
        # we don't need a warning for unused precision
        # pylint: disable=W0613
        """Test equality of two strings ignoring their case using ``lower()``.

        Precision is accepted as required by `keyvalue_equals()` but ignored.
        """
        lowered_a = string_a.lower()
        lowered_b = string_b.lower()
        return lowered_a == lowered_b

    # both texts go through exactly the same parsing and unification
    parsed_a, parsed_b = (
        text_to_keyvalue(text, sep=":", val_sep=",", functions=[unify_units])
        for text in (text_a, text_b)
    )
    comparators = dict.fromkeys(("unit", "units"), lowercase_equals)
    return keyvalue_equals(
        parsed_a,
        parsed_b,
        precision=0.000001,
        def_equal=values_equal,
        key_equal=comparators,
    )
# TODO: support also float (with E, e, inf, nan, ...?) and int (###, ##.)
# http://hg.python.org/cpython/file/943d3e289ab4/Lib/decimal.py#l6098
# perhaps a separate function?
# alternative names: looks like, correspond with/to
# TODO: change checking over lines?
# TODO: change parameter order?
# TODO: the behavior with last \n is strange but now using DOTALL and $
def check_text_ellipsis(reference, actual) -> bool:
    r"""Check that the actual text matches a reference containing ellipses.

    Each ellipsis (``...``) in the reference stands for one or more
    arbitrary characters in the actual text.

    >>> check_text_ellipsis(
    ...     "Vector map <...> contains ... points.",
    ...     "Vector map <bridges> contains 5268 points.",
    ... )
    True
    >>> check_text_ellipsis(
    ...     "user: ...\\nname: elevation", "user: some_user\\nname: elevation"
    ... )
    True
    >>> check_text_ellipsis("user: ...\\nname: elevation", "user: \\nname: elevation")
    False

    The ellipsis is always considered even if it is followed by other
    dots. Consequently, a dot at the end of the sentence with a preceding
    ellipsis will work as well as a line filled with an undefined number
    of dots.

    >>> check_text_ellipsis("The result is ....", "The result is 25.")
    True
    >>> check_text_ellipsis("max ..... ...", "max ....... 6")
    True

    However, there is no way to express that the dot should be at the
    beginning and the ellipsis at the end of the group of dots.

    >>> check_text_ellipsis("The result is ....", "The result is .25")
    False

    The matching goes over lines (TODO: should this be changed?):

    >>> check_text_ellipsis("a=11\nb=...", "a=11\nb=22\n")
    True

    This function is based on a regular expression containing ``.+`` but
    no other regular expression matching will be done.

    >>> check_text_ellipsis("Result: [569] (...)", "Result: 9 (too high)")
    False

    :param reference: expected text, possibly containing ``...``
    :param actual: text to be checked
    :return: `True` if the actual text matches, `False` otherwise
    """
    # escaping turns every dot into "\.", so an ellipsis reads "\.\.\."
    pattern = re.escape(reference).replace(r"\.\.\.", ".+")
    # DOTALL lets ".+" span newlines, "$" anchors the end of the text
    matcher = re.compile(pattern + "$", re.DOTALL)
    return matcher.match(actual) is not None
def check_text_ellipsis_doctest(reference, actual):
    """Check the actual text against a reference using doctest's ellipsis.

    >>> check_text_ellipsis_doctest(
    ...     "user: ...\\nname: elevation", "user: some_user\\nname: elevation"
    ... )
    True
    >>> check_text_ellipsis_doctest(
    ...     "user: ...\\nname: elevation", "user: \\nname: elevation"
    ... )
    True

    This function is using doctest's function to check the result, so we
    will discuss here how the underlying function behaves.

    >>> checker = doctest.OutputChecker()
    >>> checker.check_output(
    ...     "user: some_user\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=None,
    ... )
    True
    >>> checker.check_output(
    ...     "user: user1\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    False
    >>> checker.check_output(
    ...     "user: ...\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    True

    The ellipsis matches also an empty string, so the following matches:

    >>> checker.check_output(
    ...     "user: ...\\nname: elevation",
    ...     "user: \\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    True

    It is robust concerning a misspelled matching string but does not allow
    an ellipsis followed by a dot, e.g. at the end of the sentence:

    >>> checker.check_output(
    ...     "user: ....\\nname: elevation",
    ...     "user: some_user\\nname: elevation",
    ...     optionflags=doctest.ELLIPSIS,
    ... )
    False

    :param reference: expected text, possibly containing ``...``
    :param actual: text to be checked
    :return: result of ``doctest.OutputChecker.check_output()``
    """
    # the checker could also be a module-level object
    return doctest.OutputChecker().check_output(
        reference, actual, optionflags=doctest.ELLIPSIS
    )
# optimal size depends on file system and maybe on hasher.block_size
_BUFFER_SIZE = 2**16


# TODO: accept also open file object
def file_md5(filename):
    """Get MD5 (check) sum of a file.

    The file is opened in binary mode and read in chunks of
    ``_BUFFER_SIZE`` bytes, so it is never loaded into memory as a whole.

    :param filename: path to the file
    :return: hexadecimal digest of the file content
    """
    digest = hashlib.md5()
    with open(filename, "rb") as stream:
        # read() returns empty bytes at the end of file which stops iter()
        for chunk in iter(lambda: stream.read(_BUFFER_SIZE), b""):
            digest.update(chunk)
    return digest.hexdigest()
def text_file_md5(
    filename, exclude_lines=None, exclude_re=None, prepend_lines=None, append_lines=None
):
    """Get a MD5 (check) sum of a text file.

    Unlike `file_md5()`, the file is read line by line in text mode. Where
    the platform line separator is not ``\\n``, line endings are replaced by
    ``\\n`` before hashing. Lines can be excluded from the sum, and extra
    lines can be prepended or appended to it.

    :param filename: path to the text file
    :param exclude_lines: list of strings to be excluded
        (newline characters should not be part of the strings; strings
        including the trailing newline are matched as well)
    :param exclude_re: regular expression string;
        lines matching this regular expression (using ``match()``, i.e.
        from the beginning of the line) will not be considered
    :param prepend_lines: list of lines to be prepended to the file
        before computing the sum; they are hashed as given
    :param append_lines: list of lines to be appended to the file
        before computing the sum; they are hashed as given
    :return: hexadecimal digest
    """
    hasher = hashlib.md5()
    regexp = re.compile(exclude_re) if exclude_re else None
    for line in prepend_lines or ():
        hasher.update(encode(line))
    with open(filename) as f:
        for line in f:
            # replace platform newlines by standard newline
            if os.linesep != "\n":
                line = line.rstrip(os.linesep) + "\n"
            # lines read from the file still carry their newline while
            # exclude_lines are documented as newline-free, so compare
            # the stripped line too
            if exclude_lines and (
                line in exclude_lines or line.rstrip("\r\n") in exclude_lines
            ):
                continue
            if regexp is not None and regexp.match(line):
                continue
            hasher.update(encode(line))
    for line in append_lines or ():
        hasher.update(encode(line))
    return hasher.hexdigest()
def files_equal_md5(filename_a, filename_b):
    """Check equality of two files according to their MD5 sums.

    :param filename_a: path to the first file
    :param filename_b: path to the second file
    :return: `True` if both files have the same MD5 sum, `False` otherwise
    """
    checksum_a = file_md5(filename_a)
    checksum_b = file_md5(filename_b)
    return checksum_a == checksum_b
def main():  # pragma: no cover
    """Run the doctests and return the number of failed examples."""
    return doctest.testmod().failed


if __name__ == "__main__":  # pragma: no cover
    sys.exit(main())