# Source code for eva.utilities.stats

# (C) Copyright 2021-2023 NOAA/NWS/EMC
#
# (C) Copyright 2021-2022 United States Government as represented by the Administrator of the
# National Aeronautics and Space Administration. All Rights Reserved.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.


# --------------------------------------------------------------------------------------------------


import numpy as np

from eva.utilities.utils import slice_var_from_str


# --------------------------------------------------------------------------------------------------


def get_field_data(logger, field, data_collections):

    """
    Fetch the data for one configured field as a flat array without NaNs.

    Args:
        logger (Logger): Logger used to abort when the field name is malformed.
        field (dict): Field configuration. Must hold 'field_name' in the form
                      collection::group::variable and may hold 'channel'.
        data_collections (DataCollections): Object the variable data is read from.

    Returns:
        ndarray: Flattened field data with NaN entries removed.
    """

    # Break the fully qualified name into its collection, group and variable parts
    name_parts = field['field_name'].split('::')
    if len(name_parts) != 3:
        logger.abort("In stats_helper the variable 'var_cgv' does not appear to " +
                     "be in the required format of collection::group::variable.")

    # The channel is optional; None is passed through when it is not configured
    channel = field.get('channel')

    # Read the variable, then let slice_var_from_str apply any slicing requested in the field
    values = data_collections.get_variable_data(name_parts[0], name_parts[1], name_parts[2],
                                                channel)
    values = slice_var_from_str(field, values, logger)

    # Collapse to one dimension and keep only the entries that are not NaN
    flat_values = values.flatten()
    return flat_values[~np.isnan(flat_values)]


# --------------------------------------------------------------------------------------------------


def stats_helper(logger, plot_obj, data_collections, config):

    """
    Add specified statistics to a plot.

    For every entry in config['fields'] the field data is retrieved with get_field_data, the
    statistics listed in config['statistics_variables'] are computed, and the result is added
    to the plot as a single text annotation of the form 'stat = value | stat = value'.

    Args:
        logger (Logger): The logging object. Its abort method is called for an unsupported
                         statistic.
        plot_obj: The declarative plotting object; its add_text method receives the annotation.
        data_collections (DataCollections): The eva data collections object.
        config (dict): The input configuration dictionary. Reads 'fields',
                       'statistics_variables' and the optional 'round' (default 3). Each field
                       may provide 'xloc' (default 0.5), 'yloc' (default -0.15) and 'kwargs'.

    Returns:
        None
    """

    # Statistics computed through the matching numpy nan-aware function (np.nanmin, ...)
    nan_stats = ('min', 'max', 'mean', 'median', 'std', 'var')

    # List of data to make stats for
    fields = config['fields']

    # List of statistics to include
    stats_variables = config['statistics_variables']

    # Rounding. Converted once so the configured value is passed as a number rather than being
    # interpolated into evaluated code.
    digits = int(config.get('round', 3))

    # Retrieve the data of every field once; it is needed both for the column width of 'n'
    # and for the statistics themselves
    fields_data = [get_field_data(logger, field, data_collections) for field in fields]

    # Find the max data length in order to format the string. The default keeps an empty
    # list of fields from raising.
    n_len = str(len(str(max((len(data) for data in fields_data), default=0))))

    # Format dictionary
    double_format = "{:.4E}"
    format_dict = {stat: double_format for stat in nan_stats}
    format_dict['n'] = "{:" + n_len + "d}"

    # Loop over fields and assemble statistics as a string
    for field, field_data in zip(fields, fields_data):

        # Stats will fail if the field data is empty, so substitute placeholder values.
        # The length is tested (not .any()) so that valid all-zero data is left untouched.
        # NOTE(review): with the placeholder in place 'n' reports 3 rather than 0 -- confirm
        # whether that is intended.
        if len(field_data) == 0:
            field_data = [1.0e38, 1.0e38, 1.0e38]

        # Loop over statistics list and collect the formatted pieces
        stats_pieces = []
        for stats_variable in stats_variables:

            if stats_variable == 'n':
                stat_value = len(field_data)
            elif stats_variable in nan_stats:
                stat_value = getattr(np, f'nan{stats_variable}')(field_data)
                stat_value = np.round(stat_value, digits)
            else:
                logger.abort(f'In stats_helper the statistic {stats_variable} is not supported.')

            stat_formatted = format_dict[stats_variable].format(stat_value)
            stats_pieces.append(f'{stats_variable} = ' + stat_formatted)

        stats_string = ' | '.join(stats_pieces)

        # Get the location for the annotation
        x_loc = field.get('xloc', 0.5)
        y_loc = field.get('yloc', -0.15)

        # Get any additional kwargs
        kwargs = field.get('kwargs', {})

        # call plot object method
        plot_obj.add_text(x_loc, y_loc, stats_string, transform='axcoords', ha='center', **kwargs)


# --------------------------------------------------------------------------------------------------