Source code for eva.utilities.stats

# (C) Copyright 2021-2023 NOAA/NWS/EMC
#
# (C) Copyright 2021-2022 United States Government as represented by the Administrator of the
# National Aeronautics and Space Administration. All Rights Reserved.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.


# --------------------------------------------------------------------------------------------------


import numpy as np

from eva.utilities.utils import slice_var_from_str


# --------------------------------------------------------------------------------------------------


def get_field_data(logger, field, data_collections):
    """
    Fetch the data for one configured field as a flat array without NaNs.

    Args:
        logger (Logger): Logger used to abort on a malformed field name and passed on to the
                         slicing helper.
        field (dict): Field configuration. Must contain 'field_name' in the form
                      collection::group::variable and may contain 'channel'.
        data_collections (DataCollections): Object queried through get_variable_data.

    Returns:
        ndarray: The field data, flattened, with NaN entries removed.
    """

    # Break the fully qualified name into its collection, group and variable parts
    name_parts = field['field_name'].split('::')
    if len(name_parts) != 3:
        logger.abort("In stats_helper the variable 'var_cgv' does not appear to "
                     "be in the required format of collection::group::variable.")
    collection = name_parts[0]
    group = name_parts[1]
    variable = name_parts[2]

    # The channel is optional; None is passed through when it is not configured
    channel = field.get('channel')

    # Pull the data and apply any slicing described by the field configuration
    values = data_collections.get_variable_data(collection, group, variable, channel)
    values = slice_var_from_str(field, values, logger)

    # Collapse to one dimension and keep only the entries that are not NaN
    flat_values = values.flatten()
    return flat_values[~np.isnan(flat_values)]


# --------------------------------------------------------------------------------------------------
def stats_helper(logger, plot_obj, data_collections, config):
    """
    Add specified statistics to a plot.

    For every configured field a single annotation string of the form
    'stat = value | stat = value | ...' is assembled and handed to plot_obj.add_text.

    Args:
        logger (Logger): The logging object. Its abort method is called for an unsupported
                         statistic.
        plot_obj: The declarative plotting object; must provide add_text.
        data_collections (DataCollections): The eva data collections object.
        config (dict): The input configuration dictionary. Reads 'fields' (list of field
                       dicts), 'statistics_variables' (list of statistic names) and the
                       optional 'round' (number of decimals, default 3). Each field dict may
                       provide 'xloc' (default 0.5), 'yloc' (default -0.15) and 'kwargs'
                       (extra keyword arguments forwarded to add_text).

    Returns:
        None
    """

    # List of data to make stats for
    fields = config['fields']

    # List of statistics to include
    stats_variables = config['statistics_variables']

    # Rounding. Coerced to int so that values such as '3' coming from a configuration file
    # keep working now that the value is passed to np.round directly.
    digits = int(config.get('round', 3))

    # Statistics computed through the matching numpy nan-aware reduction (np.nanmin, ...)
    nan_statistics = ('min', 'max', 'mean', 'median', 'std', 'var')

    # Reject unsupported statistics before any data is retrieved
    for stats_variable in stats_variables:
        if stats_variable != 'n' and stats_variable not in nan_statistics:
            logger.abort(f'In stats_helper the statistic {stats_variable} is not supported.')

    # Retrieve every field once; the data is needed for both the column width and the stats
    all_field_data = [get_field_data(logger, field, data_collections) for field in fields]

    # Width of the count column is set by the longest count. default=0 keeps an empty list of
    # fields from raising, in which case nothing is annotated.
    counts = [len(field_data) for field_data in all_field_data]
    n_format = "{:" + str(len(str(max(counts, default=0)))) + "d}"
    double_format = "{:.4E}"

    # Loop over fields and assemble statistics as a string
    for field, field_data in zip(fields, all_field_data):

        # Remember the real number of values so that 'n' is not affected by the placeholder
        count = len(field_data)

        # The reductions fail on empty input, so substitute placeholder values. The test is
        # on the length: testing truthiness of the values would also replace all-zero data.
        if count == 0:
            field_data = [1.0e38, 1.0e38, 1.0e38]

        # Loop over statistics list and assemble the individual entries
        stats_entries = []
        for stats_variable in stats_variables:
            if stats_variable == 'n':
                stat_formatted = n_format.format(count)
            else:
                stat_value = getattr(np, f'nan{stats_variable}')(field_data)
                stat_value = np.round(stat_value, digits)
                stat_formatted = double_format.format(stat_value)
            stats_entries.append(f'{stats_variable} = ' + stat_formatted)
        stats_string = ' | '.join(stats_entries)

        # Get the location for the annotation
        x_loc = field.get('xloc', 0.5)
        y_loc = field.get('yloc', -0.15)

        # Get any additional kwargs
        kwargs = field.get('kwargs', {})

        # Call plot object method
        plot_obj.add_text(x_loc, y_loc, stats_string, transform='axcoords',
                          ha='center', **kwargs)


# --------------------------------------------------------------------------------------------------