Source code for eva.data.soca_restart

# (C) Copyright 2023- NOAA/NWS/EMC
#
# (C) Copyright 2023- United States Government as represented by the Administrator of the
# National Aeronautics and Space Administration. All Rights Reserved.
#
# This software is licensed under the terms of the Apache Licence Version 2.0
# which can be obtained at http://www.apache.org/licenses/LICENSE-2.0.

# --------------------------------------------------------------------------------------------------

import numpy as np
import xarray as xr
from netCDF4 import Dataset
from eva.utilities.config import get
from eva.data.eva_dataset_base import EvaDatasetBase

# --------------------------------------------------------------------------------------------------


class SocaRestart(EvaDatasetBase):

    """
    Dataset reader that loads SOCA restart fields into the eva data collections.

    Two netCDF sources named in the dataset configuration are read: the coordinate
    variables come from the geometry file and are stored under the ``SOCAgrid`` group,
    and the requested fields come from the SOCA file and are stored under the
    ``SOCAVars`` group. Everything is combined into a single xarray dataset and added
    to the collection named in the configuration.

    Methods:
        execute(dataset_config, data_collections, timing):
            Read the configured variables and add them to the data collections.
        generate_default_config(filenames, collection_name):
            Placeholder; currently not implemented.
    """

    def execute(self, dataset_config, data_collections, timing):

        """
        Read SOCA restart data and add it to the data collections.

        Args:
            dataset_config (dict): Configuration for the dataset. The keys read here are
                ``name``, ``soca_filenames``, ``geometry_file``, ``variables``,
                ``coordinate variables`` and ``missing_value_threshold`` (the latter
                defaults to 1.0e20).
            data_collections (EvaDataCollections): Data collections to which the
                processed data will be added.
            timing: Timing information (not used by this method).
        """

        # Filenames to be read into this collection
        # -----------------------------------------
        soca_filenames = get(dataset_config, self.logger, 'soca_filenames')
        geometry_file = get(dataset_config, self.logger, 'geometry_file')

        # Get missing value threshold
        # ---------------------------
        threshold = float(get(dataset_config, self.logger, 'missing_value_threshold', 1.0e20))

        # Get collection name
        # -------------------
        collection_name = dataset_config['name']

        # Get the variables to be read
        # ----------------------------
        soca_vars = get(dataset_config, self.logger, 'variables', default=[])
        coord_vars = get(dataset_config, self.logger, 'coordinate variables', default=None)

        # A None value cannot be iterated; treat it as "no coordinate variables" so that
        # the empty-collection check below reports the problem instead of a TypeError.
        if coord_vars is None:
            coord_vars = []

        # Read the coordinate variables from the geometry file first
        # ----------------------------------------------------------
        var_dict = {}
        group_name = 'SOCAgrid'
        for var in coord_vars:
            dims, data = read_soca(geometry_file, var, self.logger)
            var_dict[group_name + '::' + var] = (dims, data)

        # SOCA variables
        # --------------
        group_name = 'SOCAVars'
        for var in soca_vars:
            dims, data = read_soca(soca_filenames, var, self.logger)
            var_dict[group_name + '::' + var] = (dims, data)

        # Create dataset from data dictionary
        # -----------------------------------
        ds = xr.Dataset(var_dict)

        # Assert that the collection contains at least one variable
        # ---------------------------------------------------------
        # Note: group_name is whichever group was processed last ('SOCAVars').
        if not ds.keys():
            self.logger.abort(f"Collection '{collection_name}', group '{group_name}' " +
                              "does not have any variables.")

        # Add the dataset to the collections
        # ----------------------------------
        data_collections.create_or_add_to_collection(collection_name, ds)

        # Nan out unphysical values
        # -------------------------
        data_collections.nan_float_values_outside_threshold(threshold)

        # Display the contents of the collections for helping the user with making plots
        # -------------------------------------------------------------------------------
        data_collections.display_collections()

    # ----------------------------------------------------------------------------------------------

    def generate_default_config(self, filenames, collection_name):

        """
        Generate a default configuration for the dataset.

        Not implemented yet: the body is a placeholder, both arguments are ignored and
        the method returns None.

        Args:
            filenames: Filenames or file paths relevant to the dataset.
            collection_name (str): Name of the collection for the dataset.

        Returns:
            None
        """

        pass
# --------------------------------------------------------------------------------------------------
def read_soca(file, variable, logger):

    """
    Load one variable from a SOCA netCDF file.

    Args:
        file (str): Path of the netCDF file to open (read-only).
        variable (str): Name of the variable to read.
        logger (Logger): Logger whose ``abort`` is called when the variable is missing.

    Returns:
        tuple: ``(dims, data)`` where ``dims`` is ``["lev", "lon", "lat"]`` when the
        variable has more than three dimensions in the file and ``["lon", "lat"]``
        otherwise, and ``data`` is the variable's values with length-1 axes squeezed out.
    """

    with Dataset(file, mode='r') as nc_file:
        try:
            # Bind the two-dimensional labels before the lookup that may raise KeyError
            dims = ["lon", "lat"]
            nc_variable = nc_file.variables[variable]
            dims = ["lev", "lon", "lat"] if len(nc_variable.dimensions) > 3 else dims
            values = np.squeeze(nc_variable[:])
        except KeyError:
            logger.abort(f"{variable} is not a valid variable. \nExiting ...")

    return dims, values
# --------------------------------------------------------------------------------------------------