# Source code for eispac.core.read_fit

__all__ = ['read_fit']

import os
import sys
import pathlib
import numpy as np
import h5py
import eispac.core.fitting_functions as fit_fns
from eispac.core.eisfitresult import EISFitResult

def walk_and_load(hdf5_file, hdf5_path, verbose=False):
    """Helper function for loading EISFitResult data from a HDF5 file.

    Walks the internal structure of an HDF5 file and recursively loads data it
    finds along the way.

    Parameters
    ----------
    hdf5_file : h5py.File object
        HDF5 file object currently open in read mode
    hdf5_path : str
        String giving the internal path to a HDF5 group or dataset
    verbose : bool, optional
        If set to True, will print the name of each data variable it read.
        Default is False.

    Returns
    -------
    output : dict, list, scalar, or numpy array
        Loaded data values. In the case of an HDF5 group, will return a dict
        (with possible nested dicts) with the discovered values. A group whose
        keys are all decimal numbers is returned as a list ordered by the
        numeric value of its keys. Byte-string and object datasets are
        converted to unicode, and single-element arrays are unwrapped to a
        plain Python scalar.

    Raises
    ------
    TypeError
        If the object found at `hdf5_path` is neither a group nor a dataset.
    """
    # Look the node up once instead of re-indexing the file for every check
    node = hdf5_file[hdf5_path]

    if isinstance(node, h5py.Group):
        grp_keys = list(node.keys())
        # isdecimal() (rather than isdigit()) guarantees int(key) will succeed
        if all(key.isdecimal() for key in grp_keys):
            # Restoring lists such as parinfo and funcinfo. The keys are
            # strings, so name-ordered iteration would put '10' ahead of '2';
            # sort numerically to recover the original list order.
            return [walk_and_load(hdf5_file, hdf5_path+'/'+key, verbose=verbose)
                    for key in sorted(grp_keys, key=int)]
        return {key: walk_and_load(hdf5_file, hdf5_path+'/'+key,
                                   verbose=verbose)
                for key in grp_keys}

    if not isinstance(node, h5py.Dataset):
        raise TypeError('Unsupported HDF5 object at ' + repr(hdf5_path)
                        + ': ' + type(node).__name__)

    # Read dataset from the file
    output = np.array(node)
    if verbose:
        print('   ', hdf5_path)

    if output.dtype.kind in ('S', 'O'):
        # Convert objects, byte or ascii strings to unicode
        try:
            # np.str_ is the same type as the np.unicode_ alias that was
            # removed in NumPy 2.0
            output = output.astype(np.str_)
        except (ValueError, TypeError):
            # The plain cast could not decode the values (UnicodeDecodeError
            # is a ValueError); decode each element explicitly as UTF-8.
            # Works for arrays of any shape, not only single values.
            decoded = [val.decode('utf-8') if isinstance(val, bytes)
                       else str(val)
                       for val in output.ravel().tolist()]
            output = np.array(decoded, dtype=np.str_).reshape(output.shape)

    if output.size == 1:
        # Extract value from a single-element array (0- or 1-D arrays only)
        output = output.item()

    return output

# function to read fit dictionary
def read_fit(filename, verbose=False):
    """Load an EISFitResult object from an HDF5 file

    Parameters
    ----------
    filename : str or `pathlib.Path` object
        String or path to the fit result file that should be loaded.
    verbose : bool, optional
        If set to True, will print the name of each data variable read in.
        Default is False.

    Returns
    -------
    fit_result : `~eispac.core.EISFitResult` object
        Copy of the fit results loaded from the file. If `filename` has an
        invalid type or does not point to an existing file, an error message
        is printed to stderr and None is returned instead.
    """
    # Input type validation (value checks are implemented later)
    if not isinstance(filename, (str, pathlib.Path)):
        print('Error: Please input a valid filepath as '
              +'either a string or pathlib.Path object', file=sys.stderr)
        return None

    # resolve() already yields an absolute path, so no separate handling of
    # a relative or empty parent directory is needed
    fit_filepath = pathlib.Path(filename).resolve()
    if not fit_filepath.is_file():
        print('Error: fit result file does not exist, ' + str(filename),
              file=sys.stderr)
        return None

    # Initialize the output EISFitResult object
    fit_result = EISFitResult(empty=True)

    # Loop over each EISFitResult attribute and load data structure from the file
    print('Reading fit result from, ')
    print(' '+str(filename))
    with h5py.File(filename, 'r') as fit_file:
        top_key_list = list(fit_file.keys())
        fit_key_list = list(fit_file['fit'].keys())
        for attr_name in top_key_list:
            attr_val = walk_and_load(fit_file, attr_name, verbose=verbose)
            setattr(fit_result, attr_name, attr_val)

    # Restore the fit function
    fit_result.fit_func = getattr(fit_fns, fit_result.func_name)

    # Make sure the .fit['line_ids'] is ALWAYS an array (for code consistency)
    fit_result.fit['line_ids'] = np.atleast_1d(fit_result.fit['line_ids'])

    # If version number is missing, try to guess it
    if 'eispac_version' not in top_key_list:
        if 'data_units' in top_key_list:
            fit_result.eispac_version = '0.9.1'
        else:
            fit_result.eispac_version = '0.8.0'

    # Add 'wave_range' to fits saved before 2021-02-19
    if 'wave_range' not in fit_key_list:
        fit_result.fit['wave_range'] = np.zeros(2)
        fit_result.fit['wave_range'][0] = fit_result.template['data_x'][0]
        fit_result.fit['wave_range'][1] = fit_result.template['data_x'][-1]

    # Restore recarray of .meta['wininfo'] (stored in HDF5 as dict of arrays)
    if 'wininfo' in fit_result.meta.keys():
        wi_dict = fit_result.meta['wininfo']
        # Explicit key order; list(wi_dict.keys()) would give alphabetical order
        key_names = ['iwin', 'line_id', 'wvl_min', 'wvl_max', 'nl', 'xs']
        # walk_and_load() unwraps single-element arrays into plain scalars, so
        # a file with only one window yields scalars here. Promote every
        # column back to a 1-D array before reading its dtype and length.
        columns = [np.atleast_1d(wi_dict[key]) for key in key_names]
        key_dtypes = [col.dtype for col in columns]
        rec_list = list(zip(*columns))
        wininfo_rec = np.rec.fromrecords(rec_list, names=key_names,
                                         formats=key_dtypes)
        fit_result.meta['wininfo'] = wininfo_rec

    return fit_result