Source code for eispac.core.eisfittemplate

__all__ = ['EISFitTemplate']

import sys
import copy
import pathlib
import warnings

import numpy as np
import h5py

if sys.version_info >= (3, 11):
    import tomllib
else:
    import tomli as tomllib

[docs] class EISFitTemplate: """Multigaussian fitting template for use with MPFIT and `~eispac.core.fit_spectra` Parameters ---------- filename : str or `pathlib.Path`, optional Path to the template file. Default is "unknown_file". Note: passing a filename directly to EISFitTemplate will NOT automatically load the file; please use `~eispac.core.read_template` instead. template : dict, optional Dictionary of template parameters. Valid keys include: * ``n_gauss`` (int) - number of Gaussian components * ``n_poly`` (int) - Number of background polynomial terms. Common values are: 0 (no background), 1 (constant), and 2 (linear). * ``line_ids`` (array_like) - Strings giving the line identification for each Gaussian component. For example, "Fe XII 195.119". If not specified, placeholder values of "unknown I {INITAL CENTROID VALUE}" will be used. * ``wmin`` (float) - min wavelength value of data to use for fitting * ``wmax`` (float) - max wavelength value of data to use for fitting parinfo : list or dict, optional Either a list of dicts or a dict of lists giving the initial fitting parameters and constraints. If given a `list`, each entry must be a dict with the correct keys for a single fit parameter. If given a `dict`, each key must contain an array of list for all parameters. The order of parameters is assumed to be sets of [PEAK, CENTROID, WIDTH] for each Gaussian component followed by any coefficients for the background polynomial, starting with the LOWEST (constant) order term first. Valid keys include: * ``value`` (float) - initial parameter guess * ``fixed`` (0 or 1) - If set to "1", will not fit and just use initial value * ``limited`` (two-element array_like) - If set to "1" in the first/second value, will apply and limit to the parameter on the lower/upper side * ``limits`` (two-element array_like) - Values of the limits on the lower/upper side. Both "limited" and "limits" must be give together. * ``tied`` (str) - String defining a fixed relationship between this parameters one or more others. For example "p[0] / 2" would define a parameter to ALWAYS be exactly half the value of the first parameter. Additional keys are available, please see the MPFIT documentation in the EISPAC user's guide for more details. kwargs : value or array_like, optional For convenience, users can also pass in any "template" or "parinfo" value or array as a seperate keyword. Any values defined in this way will take priority and overwrite any previous values for that key. Attributes ---------- template : dict Dictoary of template paramaters parinfo : list of dicts List of parameter constraint dicts """ def __init__(self, filename=None, template=None, parinfo=None, **kwargs): if filename is None: self.filename_temp = 'unknown_file' else: self.filename_temp = str(filename) self.template = dict() if isinstance(template, dict): self.template = copy.deepcopy(template) # Ensure that all keys are lower case self.template = {KEY.lower():VAL for KEY, VAL in self.template.items()} elif template is not None: print('Error: Invalid datatype for "template". Please input a ' +'dictionary. Initializing an empty dict using the other input ' +'parameters.', file=sys.stderr) self.parinfo = dict() if isinstance(parinfo, (list, tuple, dict)): self.parinfo = copy.deepcopy(parinfo) elif parinfo is not None: print('Error: Invalid datatype for "parinfo". Please input a ' +'list of dicts or a dict of lists with the correct keys. ' +'Initializing an empty list of dicts using the other input ' +'parameters.', file=sys.stderr) # Copy over kwargs (if any) to the correct dict or list if len(kwargs) > 0: # Ensure that all keys are lower case kwargs = {KEY.lower():VAL for KEY, VAL in kwargs.items()} for KEY in kwargs.keys(): if KEY in ['component', 'n_gauss', 'n_poly', 'line_ids', 'wmin', 'wmax', 'data_e', 'data_x', 'data_y', 'fit', 'fit_back', 'fit_gauss', 'order']: # .template keys self.template[KEY] = copy.deepcopy(kwargs[KEY]) elif KEY in ['value', 'fixed', 'limited', 'limits', 'tied', 'parname', 'step', 'mpmaxstep', 'mpside', 'mpprint']: # .parinfo keys if isinstance(self.parinfo, dict): self.parinfo[KEY] = copy.deepcopy(kwargs[KEY]) elif isinstance(self.parinfo, list): n_new = len(kwargs[KEY]) n_par = len(self.parinfo) for p in range(min(n_new, n_par)): self.parinfo[p][KEY] = copy.deepcopy(kwargs[KEY][p]) else: print(f'Warning: "{KEY}" is not a valid template or parinfo ' +f'keyword and will be ignored. Please see the docs ' +f'for a list of supported inputs.', file=sys.stderr) # Initialize values of n_gauss and n_poly (if not given) if ('n_gauss' not in self.template.keys()) or ('n_poly' not in self.template.keys()): if len(self.parinfo) > 0: # If parinfo is defined, guess from number of values if isinstance(self.parinfo, (list, tuple)): # Standard list of dicts n_params = len(self.parinfo) elif isinstance(self.parinfo, dict): # Alternative dict of arrays n_params = 1 for KEY in self.parinfo.keys(): n_params = max(n_params, len(self.parinfo[KEY])) # Add the missing value to the template if 'n_gauss' not in self.template.keys(): self.template['n_gauss'] = int(n_params / 3) if 'n_poly' not in self.template.keys(): self.template['n_poly'] = int(n_params % 3) else: # If neither template nor parinfo are defined, use defaults if 'n_gauss' not in self.template.keys(): self.template['n_gauss'] = 1 if 'n_poly' not in self.template.keys(): self.template['n_poly'] = 1 # If parinfo is not given, initialize minimal parinfo list of dicts if len(self.parinfo) == 0: n_gauss = max(1, self.template.get('n_gauss', 1)) n_poly = max(0, self.template.get('n_poly', 1)) par_list = [] for g in range(3*n_gauss): par_list.append({'value': 1.0, 'limited': np.array([1, 0], dtype='int16')}) for p in range(n_poly): par_list.append({'value': 1.0, 'limited': np.array([0, 0], dtype='int16')}) self.parinfo = par_list # Finally, validate the actual values and fix format issues # Note: we need to validate parinfo first, so we can convert the format # and more easily check the template values of n_gauss and n_poly self._validate_parinfo_list() self._validate_template_dict() def _validate_parinfo_list(self): """Helper function for validating the length and keys of .parinfo """ # Default dict for a single parameter default_parinfo = {'fixed': 0, 'limited': np.array([1, 0], dtype='int16'), 'limits': np.zeros(2), 'tied': np.array(' ', dtype='<U16'), 'value': 1.0} # If given a dict of arrays (or lists), convert to a list of dicts if isinstance(self.parinfo, dict): # First, ensure all keys are lower case input_parinfo = {KEY.lower():VAL for KEY, VAL in self.parinfo.items()} if 'value' in input_parinfo.keys(): n_params = len(self.parinfo['value']) else: max_len = 0 for KEY in input_parinfo.keys(): this_len = len(input_parinfo[KEY]) max_len = max(this_len, max_len) n_params = max_len print('Warning: No initial values given in parinfo! Please ' +'define a "value" key for each parameter', file=sys.stderr) # Extract the values and assemble the correct list of dicts par_list = [] for p in range(n_params): info_dict = {} for KEY in input_parinfo.keys(): try: if KEY in ['tied', 'parname']: # strings info_dict[KEY] = np.array(str(input_parinfo[KEY][p]), dtype='<U16') elif KEY in ['limited', 'limits']: # two-element info_dict[KEY] = copy.deepcopy(default_parinfo[KEY]) info_dict[KEY][0] = input_parinfo[KEY][p][0] info_dict[KEY][1] = input_parinfo[KEY][p][1] elif KEY in ['fixed', 'mpside', 'mpprint']: # integers info_dict[KEY] = int(input_parinfo[KEY][p]) elif KEY in ['value', 'step', 'mpmaxstep']: # floats info_dict[KEY] = float(input_parinfo[KEY][p]) else: print(f'Warning: {KEY} is not a valid parinfo key ' +'and will be ignored.', file=sys.stderr) except: # If there is an issue, give a warning and continue print(f'Warning: There was an issue loading the {KEY} ' +f'key for parameter index {p}. Skipping for now; ' +f'please check the format and datatype.', file=sys.stderr) continue par_list.append(info_dict) self.parinfo = par_list # Check for missing keys expected by eispac and fill with default values print_limit_warning = False n_params = len(self.parinfo) for p in range(n_params): # First, ensure all keys are lower case self.parinfo[p] = {KEY.lower():VAL for KEY, VAL in self.parinfo[p].items()} for KEY, VALUE in default_parinfo.items(): # Add missing keys that are required for printing details if KEY not in self.parinfo[p].keys(): self.parinfo[p][KEY] = copy.deepcopy(VALUE) else: # Check shape of two-element keys that DO exist if KEY in ['limited', 'limits']: if len(self.parinfo[p][KEY]) != 2: # Overwrite if given wrong shape self.parinfo[p][KEY] = copy.deepcopy(VALUE) print_limit_warning = True if print_limit_warning: print(f'Warning: incorrect length of "limited" and/or "limits" ' +f'in found parinfo. Both "limited" and "limits" should be ' +f'two-element lists or arrays. Invalid inputs have been ' +f'replaced with default values.', file=sys.stderr) def _validate_template_dict(self): """Helper function for validating the keys and values of .template """ # Ensure that all keys are lower case self.template = {KEY.lower():VAL for KEY, VAL in self.template.items()} # Validate keys actaully used by eispac n_gauss = self.template.get('n_gauss', -1) if isinstance(n_gauss, (int, float)) and int(n_gauss) > 0: self.template['n_gauss'] = int(n_gauss) else: self.template['n_gauss'] = -1 print('Error: Invalid value for n_gauss. ' +'Please input a non-zero integer', file=sys.stderr) n_poly = self.template.get('n_poly', -1) if isinstance(n_poly, (int, float)) and int(n_poly) >= 0: self.template['n_poly'] = int(n_poly) else: self.template['n_poly'] = -1 print('Error: Invalid value for n_poly. ' +'Please input an integer >= 0.', file=sys.stderr) wmin = self.template.get('wmin', 170.0) if isinstance(wmin, (int, float)): self.template['wmin'] = float(wmin) else: self.template['wmin'] = 170.0 print('Error: Invalid datatype for wmin. Please input a float. ' +'Using a default value of 170.', file=sys.stderr) wmax = self.template.get('wmax', 292.0) if isinstance(wmax, (int, float)): self.template['wmax'] = float(wmax) else: self.template['wmax'] = 292.0 print('Error: Invalid datatype for wmax. Please input a float. ' +'Using a default value of 292.', file=sys.stderr) comp_num = self.template.get('component', 1) if isinstance(comp_num, (int, float)): self.template['component'] = int(comp_num) else: self.template['component'] = 1 print('Error: Invalid datatype for component. Please input an ' +'integer. Using a default value of 1.', file=sys.stderr) # Check number of parameters and components n_params = len(self.parinfo) if n_params != (3*self.template['n_gauss'] + self.template['n_poly']): # When mismatched, assume input parinfo is correct # Then, update/overwrite parts of the template as needed n_gauss = int(n_params / 3) n_poly = int(n_params % 3) self.template['n_gauss'] = n_gauss self.template['n_poly'] = n_poly self.template['line_ids'] = np.zeros(n_gauss, dtype='<U32') for g in range(n_gauss): self.template['line_ids'][g] = 'unknown I 001.000' print(f'Warning: the values of n_gauss and n_poly do not match the ' +f'length of parinfo. Defaulting to the best guess values of ' +f'n_gauss = {n_gauss} and n_poly = {n_poly}. Template dict ' +f' values and line_ids have been overwritten to match.', file=sys.stderr) # Check for line_id and convert as needed n_gauss = self.template['n_gauss'] n_poly = self.template['n_poly'] if 'line_ids' not in self.template.keys(): self.template['line_ids'] = np.zeros(n_gauss, dtype='<U32') for g in range(n_gauss): self.template['line_ids'][g] = 'unknown I 001.000' elif isinstance(self.template['line_ids'], (str, int, float)): # Convert bare values to an array (actual values checked later) bare_id = self.template['line_ids'] self.template['line_ids'] = np.array([bare_id], dtype='<U24') else: # Force everything else to be an array of strings current_ids = self.template['line_ids'] self.template['line_ids'] = np.array(current_ids, dtype='<U24') # Trim or expand line_ids array to have the correct length if len(self.template['line_ids']) != n_gauss: old_id_arr = self.template['line_ids'] self.template['line_ids'] = np.zeros(n_gauss, dtype='<U32') for g in range(n_gauss): if g < len(old_id_arr): self.template['line_ids'][g] = old_id_arr[g] else: self.template['line_ids'][g] = 'unknown I 001.000' # Validate format and values of line_id strings for g in range(n_gauss): parinfo_wave_str = f"{self.parinfo[3*g+1]['value']:07.3f}" this_id = self.template['line_ids'][g].lower() if this_id.startswith(('unknown', 'no line')) or len(this_id) < 1: # If a line is unknown, just update the wavelength substring self.template['line_ids'][g] = f"unknown I {parinfo_wave_str}" else: # Parse the ID string and check values warn_bad_id = False input_id = self.template['line_ids'][g] split_id = input_id.split() # Element - can be any string, really elem_id = split_id[0] # Ionization state - only allow roman numerals less than 100 ion_id = 'I' if len(split_id) >= 2: test_ion = split_id[1].upper() num_roman = sum(map(test_ion.count, ['I','V','X','L'])) if len(test_ion) == num_roman: ion_id = test_ion else: warn_bad_id = True else: warn_bad_id = True # Wavelength - needs to have a decimal point wave_id = parinfo_wave_str if len(split_id) >= 3: test_wave = split_id[2] if test_wave.isdigit(): wave_id = test_wave+'.000' elif '.' in test_wave: wave_id = test_wave else: warn_bad_id = True else: warn_bad_id = True # Copy over the clean ID (all other string parts are ignored) clean_id = ' '.join([elem_id, ion_id, wave_id]) self.template['line_ids'][g] = clean_id if warn_bad_id: print(f'Warning: "{input_id}" is not a valid or complete ' +f'line ID string. Using a replacement ID of ' +f'"{clean_id}" instead.', file=sys.stderr) # Extract initial values and update the 'fit' array # Note: this is important for using the scale_guess() function during # the fitting process. self.template['fit'] = np.zeros(3*n_gauss + n_poly) for p in range(3*n_gauss + n_poly): self.template['fit'][p] = self.parinfo[p]['value'] def __repr__(self): rows = [] rows.append('--- EISFitTemplate SUMMARY ---') rows.append(f"filename_temp: {self.filename_temp}") rows.append(f"n_gauss: {self.template['n_gauss']}") rows.append(f"n_poly: {self.template['n_poly']}") rows.append(f"line_ids: {self.template['line_ids']}") rows.append(f"wmin, wmax: {self.template['wmin']}, {self.template['wmax']}") rows.append('') rows.append('--- PARAMETER CONSTRAINTS ---') rows.append(f"{'*':>4} {'Value':>16} {'Fixed':>7} " +f"{'Limited':>11} {'Limits':>19} {'Tied':>19}") for i, p in enumerate(self.parinfo): rows.append(f"{f'p[{i}]':>6} {p['value']:14.4f} {p['fixed']:7d} " +f"{p['limited'][0]:5d} {p['limited'][1]:5d} " +f"{p['limits'][0]:12.4f} {p['limits'][1]:12.4f} {p['tied']:>18}") return '\n'.join(rows) @property def central_wave(self): """Wavelength value in the center of the template wavelength range Note: this is calculated using the current values of 'wmin' and 'wmax' contained in the template dict. """ return self.template['wmin'] + (self.template['wmax'] - self.template['wmin']) * 0.5 @property def funcinfo(self): """List of dicts specifying each subcomponent function used in the template Note: this is generated on-demand using the current template dict and the ``get_funcinfo`` class method. """ return self.get_funcinfo(self.template)
[docs] @staticmethod def get_funcinfo(template): """Generate the ``funcinfo`` dict Returns a list of dictionaries where each entry describes the parameters for one of the fitting basis functions Parameters ---------- template : dict Returns ------- funcinfo : list """ funcinfo = [] for g in range(template['n_gauss']): funcinfo.append({'func': 'Gaussian1D', 'name': template['line_ids'][g], 'n_params': 3}) if template['n_poly'] > 0: funcinfo.append({'func': 'Polynomial1D', 'name': 'Background', 'n_params': template['n_poly']}) return funcinfo
[docs] @classmethod def read_template(cls, filename): """Load an `EISFitTemplate` from an HDF5 or TOML template file Parameters ---------- filename : str or `pathlib.Path` Path to a HDF5 template file provided with eispac or a user-made TOML template file. Returns ------- cls : `EISFitTemplate` class instance Object containing the fit template """ # NOTE: return None here rather than allow h5py to handle # exception so that spectral fitting pipeline can error # more gracefully # FIXME: replace with proper exception handling and logging if not isinstance(filename, (str, pathlib.Path)): warnings.warn('Error: Template filepath must be either a string or pathlib.Path') return None filename = pathlib.Path(filename) if not filename.is_file(): warnings.warn(f'Error: Template filepath {filename} does not exist') return None file_type = filename.suffix if file_type.lower() in ['.h5', '.hdf5']: # Load a standard format HDF5 file (probably packaged with eispac) with h5py.File(filename, 'r') as f_temp: # Template template = {} for key in f_temp['template']: val = f_temp['template/'+key] if key == 'line_ids': val = np.char.decode(val).flatten() # convert bytes to unicode elif len(val) > 1: val = np.array(val) else: val = val[0] template[key] = val # Parinfo nstr = len(f_temp['parinfo/value']) parinfo = [] for istr in range(nstr): parameter = {} for key in f_temp['parinfo']: val = f_temp['parinfo/'+key][istr] if key == 'tied': val = np.char.decode(val) # convert bytes to unicode parameter[key] = val parinfo.append(parameter) # Fix the datatypes of important keys template['n_gauss'] = int(template.get('n_gauss', 1)) template['n_poly'] = int(template.get('n_poly', 1)) template['wmin'] = float(template.get('wmin', 100)) template['wmax'] = float(template.get('wmax', 1000)) template['component'] = int(template.get('component', 1)) template['line_ids'] = template['line_ids'].astype('<U24') elif file_type.lower() == '.toml': # Load a custom template stored in a TOML file with open(filename, 'rb') as f_temp: toml_dict = tomllib.load(f_temp) # Ensure top-level keys are all lower case toml_dict = {KEY.lower(): VALUE for KEY, VALUE in toml_dict.items()} template = toml_dict.get('template', None) parinfo = toml_dict.get('parinfo', None) # note: this parinfo will be an alternative dict of lists return cls(filename, template, parinfo)