Source code for StellarPopulationModel

"""
.. moduleauthor:: Johan Comparat <johan.comparat__at__gmail.com>

General purpose:
................

The class StellarPopulationModel is a wrapper dedicated to handling the fit of stellar population models on observed spectra. 
It gathers all inputs : from the model and from the data.

*Imports*::

	import numpy as np
	import astropy.io.fits as pyfits
	import astropy.units as u
	import glob
	import pandas as pd
	import os
	from firefly_instrument import *
	from firefly_dust import *
	from firefly_fitter import *
	from firefly_library import *

"""
import numpy as np
import astropy.io.fits as pyfits
import astropy.units as u
import glob
import pandas as pd
import os
from os.path import join
#from scipy.stats import sigmaclip

#from firefly_dust import *
#import firefly_dust as f_dust
from firefly_dust import hpf, unred, determine_attenuation
from firefly_instrument import downgrade 
from firefly_fitter import fitter
from firefly_library import airtovac, convert_chis_to_probs, light_weights_to_mass, calculate_averages_pdf, normalise_spec, match_data_models


[docs]class StellarPopulationModel: """ :param specObs: specObs observed spectrum object initiated with the GalaxySpectrumFIREFLY class. :param models: choose between 'm11', 'bc03' or 'm09'. * m11 corresponds to all the models compared in `Maraston and Stromback 2011 <http://adsabs.harvard.edu/abs/2011MNRAS.418.2785M>`_. * m09 to `Maraston et al. 2009 <http://adsabs.harvard.edu/abs/2009A%26A...493..425M>`_. * bc03 to the `Bruzual and Charlot 2003 models <http://adsabs.harvard.edu/abs/2003MNRAS.344.1000B>`_. :param model_libs: only necessary if using m11. Choose between `MILES <http://adsabs.harvard.edu/abs/2011A%26A...532A..95F>`_, MILES revisednearIRslope, MILES UVextended, `STELIB <http://adsabs.harvard.edu/abs/2003A%26A...402..433L>`_, `ELODIE <http://adsabs.harvard.edu/abs/2007astro.ph..3658P>`_, `MARCS <http://adsabs.harvard.edu/abs/2008A%26A...486..951G>`_. * MILES, MILES revisednearIRslope, MILES UVextended, STELIB, ELODIE are empirical libraries. * MARCS is a theoretical library. :param imfs: choose the `initial mass function <https://en.wikipedia.org/wiki/Initial_mass_function>`_: * 'ss' for `Salpeter <http://adsabs.harvard.edu/abs/1955ApJ...121..161S>`_or * 'kr' for `Kroupa <http://adsabs.harvard.edu/cgi-bin/bib_query?arXiv:1112.3340>`_ or * 'cha' for `Chabrier <http://adsabs.harvard.edu/abs/2003PASP..115..763C>`_. Notes ----- .. note:: *This is how it proceeds :* #. reads the parameter file by using parameters_obtain(parameters.py) #. It opens the data file, model files, then it matches their resolutions by downgrading the models to instrumental and velocity dispersion resolution #. Determines dust attenuation curve to be applied to the models. Two options : through HPF fitting (3.1.) or through filtered values to determing SP properties (3.2.). #. It fits the models to the data #. Gets mass-weighted SSP contributions using saved M/L ratio. #. Convert chis into probabilities and calculates all average properties and errors (assuming the number of degrees of freedom is the number of wavelength points) #. Optionally produces a plot #. Finally, it writes the output files """ def __init__(self, specObs, outputFile, cosmo, models = 'm11', model_libs = ['MILES_UVextended'], imfs = ['ss','kr'], hpf_mode = 'on', age_limits = [6,10.1], downgrade_models = True, dust_law = 'calzetti', max_ebv = 1.5, num_dust_vals = 200, dust_smoothing_length = 200, max_iterations = 10, fit_per_iteration_cap = 1000, pdf_sampling = 300, data_wave_medium = 'vacuum', Z_limits = [-0.1,0.1], wave_limits = [0,99999990], suffix = "-fireflyFits.fits",use_downgraded_models = False): self.cosmo = cosmo self.specObs = specObs self.outputFile = outputFile #################### STARTS HERE #################### # sets the models self.models = models # m11/bc03 / m09 self.model_libs = model_libs self.suffix = suffix self.deltal_libs = [] self.vdisp_round = int(round(self.specObs.vdisp/5.0)*5.0) # rounding vDisp for the models self.use_downgraded_models = use_downgraded_models if self.models == 'm11': for m in self.model_libs: if m == 'MILES' or m == 'MILES_revisednearIRslope' or m == 'MILES_UVextended': self.deltal_libs.append(2.55) elif m == 'STELIB': self.deltal_libs.append(3.40) elif m == 'ELODIE': self.deltal_libs.append(0.55) elif m == 'MARCS': self.deltal_libs.append(0.1) elif self.models=='bc03': self.model_libs = ['STELIB_BC03'] imfs = ['cha'] self.deltal_libs = [3.00] elif self.models == 'm09': self.model_libs = ['M09'] if downgrade_models: self.deltal_libs = [0.4] else: self.deltal_libs = [3.6] # sets the Initial mass function self.imfs = imfs self.hpf_mode = hpf_mode self.age_limits = age_limits self.downgrade_models = downgrade_models self.dust_law = dust_law self.max_ebv = max_ebv self.num_dust_vals = num_dust_vals self.dust_smoothing_length = dust_smoothing_length # Specific fitting options self.max_iterations = max_iterations self.fit_per_iteration_cap = fit_per_iteration_cap # Sampling size when calculating the maximum pdf (100=recommended) self.pdf_sampling = pdf_sampling # Default is air, unless manga is used self.data_wave_medium = data_wave_medium self.Z_limits = Z_limits self.wave_limits = wave_limits
[docs] def get_model(self, model_used, imf_used, deltal, vdisp, wave_instrument, r_instrument, ebv_mw): """ Retrieves all relevant model files, in their downgraded format. If they aren't downgraded to the correct resolution / velocity dispersion, takes the base models in their native form and converts to downgraded files. :param model_used: list of models to be used, for example ['m11', 'm09']. :param imf_used: list of imf to be used, for example ['ss', 'cha']. :param deltal: delta lambda in the models. :param vdisp: velocity dispersion observed in the galaxy. :param wave_instrument: wavelength array from the observations :param r_instrument: resolution array from the observations :param ebv_mw: E(B-V) from the dust maps for the galaxy. Workflow ---------- A. loads the models m11 or m09: maps parameters to the right files. Then it constructs the model array. Finally converts wavelengths to air or vacuum. B. downgrades the model to match data resolution C. applies attenuation D. stores models in self.model_wavelength, self.model_flux, self.age_model, self.metal_model and returns it as well """ # first the m11 case if self.models == 'm11': first_file = True model_files = [] if self.use_downgraded_models : if model_used == 'MILES_UVextended' or model_used == 'MILES_revisedIRslope': model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data','SSP_M11_MILES_downgraded','ssp_M11_' + model_used+ '.' + imf_used) else: model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data','SSP_M11_'+ model_used + '_downgraded', 'ssp_M11_' +model_used +'.' + imf_used) else: if model_used == 'MILES_UVextended' or model_used == 'MILES_revisedIRslope': model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data','SSP_M11_MILES', 'ssp_M11_'+model_used+'.'+imf_used) else: model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data','SSP_M11_'+model_used ,'ssp_M11_' +model_used +'.' + imf_used) # Constructs the metallicity array of models : all_metal_files = glob.glob(model_path+'*') #print all_metal_files metal_files = [] metal = [] for z in range(len(all_metal_files)): zchar = all_metal_files[z][len(model_path):] if zchar == 'z001': znum = -0.3 elif zchar == 'z002': znum = 0.0 elif zchar == 'z004': znum = 0.3 elif zchar == 'z0001.bhb': znum = -1.301 elif zchar == 'z0001.rhb': znum = -1.302 elif zchar == 'z10m4.bhb': znum = -2.301 elif zchar == 'z10m4.rhb': znum = -2.302 elif zchar == 'z10m4': znum = -2.300 else: raise NameError('Unrecognised metallicity! Check model file names.') if znum>self.Z_limits[0] and znum<self.Z_limits[1]: metal_files.append(all_metal_files[z]) metal.append(znum) # constructs the model array model_flux, age_model, metal_model = [],[],[] for zi,z in enumerate(metal_files): print "Retrieving and downgrading models for "+z model_table = pd.read_table(z,converters={'Age':np.float64}, header=None ,usecols=[0,2,3], names=['Age','wavelength_model','flux_model'], delim_whitespace=True) age_data = np.unique(model_table['Age'].values.ravel()) for a in age_data: logyrs_a = np.log10(a)+9.0 #print "age model selection:", self.age_limits[0], logyrs_a, self.age_limits[1] if logyrs_a < self.age_limits[0] or logyrs_a > self.age_limits[1]: continue spectrum = model_table.loc[model_table.Age == a, ['wavelength_model', 'flux_model'] ].values wavelength_int,flux = spectrum[:,0],spectrum[:,1] # converts to air wavelength if self.data_wave_medium == 'vacuum': wavelength = airtovac(wavelength_int) else: wavelength = wavelength_int # downgrades the model if self.downgrade_models: mf = downgrade(wavelength,flux,deltal,self.vdisp_round, wave_instrument, r_instrument) else: mf = copy.copy(flux) # Reddens the models if ebv_mw != 0: attenuations = unred(wavelength,ebv=0.0-ebv_mw) model_flux.append(mf*attenuations) else: model_flux.append(mf) age_model.append(a) metal_model.append(metal[zi]) first_model = False print "Retrieved all models!" self.model_wavelength, self.model_flux, self.age_model, self.metal_model = wavelength, model_flux, age_model, metal_model return wavelength, model_flux, age_model, metal_model elif self.models == 'm09': first_file = True model_files = [] if self.use_downgraded_models: model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data', 'UVmodels_Marastonetal08b_downgraded') else: model_path = join(os.environ['STELLARPOPMODELS_DIR'],'data', 'UVmodels_Marastonetal08b') # Gathers the list of models with metallicities and ages of interest: all_metal_files = glob.glob(model_path+'*') metal_files = [] metal = [] for z in range(len(all_metal_files)): zchar = all_metal_files[z].split('.')[1][2:] if zchar == 'z001': znum = -0.3 elif zchar == 'z002': znum = 0.0 elif zchar == 'z004': znum = 0.3 elif zchar == 'z0001': znum = -1.300 else: raise NameError('Unrecognised metallicity! Check model file names.') if znum>self.Z_limits[0] and znum<self.Z_limits[1]: metal_files.append(all_metal_files[z]) metal.append(znum) # constructs the model array model_flux, age_model, metal_model = [],[],[] for zi,z in enumerate(metal_files): print "Retrieving and downgrading models for "+z model_table = pd.read_table(z,converters={'Age':np.float64}, header=None ,usecols=[0,2,3], names=['Age','wavelength_model','flux_model'], delim_whitespace=True) age_data = np.unique(model_table['Age'].values.ravel()) for a in age_data: logyrs_a = np.log10(a)+9.0 #print "age model selection:", self.age_limits[0], logyrs_a, self.age_limits[1] if logyrs_a < self.age_limits[0] or logyrs_a > self.age_limits[1]: continue spectrum = model_table.loc[model_table.Age == a, ['wavelength_model', 'flux_model'] ].values wavelength_int,flux = spectrum[:,0],spectrum[:,1] # converts to air wavelength if self.data_wave_medium == 'vacuum': wavelength = airtovac(wavelength_int) else: wavelength = wavelength_int # downgrades the model if self.downgrade_models: mf = downgrade(wavelength,flux,deltal,self.vdisp_round, wave_instrument, r_instrument) else: mf = copy.copy(flux) # Reddens the models if ebv_mw != 0: attenuations = unred(wavelength,ebv=0.0-ebv_mw) model_flux.append(mf*attenuations) else: model_flux.append(mf) age_model.append(a) metal_model.append(metal[zi]) first_model = False print "Retrieved all models!" self.model_wavelength, self.model_flux, self.age_model, self.metal_model = wavelength, model_flux, age_model, metal_model return wavelength, model_flux, age_model, metal_model
[docs] def fit_models_to_data(self): """ Once the data and models are loaded, then execute this function to find the best model. It loops overs the models to be fitted on the data: #. gets the models #. matches the model and data to the same resolution #. normalises the spectra """ for mi,mm in enumerate(self.model_libs): # loop over the models for ii in self.imfs: # loop over the IMFs # A. gets the models print "getting the models" deltal = self.deltal_libs[mi] model_wave_int, model_flux_int, age, metal = self.get_model( mm, ii, deltal, self.specObs.vdisp, self.specObs.restframe_wavelength, self.specObs.r_instrument, self.specObs.ebv_mw) # B. matches the model and data to the same resolution print "Matching models to data" wave, data_flux, error_flux, model_flux_raw = match_data_models( self.specObs.restframe_wavelength, self.specObs.flux, self.specObs.bad_flags, self.specObs.error, model_wave_int, model_flux_int, self.wave_limits[0], self.wave_limits[1], saveDowngradedModel = False) # C. normalises the models to the median value of the data print "Normalising the models" model_flux, mass_factors = normalise_spec(data_flux, model_flux_raw) # 3. Corrects from dust attenuation if self.hpf_mode=='on': # 3.1. Determining attenuation curve through HPF fitting, apply attenuation curve to models and renormalise spectra best_ebv, attenuation_curve = determine_attenuation(wave, data_flux, error_flux, model_flux, self, age, metal) model_flux_atten = np.zeros(np.shape(model_flux_raw)) for m in range(len(model_flux_raw)): model_flux_atten[m] = attenuation_curve * model_flux_raw[m] model_flux, mass_factors = normalise_spec(data_flux, model_flux_atten) # 4. Fits the models to the data light_weights, chis, branch = fitter(wave, data_flux, error_flux, model_flux, self) elif self.hpf_mode == 'hpf_only': # 3.2. Uses filtered values to determing SP properties only." smoothing_length = self.dust_smoothing_length hpf_data = hpf(data_flux) hpf_models = np.zeros(np.shape(model_flux)) for m in range(len(model_flux)): hpf_models[m] = hpf(model_flux[m]) zero_dat = np.where( (np.isnan(hpf_data)) & (np.isinf(hpf_data)) ) hpf_data[zero_dat] = 0.0 for m in range(len(model_flux)): hpf_models[m,zero_dat] = 0.0 hpf_error = np.zeros(len(error_flux)) hpf_error[:] = np.median(error_flux)/np.median(data_flux) * np.median(hpf_data) hpf_error[zero_dat] = np.max(hpf_error)*999999.9 best_ebv = -99 hpf_models,mass_factors = normalise_spec(hpf_data,hpf_models) # 4. Fits the models to the data light_weights, chis, branch = fitter(wave, hpf_data,hpf_error, hpf_models, self) # 5. Get mass-weighted SSP contributions using saved M/L ratio. unnorm_mass, mass_weights = light_weights_to_mass(light_weights, mass_factors) print "Fitting complete" print "Calculating average properties and outputting" # 6. Convert chis into probabilities and calculates all average properties and errors self.dof = len(wave) probs = convert_chis_to_probs(chis, self.dof) dist_lum = self.cosmo.luminosity_distance( self.specObs.redshift).to( u.cm ).value averages = calculate_averages_pdf(probs, light_weights, mass_weights, unnorm_mass, age, metal, self.pdf_sampling, dist_lum) unique_ages = np.unique(age) marginalised_age_weights = np.zeros(np.shape(unique_ages)) marginalised_age_weights_int = np.sum(mass_weights.T,1) for ua in range(len(unique_ages)): marginalised_age_weights[ua] = np.sum(marginalised_age_weights_int[np.where(age==unique_ages[ua])]) best_fit_index = [np.argmin(chis)] best_fit = np.dot(light_weights[best_fit_index],model_flux)[0] # stores outputs in the object self.best_fit_index = best_fit_index self.best_fit = best_fit self.model_flux = model_flux self.dist_lum = dist_lum self.age = np.array(age) self.metal = np.array(metal) self.mass_weights = mass_weights self.light_weights = light_weights self.chis = chis self.branch = branch self.unnorm_mass = unnorm_mass self.probs = probs self.wave = wave self.best_fit = best_fit self.averages = averages bf_mass = (self.mass_weights[self.best_fit_index]>0)[0] bf_light = (self.light_weights[self.best_fit_index]>0)[0] mass_per_ssp = self.unnorm_mass[self.best_fit_index[0]][bf_mass]*10.0**(-17) * 4 * np.pi * self.dist_lum**2.0 age_per_ssp = self.age[bf_mass]*10**9 metal_per_ssp = self.metal[bf_mass] weight_mass_per_ssp = self.mass_weights[self.best_fit_index[0]][bf_mass] weight_light_per_ssp = self.light_weights[self.best_fit_index[0]][bf_light] order = np.argsort(-weight_light_per_ssp) print "M Msun", self.averages['stellar_mass'], np.log10(mass_per_ssp[order]) print "age Gyr", 10**self.averages['light_age'], 10**self.averages['mass_age'], age_per_ssp[order]/1e9 print "Z", self.averages['light_metal'], self.averages['mass_metal'], metal_per_ssp[order] print "SFR Msun/yr", mass_per_ssp[order]/age_per_ssp[order] print "wm", weight_mass_per_ssp[order] print "wl", weight_light_per_ssp[order] print "z, age Gyr", self.specObs.redshift, self.cosmo.age(self.specObs.redshift).value # 8. It writes the output file waveCol = pyfits.Column(name="wavelength",format="D", unit="Angstrom", array= wave) best_fitCol = pyfits.Column(name="firefly_model",format="D", unit="1e-17erg/s/cm2/Angstrom", array= best_fit) cols = pyfits.ColDefs([ waveCol, best_fitCol]) tbhdu = pyfits.BinTableHDU.from_columns(cols) tbhdu.header['HIERARCH age_universe'] = np.log10(self.cosmo.age(self.specObs.redshift).value*10**9) tbhdu.header['HIERARCH redshift'] = self.specObs.redshift # mean quantities tbhdu.header['HIERARCH age_lightW_mean'] = np.log10(10**9 * 10**averages['light_age']) tbhdu.header['HIERARCH age_lightW_mean_up'] = np.log10(10**9 * 10**averages['light_age_1_sig_plus']) # log(Gyrs) tbhdu.header['HIERARCH age_lightW_mean_low'] = np.log10(10**9 * 10**averages['light_age_1_sig_minus']) # log(Gyrs) tbhdu.header['HIERARCH metallicity_lightW_mean'] = averages['light_metal'] tbhdu.header['HIERARCH metallicity_lightW_mean_up'] = averages['light_metal_1_sig_plus'] tbhdu.header['HIERARCH metallicity_lightW_mean_low'] = averages['light_metal_1_sig_minus'] tbhdu.header['HIERARCH age_massW_mean'] = np.log10(10**9 * 10**averages['mass_age']) tbhdu.header['HIERARCH age_massW_mean_up'] = np.log10(10**9 * 10**averages['mass_age_1_sig_plus']) # log(Gyrs) tbhdu.header['HIERARCH age_massW_mean_low'] = np.log10(10**9 * 10**averages['mass_age_1_sig_minus']) # log(Gyrs) tbhdu.header['HIERARCH metallicity_massW_mean'] = averages['mass_metal'] tbhdu.header['HIERARCH metallicity_massW_mean_up'] = averages['mass_metal_1_sig_plus'] tbhdu.header['HIERARCH metallicity_massW_mean_low'] = averages['mass_metal_1_sig_minus'] tbhdu.header['HIERARCH EBV'] = best_ebv tbhdu.header['HIERARCH stellar_mass_mean'] = averages['stellar_mass'] tbhdu.header['HIERARCH stellar_mass_mean_up'] = averages['stellar_mass_1_sig_plus'] tbhdu.header['HIERARCH stellar_mass_mean_low'] = averages['stellar_mass_1_sig_minus'] tbhdu.header['HIERARCH ssp_number'] =len(order) # quantities per SSP for iii in range(len(order)): tbhdu.header['HIERARCH stellar_mass_ssp_'+str(iii)] = np.log10(mass_per_ssp[order])[iii] tbhdu.header['HIERARCH age_ssp_'+str(iii)] = np.log10(age_per_ssp[order][iii]) tbhdu.header['HIERARCH metal_ssp_'+str(iii)] = metal_per_ssp[order][iii] tbhdu.header['HIERARCH SFR_ssp_'+str(iii)] = mass_per_ssp[order][iii]/age_per_ssp[order][iii] tbhdu.header['HIERARCH weightMass_ssp_'+str(iii)] = weight_mass_per_ssp[order][iii] tbhdu.header['HIERARCH weightLight_ssp_'+str(iii)] = weight_light_per_ssp[order][iii] prihdr = pyfits.Header() prihdr['file'] = self.specObs.path_to_spectrum prihdr['model'] = self.models prihdr['ageMin'] = self.age_limits[0] prihdr['ageMax'] = self.age_limits[1] prihdr['Zmin'] = self.Z_limits[0] prihdr['Zmax'] = self.Z_limits[1] prihdu = pyfits.PrimaryHDU(header=prihdr) thdulist = pyfits.HDUList([prihdu, tbhdu]) if os.path.isfile(self.outputFile + self.suffix ): os.remove(self.outputFile + self.suffix ) #print self.outputFile + self.suffix , thdulist, thdulist[1].data, thdulist[0].header thdulist.writeto(self.outputFile + self.suffix )