dhubax

Retrieve and stack data from Pipe3D datacubes

Mar 4th, 2021
#!/usr/bin/env python3

# System imports
import sys
import glob
import numpy as np
import pandas as pd
from astropy.io import fits
from copy import deepcopy as copy
from os.path import basename, isfile, join, isdir

# Local imports
# from pyFIT3D.common.io import get_data_from_fits
# from pyFIT3D.common.tools import read_flux_elines, read_ELINES, read_SSP, read_SFH, read_indices

def read_flux_elines(filename, filename_lines=None, cube=True):
    # _flux_elines, h = get_data_from_fits(filename, header=True)
    t = fits.open(filename)
    h = t[0].header
    _flux_elines = t[0].data
    flux_elines = {}
    units = {}
    desc = {}
    ids = {}
    if cube:
        n = h['NAXIS3']
        nx, ny = h['NAXIS1'], h['NAXIS2']
        _dim = (ny, nx)
    else:
        n = h['NAXIS1']
        ns = h['NAXIS2']
        _dim = (ns,)
        _flux_elines = _flux_elines.T
    wavelengths = np.array([])
    if filename_lines is not None:
        # TODO create lines
        ne = 0
        with open(filename_lines) as fp:
            line = fp.readline()
            while line:
                if not line.startswith('#'):
                    tmp_line = line.strip().split()
                    if len(tmp_line) > 1:
                        wavelengths = np.append(wavelengths, tmp_line[0])
                    else:
                        wavelengths = np.append(wavelengths, 0)
                    ne += 1
                line = fp.readline()
    else:
        if 'WAVE0' not in h.keys():
            print(f'{filename}: missing wavelength information.')
            return None
        for i in range(n):
            wavelengths = np.append(wavelengths, h[f'WAVE{i}'])
    # property names (flux, vel, disp, ...) come from the NAME* headers
    props = np.unique([h[f'NAME{i}'].split(' ')[0] for i in range(n)])
    n_props = len(props)
    _wavelengths = copy(wavelengths)
    wavelengths = np.unique(_wavelengths)
    nw = len(wavelengths)
    for wl in wavelengths:
        flux_elines[wl] = {}
        units[wl] = {}
        desc[wl] = {}
        ids[wl] = {}
        for p in props:
            flux_elines[wl][p] = np.zeros(_dim)
    for i in range(n):
        wl = _wavelengths[i]
        prop_lname = h[f'NAME{i}'].split(' ')
        if len(prop_lname) < 2:
            continue
        p = prop_lname[0]
        flux_elines[wl][p] = copy(_flux_elines[i])
        ids[wl][p] = i
        units[wl][p] = h.get(f'UNIT{i}', None)
        desc[wl][p] = h.get(f'NAME{i}', None)
    del _flux_elines
    return flux_elines, h, ids, desc, units

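# Example usage of read_flux_elines (a minimal sketch; the filename is
# hypothetical). Wavelength keys are strings taken from the WAVE* headers and
# property keys (flux, vel, disp, e_disp, ...) from the NAME* headers:
#
#   fe, h, ids, desc, units = read_flux_elines('flux_elines.NGC5947.cube.fits.gz')
#   ha_flux = fe['6562.68']['flux']  # 2D Halpha flux map
#   ha_vel = fe['6562.68']['vel']    # 2D Halpha velocity map
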
def read_SSP(filename):
    # _SSP, h = get_data_from_fits(filename, header=True)
    t = fits.open(filename)
    h = t[0].header
    _SSP = t[0].data
    units = {}
    desc = {}
    # the first three channels of the SSP cube are fixed
    SSP = {
        'V': copy(_SSP[0]),
        'cont_seg': copy(_SSP[1]),
        'scale_seg': copy(_SSP[2]),
    }
    ids = {
        'V': 0,
        'cont_seg': 1,
        'scale_seg': 2,
    }
    units['V'] = h['UNITS_0'].strip()
    units['cont_seg'] = h['UNITS_1'].strip()
    units['scale_seg'] = h['UNITS_2'].strip()
    desc['V'] = h['DESC_0'].strip()
    desc['cont_seg'] = h['DESC_1'].strip()
    desc['scale_seg'] = h['DESC_2'].strip()
    # the remaining channel names are parsed from the FILE_* headers
    for i in range(3, h['NAXIS3']):
        _s = h[f'FILE_{i}'].strip().split('.')[2].split('_')
        _tmp = '_'.join(_s[1:-1])
        SSP[_tmp] = copy(_SSP[i])
        ids[_tmp] = i
        units[_tmp] = h[f'UNITS_{i}'].strip()
        desc[_tmp] = h[f'DESC_{i}'].strip()
    del _SSP
    return SSP, h, ids, desc, units

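# Example usage of read_SSP (a minimal sketch; the filename is hypothetical).
# Besides 'V', 'cont_seg' and 'scale_seg', the key names depend on the FILE_*
# headers of the cube:
#
#   ssp, h, ids, desc, units = read_SSP('NGC5947.SSP.cube.fits.gz')
#   v_band = ssp['V']  # V-band map, also used by the main block below
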
def read_ELINES(filename):
    # _ELINES, h = get_data_from_fits(filename, header=True)
    t = fits.open(filename)
    h = t[0].header
    _ELINES = t[0].data
    n_ELINES = h['NAXIS3']
    units = {}
    desc = {}
    # the first two channels are the Halpha velocity and dispersion maps
    ids = {
        'v_Halpha': 0,
        'disp_Halpha': 1,
    }
    ELINES = {
        'v_Halpha': copy(_ELINES[0]),
        'disp_Halpha': copy(_ELINES[1]),
    }
    units['v_Halpha'] = h['UNITS_0'].strip()
    units['disp_Halpha'] = h['UNITS_1'].strip()
    desc['v_Halpha'] = h['DESC_0'].strip()
    desc['disp_Halpha'] = h['DESC_1'].strip()
    # the remaining channels are named after the first word of DESC_*
    for i in range(2, n_ELINES):
        _tmp = h[f'DESC_{i}'].strip().split(' ')[0]
        ELINES[_tmp] = copy(_ELINES[i])
        units[_tmp] = h[f'UNITS_{i}'].strip()
        desc[_tmp] = h[f'DESC_{i}'].strip()
        ids[_tmp] = i
    del _ELINES
    return ELINES, h, ids, desc, units

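# Example usage of read_ELINES (a minimal sketch; the filename is
# hypothetical). Emission-line keys such as 'Halpha' or '[NII]6583' come from
# the DESC_* headers:
#
#   el, h, ids, desc, units = read_ELINES('NGC5947.ELINES.cube.fits.gz')
#   v_ha = el['v_Halpha']  # Halpha velocity map
#   f_ha = el['Halpha']    # Halpha flux map
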
def read_SFH(filename):
    # _SFH, h = get_data_from_fits(filename, header=True)
    t = fits.open(filename)
    h = t[0].header
    _SFH = t[0].data
    name = h['OBJECT'].strip()
    n_models = len([i for i in range(h['NAXIS3']) if f'{name}_NORM' in h[f'FILE_{i}']])
    mets_str = [
        h[f'FILE_{i}'].split('_')[1]
        for i in range(h['NAXIS3']) if 'NORM_met' in h[f'FILE_{i}']
    ]
    ages_str = [
        h[f'FILE_{i}'].split('_')[1]
        for i in range(h['NAXIS3']) if 'NORM_age' in h[f'FILE_{i}']
    ]
    age_models = np.asarray([float(x) for x in ages_str])
    met_models = np.asarray([float(x) for x in mets_str])
    n_age = len(age_models)
    n_met = len(met_models)
    # recover the (age, met) pair of each SSP model from its DESC_* header,
    # formatted as 'Luminosity Fraction for age-met AGE-MET SSP'
    age_met_models = [
        h[f'DESC_{i}'].replace('Luminosity Fraction for age-met ', '').replace(' SSP', '').split('-')
        for i in range(h['NAXIS3']) if f'{name}_NORM' in h[f'FILE_{i}']
    ]
    age_met_models_index = []
    for age, met in age_met_models:
        i_age = np.arange(n_age)[float(age) == age_models]
        i_met = np.arange(n_met)[float(met) == met_models]
        age_met_models_index.append((i_age, i_met))
    nx, ny = h['NAXIS1'], h['NAXIS2']
    SFH = {
        'models': np.zeros((n_models, ny, nx)),
        'age': np.zeros((n_age, ny, nx)),
        'met': np.zeros((n_met, ny, nx))
    }
    units = {
        'models': [],
        'age': [],
        'met': []
    }
    desc = {
        'models': [],
        'age': [],
        'met': []
    }
    ids = {
        'models': [],
        'age': [],
        'met': []
    }
    j, k, l = 0, 0, 0
    for i in range(h['NAXIS3']):
        _id = h[f'ID_{i}'] - 1
        val = _SFH[_id]
        try:
            _desc = h[f'DESC_{i}']
        except KeyError:
            _desc = None
        try:
            _units = h[f'UNITS_{i}']
        except KeyError:
            _units = 'fraction'
        if f'{name}_NORM_{_id}' in h[f'FILE_{i}']:
            SFH['models'][j] = copy(val)
            units['models'].append(_units)
            desc['models'].append(_desc)
            ids['models'].append(i)
            j += 1
        elif 'NORM_age' in h[f'FILE_{i}']:
            SFH['age'][k] = copy(val)
            units['age'].append(_units)
            desc['age'].append(_desc)
            ids['age'].append(i)
            k += 1
        elif 'NORM_met' in h[f'FILE_{i}']:
            SFH['met'][l] = copy(val)
            units['met'].append(_units)
            desc['met'].append(_desc)
            ids['met'].append(i)
            l += 1
    del _SFH
    models_metainfo = [age_met_models, age_met_models_index, age_models, met_models]
    return SFH, h, ids, desc, units, models_metainfo

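# Example usage of read_SFH (a minimal sketch; the filename is hypothetical):
#
#   sfh, h, ids, desc, units, metainfo = read_SFH('NGC5947.SFH.cube.fits.gz')
#   age_met, age_met_index, ages, mets = metainfo
#   young = sfh['age'][0]  # luminosity-fraction map of the first age bin
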
def read_indices(filename):
    # _indices, h = get_data_from_fits(filename, header=True)
    t = fits.open(filename)
    h = t[0].header
    _indices = t[0].data
    indices = {}
    units = None
    desc = None
    ids = {}
    n = h['NAXIS3']
    nx, ny = h['NAXIS1'], h['NAXIS2']
    # channel names come from the INDEX* headers; `desc` keeps the last one read
    for i in range(n):
        desc = h[f'INDEX{i}']
        indices[desc] = copy(_indices[i])
        ids[desc] = i
    del _indices
    return indices, h, ids, desc, units

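# Example usage of read_indices (a minimal sketch; the filename and the 'Hd'
# key are hypothetical, since index names come from the INDEX* headers):
#
#   ind, h, ids, desc, units = read_indices('indices.CS.NGC5947.cube.fits.gz')
#   hd_map = ind['Hd']  # 2D map of one stellar index
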
def iter_CALIFA_gals_from_datacubes(workdir, list_gals=None):
    """
    Iterate over galaxy names from a CALIFA datacubes workdir.

    Parameters
    ----------
    workdir : str
        Datacubes directory.

    list_gals : str or None
        CSV or single-column file with a list of galaxies. If `list_gals`
        is a CSV file, only the first column is read (so get_proc_elines.csv
        can be used directly as a list of galaxies). If None, the list of
        galaxies is generated from all SSP datacube filenames inside
        `workdir`. Defaults to None.

    Yields
    ------
    [str, str] :
        The workdir and the galaxy name.
    """
    if list_gals is not None:
        gals = np.genfromtxt(list_gals, usecols=[0], comments='#', delimiter=',', dtype='str')
        gals = gals.tolist()
    else:
        gals = []
        for _f in glob.glob(join(workdir, '*.SSP.cube.fits.gz')):
            gals.append(basename(_f).split('.')[0])
    for g in gals:
        yield workdir, g

def iter_MaNGA_gals_from_datacubes(workdir, list_gals=None):
    """
    Iterate over galaxy names from a MaNGA datacubes workdir. It differs from
    `iter_CALIFA_gals_from_datacubes` because of the hierarchy of the MaNGA
    directories (the datacubes are divided into workdir/plate/ifu/*).

    Parameters
    ----------
    workdir : str
        Datacubes directory.

    list_gals : str or None
        CSV or single-column file with a list of galaxies. If `list_gals`
        is a CSV file, only the first column is read (so get_proc_elines.csv
        can be used directly as a list of galaxies). If None, the list of
        galaxies is generated from all SSP datacube filenames inside
        `workdir`. Defaults to None.

    Yields
    ------
    [str, str] :
        The workdir (i.e. `workdir` joined with the plate and ifu parsed from
        the galaxy name) and the galaxy name.
    """
    if list_gals is not None:
        gals = np.genfromtxt(list_gals, usecols=[0], comments='#', delimiter=',', dtype='str')
        gals = gals.tolist()
    else:
        gals = []
        for wd_plate in filter(isdir, glob.glob(join(workdir, '*'))):
            for wd_ifu in filter(isdir, glob.glob(join(wd_plate, '*'))):
                for _f in glob.glob(join(wd_ifu, '*.SSP.cube.fits.gz')):
                    gals.append(basename(_f).split('.')[0])
    for g in gals:
        # MaNGA names look like 'manga-PLATE-IFU'
        _split = g.split('-')
        plate = _split[1]
        ifu = _split[2]
        yield join(workdir, join(plate, ifu)), g

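# Example usage of the iterators (a minimal sketch; the paths are
# hypothetical):
#
#   for wdir, gal in iter_CALIFA_gals_from_datacubes('/path/to/dataproducts'):
#       print(wdir, gal)
#
#   for wdir, gal in iter_MaNGA_gals_from_datacubes('/path/to/manga/outputs'):
#       # wdir already includes the plate/ifu subdirectories
#       print(wdir, gal)
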
if __name__ == '__main__':
    ELINES_file = '{}.ELINES.cube.fits.gz'
    flux_elines_file = 'flux_elines.{}.cube.fits.gz'
    SSP_file = '{}.SSP.cube.fits.gz'
    SFH_file = '{}.SFH.cube.fits.gz'
    indices_file = 'indices.CS.{}.cube.fits.gz'
    mask_file = 'mask.{}.g.fits.gz'

    # SOME FLUX ELINES KEYS
    fek = {'Ha': '6562.68', 'NII6548': '6548.08', 'NII6583': '6583.41',
           '[OIII]': '5006.84', '[OII]': '3727.4', 'Hb': '4861.32'}

    # SOME ELINES KEYS
    Ek = {'Ha': 'Halpha', 'NII6583': '[NII]6583', 'NII6548': '[NII]6548'}

    df = []

    ##############################################################
    # FOR MANGA DATACUBES UNCOMMENT AND EDIT THE FOLLOWING LINES

    # workdir = '/mnt/NASfilemon/MaNGA/v3_1_1/out_newver'
    # list_gals = None
    # for _wdir, g in iter_MaNGA_gals_from_datacubes(workdir=workdir, list_gals=list_gals):

    ##############################################################

    ##############################################################
    # FOR CALIFA DATACUBES UNCOMMENT AND EDIT THE FOLLOWING LINES

    workdir = '/mnt/NASfilemon/disk-d/sanchez/ppak/legacy/DATA/SSP/analysis_v2.2_MaStars_sLOG/dataproducts'
    list_gals = '/mnt/NASfilemon/califa/get_proc_elines_CALIFA.all_good.csv'
    for _wdir, g in iter_CALIFA_gals_from_datacubes(workdir=workdir, list_gals=list_gals):

    ##############################################################

        # build the filenames of all Pipe3D dataproducts for this galaxy
        _f_fe = join(_wdir, flux_elines_file.format(g))
        _f_EL = join(_wdir, ELINES_file.format(g))
        _fSSP = join(_wdir, SSP_file.format(g))
        _fSFH = join(_wdir, SFH_file.format(g))
        _find = join(_wdir, indices_file.format(g))
        _fmsk = join(_wdir, mask_file.format(g))

        if not isfile(_fmsk):
            print(f'{basename(sys.argv[0])}: {g}: {_fmsk}: missing mask file')
            _mask = None
        else:
            # _mask = get_data_from_fits(_fmsk)
            _mask = fits.open(_fmsk)[0].data

        if not (isfile(_f_fe) and isfile(_f_EL) and isfile(_fSSP) and isfile(_fSFH) and isfile(_find)):
            print(f'{basename(sys.argv[0])}: {g}: missing cubes')
            continue

        # read cubes
        r = read_flux_elines(_f_fe)
        _flux_elines, fe_h, fe_ids, fe_desc, fe_units = r
        r = read_ELINES(_f_EL)
        _ELINES, E_h, E_ids, E_desc, E_units = r
        r = read_SSP(_fSSP)
        _SSP, SSP_h, SSP_ids, SSP_desc, SSP_units = r
        _V = _SSP['V']
        r = read_SFH(_fSFH)
        _SFH, SFH_h, SFH_ids, SFH_desc, SFH_units, SFH_metainfo = r
        r = read_indices(_find)
        _ind, ind_h, ind_ids, ind_desc, ind_units = r

        V = _V.flatten().tolist()
        name = [g]*len(V)

        if _mask is None:
            mask = np.ones_like(V).tolist()
        else:
            mask = _mask.flatten().tolist()

        # HERE YOU HAVE TO BUILD YOUR OWN DATAFRAME WITH THE NEEDED DATA FLATTENED
        df.append(
            pd.DataFrame(
                {
                    # NAME TAG
                    'name': name,

                    # V-BAND MAP
                    'V': V,

                    # stellar mask used in CALIFA
                    'mask': mask,

                    'fedOII': _flux_elines[fek['[OII]']]['disp'].flatten().tolist(),
                    'fedOIII': _flux_elines[fek['[OIII]']]['disp'].flatten().tolist(),
                    'fedOII_err': _flux_elines[fek['[OII]']]['e_disp'].flatten().tolist(),
                    'fedOIII_err': _flux_elines[fek['[OIII]']]['e_disp'].flatten().tolist(),

                    #### HA DATA FROM ELINES AND FLUX_ELINES ####
                    # 'EfHa': _ELINES[Ek['Ha']].flatten().tolist(),
                    # 'EvHa': _ELINES[f"v_{Ek['Ha']}"].flatten().tolist(),
                    # 'EdHa': _ELINES[f"disp_{Ek['Ha']}"].flatten().tolist(),
                    # 'fefHa': _flux_elines[fek['Ha']]['flux'].flatten().tolist(),
                    # 'fevHa': _flux_elines[fek['Ha']]['vel'].flatten().tolist(),
                    # 'fedHa': _flux_elines[fek['Ha']]['disp'].flatten().tolist(),
                    #############################################

                    #### [NII] DATA FROM ELINES AND FLUX_ELINES ####
                    # 'EfNII6583': _ELINES[Ek['NII6583']].flatten().tolist(),
                    # 'EfNII6548': _ELINES[Ek['NII6548']].flatten().tolist(),
                    # 'fefNII6583': _flux_elines[fek['NII6583']]['flux'].flatten().tolist(),
                    # 'fefNII6548': _flux_elines[fek['NII6548']]['flux'].flatten().tolist(),
                    ################################################

                }
            )
        )

    # generate the final dataframe and output it to a CSV file
    df = pd.concat(df, ignore_index=True)
    df.to_csv('datacubes_retrieve.csv')