import os
import glob
import re
import json
import numpy as np
from astropy.io import fits
import desiutil.log
import desisurvey.etc
import desisurvey.tiles
import desisurvey.ephem
import astropy.table
log = desiutil.log.get_logger()
[docs]
class subslices:
"Iterator for looping over subsets of an array"
def __init__(self, data, uind=None):
if uind is None:
self.uind = np.unique(data, return_index=True)[1]
else:
self.uind = uind.copy()
self.ind = 0
self.length = len(data)
def __iter__(self):
return self
def __len__(self):
return len(self.uind)
def __next__(self):
if self.ind == len(self.uind):
raise StopIteration
if self.ind == len(self.uind)-1:
last = self.length
else:
last = self.uind[self.ind+1]
first = self.uind[self.ind]
self.ind += 1
return first, last
def next(self):
return self.__next__()
[docs]
def cull_old_files(files, start_from):
"""Return only subset of files with EXPID larger than any EXPID in
start_from.
"""
expid = np.array([int(os.path.basename(f)[5:13]) for f in files])
# extract just the expid; better to do this with a regex, but...
maxexpid = np.max(start_from['EXPID'])
return [f for (f, e) in zip(files, expid) if e > maxexpid]
[docs]
def scan_directory(dirname, start_from=None,
offlinedepth=None, mtldone=None,
offlinetiles=None, badexp=None):
"""Scan directory for spectra with ETC statistics to collect.
Parameters
----------
dirname : str
directory path to scan. All fits files under dirname are searched
for ETC statistics.
This needs to be updated to ~DESI files only, with more care given
to where these keywords are actually found.
start_from : str
etc_stats file to start from. Nights already in the etc_stats file
will not be collected. If a YYYMMDD string, look for etc_stats file
in DESISURVEY_OUTPUT/YYYYMMDD/etc_stats-{YYYYMMDD}.fits
None indicates starting fresh.
offlinedepth : str
offline depth file to use. Fills out donefracs according to
R_DEPTH in the file, plus config.nominal_exposure_time
offlinetiles : str
offline tile completeness file to use. Fills out obsstatus according
to OBSSSTATUS in the file.
mtldone : str
mtl done file to use. Fills out done status according to presence
in mtl done file.
badexp : str
bad exposure file to use. EXPID marked bad in this file are also
marked bad in the exposures file.
"""
log.info('Scanning {} for desi exposures...'.format(dirname))
if start_from is None:
files = glob.glob(os.path.join(dirname, '**/desi*.fits.fz'),
recursive=True)
start_exps = None
else:
files = []
subdirs = os.listdir(dirname)
subdirs = np.sort(subdirs)[::-1]
if os.path.exists(start_from):
fn = start_from
else:
fn = os.path.join(os.environ['DESISURVEY_OUTPUT'], start_from)
if not os.path.exists(fn):
log.error('Could not find file {} to start from!'.format(
start_from))
return
start_exps = astropy.table.Table.read(fn)
# painful, fragile invocations to make sure that columns
# aren't truncated at fewer characters than we might want.
obstype = np.zeros(len(start_exps), dtype='U20')
obstype[:] = start_exps['OBSTYPE']
start_exps['OBSTYPE'] = obstype
program = np.zeros(len(start_exps), dtype='U20')
program[:] = start_exps['PROGRAM']
start_exps['PROGRAM'] = program
quality = np.zeros(len(start_exps), dtype='U20')
quality[:] = start_exps['QUALITY']
start_exps['QUALITY'] = quality
comments = np.zeros(len(start_exps), dtype='U80')
comments[:] = start_exps['COMMENTS']
m = start_exps['COMMENTS'].mask
comments[m] = ''
start_exps['COMMENTS'] = comments
maxexpid = np.max(start_exps['EXPID'])
for subdir in subdirs:
if not os.path.isdir(os.path.join(dirname, subdir)):
continue
files0 = glob.glob(os.path.join(dirname, subdir,
'**/desi*.fits.fz'))
expids = [re.findall(r'-(\d+)\.', os.path.basename(f)) for f
in files0]
if len(expids) == 0:
log.info('No desi exposures on night {}'.format(subdir))
continue
expids = [int(expid[0]) for expid in expids if len(expid) > 0]
files += files0
if min(expids) <= maxexpid:
break
files = cull_old_files(files, start_exps)
log.info(('Found {} new raw spectra, extracting header ' +
'information...').format(len(files)))
exps = np.zeros(len(files), dtype=[
('NIGHT', 'U8'), ('TILEID', 'i4'), ('EXPID', 'i4'),
('OBSTYPE', 'U20'), ('PROGRAM', 'U20'), ('EXPTIME', 'f4'),
('EFFTIME_ETC', 'f4'), ('EFFTIME_SPEC', 'f4'), ('EFFTIME', 'f4'),
('GOALTIME', 'f4'),
('QUALITY', 'U20'), ('COMMENTS', 'U80')])
for i, fn in enumerate(files):
if (i % 1000) == 0:
log.info('Extracting headers from file {} of {}...'.format(
i+1, len(files)))
hdr = {}
for ename in ['SPEC', 'SPS']:
try:
hdr = fits.getheader(fn, ename)
break
except Exception:
continue
hasrequest = os.path.exists(
fn.replace('desi-', 'request-').replace('.fits.fz', '.json'))
etcfn = fn.replace('desi-', 'etc-').replace('.fits.fz', '.json')
hasetc = os.path.exists(etcfn)
etctime = -1
if hasetc:
try:
etc = json.load(open(etcfn))
etctime = etc['accum']['efftime'][-1]
except Exception as e:
print('Exception reading file ', etcfn, e)
exps['NIGHT'][i] = hdr.get('NIGHT', -1)
exps['TILEID'][i] = hdr.get('TILEID', -1)
exps['EXPID'][i] = hdr.get('EXPID', -1)
exps['OBSTYPE'][i] = str(hdr.get('OBSTYPE', '')).upper().strip()
exps['PROGRAM'][i] = str(hdr.get('PROGRAM', '')).strip()
exps['EXPTIME'][i] = hdr.get('EXPTIME', -1)
exps['EFFTIME_ETC'][i] = etctime
exps['EFFTIME_SPEC'][i] = -1
exps['GOALTIME'][i] = hdr.get('GOALTIME', -1)
exps['QUALITY'][i] = 'good' if hasrequest else 'bad'
exps['COMMENTS'][i] = ''
exps['EFFTIME'] = -1
exptime_clipped = np.where(exps['EXPTIME'] < 60.0, 0, exps['EXPTIME'])
exps['EFFTIME'] = np.where(exps['EFFTIME_ETC'] >= 0, exps['EFFTIME_ETC'],
exptime_clipped)
obstypes = np.array([f.upper().strip() for f in exps['OBSTYPE']],
dtype=exps['OBSTYPE'].dtype)
m = (exps['TILEID'] == -1) | (obstypes != 'SCIENCE')
if np.any(m):
log.info('Ignoring {} files due to weird OBSTYPE or TILEID'.format(
np.sum(m)))
exps = exps[~m]
exps = exps[np.argsort(exps['EXPID'])]
if start_exps is not None:
exps = astropy.table.vstack([start_exps, astropy.table.Table(exps)])
if offlinedepth is not None:
exps = update_donefrac_from_offline(exps, offlinedepth)
# replace EFFTIME with EFFTIME_SPEC where available
exps['EFFTIME'] = np.where(exps['EFFTIME_SPEC'] < 0,
exps['EFFTIME'], exps['EFFTIME_SPEC'])
if badexp is not None:
exps = mark_bad_expsosures(exps, badexp)
quality = np.array([q.strip().lower() for q in exps['QUALITY']])
exps['EFFTIME'][quality == 'bad'] = 0
ntiles = len(np.unique(exps['TILEID']))
tiles = np.zeros(ntiles, dtype=[
('TILEID', 'i4'), ('PROGRAM', 'U20'), ('EFFTIME', 'f4'),
('DONEFRAC', 'f4'),
('NOBS', 'i4'), ('OFFLINE_DONE', 'bool'), ('MTL_DONE', 'bool'),
('MTL_DONE_ZDATE', 'i4')])
s = np.argsort(exps['TILEID'])
nomtimefa = np.zeros(len(tiles), dtype='f4')
for i, (f, l) in enumerate(subslices(exps['TILEID'][s])):
ind = s[f:l]
tiles['TILEID'][i] = exps['TILEID'][ind[0]]
tiles['EFFTIME'][i] = np.sum(np.clip(
exps['EFFTIME'][ind], 0, np.inf))
# intentionally include all exposures here, not just good ones
# this way, if a tile gets observed, we know we can't redesign it
# later; we need to cut a new TILEID instead.
tiles['NOBS'][i] = len(ind)
nomtimefa[i] = np.median(exps['GOALTIME'][ind])
if np.any(np.abs(exps['GOALTIME'][ind] - nomtimefa[i]) > 1):
log.warning('Inconsistent GOALTIME on tile ', tiles['TILEID'][i])
if offlinetiles is not None:
tiles = update_offlinetiles(tiles, offlinetiles)
if mtldone is not None:
tiles = update_mtldone(tiles, mtldone)
tob = desisurvey.tiles.get_tiles()
idx, mask = tob.index(tiles['TILEID'], return_mask=True)
tiles['PROGRAM'] = 'UNKNOWN'
tiles['PROGRAM'][mask] = [tob.tileprogram[i] for i in idx[mask]]
nomtimetf = desisurvey.tiles.get_nominal_program_times(
tiles['PROGRAM'])
nomtime = np.where(nomtimefa > 0, nomtimefa, nomtimetf)
tiles['DONEFRAC'] = tiles['EFFTIME'] / nomtime
s = np.argsort(exps['EXPID'])
exps = exps[s]
return tiles, exps
[docs]
def write_exp(exps, fn):
"""Write tile & exposure ETC statistics from numpy objects.
Parameters
----------
tiles : array
tile ETC statistics
exps : array
exposure ETC statistics
"""
tab = astropy.table.Table(exps)
tab['NIGHT'].description = 'YYYYMMDD'
tab['TILEID'].description = 'Tile ID'
tab['EXPID'].description = 'Exposure ID'
tab['OBSTYPE'].description = 'Exposure OBSTYPE'
tab['EXPTIME'].description = 'Exposure time'
tab['EXPTIME'].format = '%9.3f'
tab['EFFTIME_ETC'].description = 'ETC effective exposure time'
tab['EFFTIME_ETC'].format = '%9.3f'
tab['EFFTIME_SPEC'].description = 'Effective time from offline pipeline.'
tab['EFFTIME_SPEC'].format = '%7.3f'
tab['EFFTIME'].description = 'Adopted effective time.'
tab['EFFTIME'].format = '%7.3f'
tab['QUALITY'].description = 'Exposure quality'
tab['COMMENTS'].description = 'Additional comments'
tab['GOALTIME'].description = 'Goal effective exposure time'
tab['GOALTIME'].format = '%6.1f'
tab['PROGRAM'].description = 'PROGRAM of exposure'
tab.write(fn, overwrite=True)
def convert_fits(fits):
outdtype = fits.dtype.descr
newdtype = []
for field in outdtype:
newfield = tuple(field)
if 'S' in newfield[-1]:
newfield = newfield[:-1] + (newfield[-1].replace('S', 'U'),)
newdtype += [newfield]
out = np.zeros(len(fits), dtype=newdtype)
for field in fits.dtype.names:
out[field] = fits[field]
return out
[docs]
def get_conditions(mjd):
"""Determine DARK/GRAY/BRIGHT for a set of exposures.
Parameters
----------
mjd: array of mjds to query
Returns
-------
conditions: array of strings DARK, GRAY, BRIGHT, UNKNOWN
"""
tiles = desisurvey.tiles.get_tiles()
ephem = desisurvey.ephem.get_ephem()
# taken in 2019 or later, with known MJD---removes some test exposures
okmjd = np.isfinite(mjd) & (mjd > 58484)
nights = ephem.table
indices = np.repeat(np.arange(len(nights)), 2)
startstop = np.concatenate([[night['brightdusk'], night['brightdawn']]
for night in nights])
nightind = np.zeros(len(mjd), dtype='f8')
nightind[~okmjd] = -1
nightind[okmjd] = np.interp(mjd[okmjd], startstop, indices)
# a lot of exposures apparently taken during the day?
# We should mark these as belonging to the "day" program?
mday = nightind != nightind.astype('i4')
if np.sum(mday & okmjd) > 0:
log.info('%d/%d exposures were taken during the day.' %
(np.sum(mday & okmjd), np.sum(okmjd)))
nightind = nightind.astype('i4')
conditions = []
for mjd0, night in zip(mjd[~mday & okmjd],
nights[nightind[~mday & okmjd]]):
nighttimes = np.concatenate([[night['brightdusk'], night['dusk']],
night['changes'][night['changes'] != 0],
[night['dawn'], night['brightdawn']]])
nightprograms = np.concatenate([
[tiles.CONDITION_INDEX['BRIGHT']],
night['programs'][night['programs'] != -1],
[tiles.CONDITION_INDEX['BRIGHT']]])
if len(nightprograms) != len(nighttimes)-1:
raise ValueError('number of program changes does not match '
'number of programs!')
condind = np.interp(mjd0, nighttimes,
np.arange(len(nighttimes)))
condition = nightprograms[condind.astype('i4')]
conditions.append(condition)
out = np.zeros(len(mjd), dtype='i4')
out[:] = -1
out[~mday & okmjd] = conditions
# this program index is ~backwards from OBSCONDITIONS.
newout = np.full(len(out), 'UNKNOWN', dtype='U8')
for condition in tiles.CONDITIONS:
m = out == tiles.CONDITION_INDEX[condition]
newout[m] = condition
return newout
def number_per_night(exps, nightly_donefrac_requirement=0.5):
tiles = desisurvey.tiles.get_tiles()
m = exps['EXPTIME'] > 30
exps = exps[m]
s = np.argsort(exps['TILEID'])
out = np.zeros(len(np.unique(exps['TILEID'])),
dtype=[('TILEID', 'i4'), ('NEXP', 'i4'),
('NNIGHT', 'f4')])
programs = np.zeros(len(exps), dtype='U20')
programs[:] = 'UNKNOWN'
ind, mask = tiles.index(exps['TILEID'], return_mask=True)
programs[mask] = tiles.tileprogram[ind[mask]]
nomtimes = desisurvey.tiles.get_nominal_program_times(programs)
efftimes = np.clip(exps['EFFTIME'], 0, np.inf)
donefrac = efftimes / nomtimes
for i, (f, l) in enumerate(subslices(exps['TILEID'][s])):
ind = s[f:l]
out['TILEID'][i] = exps['TILEID'][ind[0]]
out['NEXP'][i] = np.sum(m)
nights = exps['NIGHT'][ind]
for night in np.unique(nights):
m = nights == night
totaldonefrac = np.sum(
np.clip(donefrac[ind[m]], 0, np.inf))
if totaldonefrac > nightly_donefrac_requirement:
out['NNIGHT'][i] += 1
else:
out['NNIGHT'][i] += 0.1
# still gets credit for having been started
return out
def update_donefrac_from_offline(exps, offlinefn):
offline = fits.getdata(offlinefn)
tiles = desisurvey.tiles.get_tiles()
tileprogram = tiles.tileprogram
tileprograms = np.zeros(len(exps), dtype='U80')
mt, me = desisurvey.utils.match(tiles.tileID, exps['TILEID'])
tileprograms[:] = 'UNKNOWN'
tileprograms[me] = [p.strip() for p in tileprogram[mt]]
me, mo = desisurvey.utils.match(exps['EXPID'], offline['EXPID'])
nomtimes, timetypes = desisurvey.tiles.get_nominal_program_times(
tileprograms[me], return_timetypes=True)
uofflineprograms = np.unique(tileprograms[me])
unknown = []
config = desisurvey.config.Configuration()
for p in uofflineprograms:
if p not in config.programs.keys:
unknown.append(p)
log.warning('Unknown programs '+', '.join(unknown)+', using DARK '
'effective time.')
offlinetimetypes = np.zeros(len(offline), dtype='U80')
offlinetimetypes[:] = 'DARK'
offlinetimetypes[mo] = timetypes
offline_eff_time = offline['EFFTIME_SPEC'].copy()
mbad = ~np.isfinite(offline_eff_time)
if np.any(mbad[mo]):
badrecords = offline[mo][mbad[mo]]
log.warning('SOME EXPOSURES HAVE NaN EFFTIME, USING ZERO!!')
log.warning(' '.join([str(x) for x in badrecords['TILEID']]))
offline_eff_time[mbad] = 0
if ((len(np.unique(exps['EXPID'])) != len(exps)) or
(len(np.unique(offline['EXPID'])) != len(offline))):
raise ValueError('weird duplicate EXPID in exps or offline')
exps = exps.copy()
exps['EFFTIME_SPEC'][me] = offline_eff_time[mo]
# this cut should probably be np.isin(exps['TILEID'], tiles.tileid)
# instead of the selection on tile number; the goal is just to avoid
# printing a warning message for non "main" survey tiles that we're
# actually trying to complete.
m = ((exps['TILEID'] > 0) &
((exps['TILEID'] < 70000) | (exps['TILEID'] >= 100000)) &
(exps['EFFTIME_SPEC'] < 0) &
(exps['EFFTIME_ETC'] > 0))
# exposures where we're relying on the EFFTIME_ETC rather than
# EFFTIME_SPEC.
if np.any(m):
log.warning(f'Some ({np.sum(m)}) exposures are missing '
'offline effective times.')
errmsg = 'List of nights with tiles with exposures with missing times:\n'
for night in np.unique(exps['NIGHT'][m]):
m2 = exps['NIGHT'] == night
errmsg += f'{night} ({np.sum(m & m2)}): '
for tileid in np.unique(exps['TILEID'][m & m2]):
m3 = exps['TILEID'] == tileid
tilestr = f'{tileid} (%s), ' % ' '.join(
[str(x) for x in exps['EXPID'][m & m3]])
errmsg += tilestr
errmsg += '\n'
log.warning(errmsg)
return exps
def update_offlinetiles(tiles, offlinetilesfn):
otiles = astropy.table.Table.read(offlinetilesfn)
mt, mo = desisurvey.utils.match(tiles['TILEID'], otiles['TILEID'])
tiles = tiles.copy()
tiles['OFFLINE_DONE'] = False
ostatus = np.array([s.strip().lower() for s in otiles['OBSSTATUS']])
offlinedone = (ostatus == 'obsend') | (ostatus == 'done')
tiles['OFFLINE_DONE'][mt] = offlinedone[mo]
return tiles
def update_mtldone(tiles, mtldonefn):
mtldone = astropy.table.Table.read(mtldonefn)
mt, mm = desisurvey.utils.match(tiles['TILEID'], mtldone['TILEID'])
tiles = tiles.copy()
tiles['MTL_DONE'] = False
tiles['MTL_DONE'][mt] = True
tiles['MTL_DONE_ZDATE'][:] = 0
tiles['MTL_DONE_ZDATE'][mt] = [int(x) for x in mtldone['ZDATE'][mm]]
usedmtl = np.zeros(len(mtldone), dtype='i4')
usedmtl[mm] = 1
nunused = np.sum(usedmtl == 0)
if nunused > 0:
log.debug('MTLs completed for {} unknown tiles?'.format(nunused))
return tiles
def mark_bad_expsosures(exps, badexp):
badexp = astropy.table.Table.read(badexp)
me, mb = desisurvey.utils.match(exps['EXPID'], badexp['EXPID'])
ok = (badexp['BAD'] == 'True') | (badexp['BAD'] == 'False')
if ~np.all(ok):
log.warning('Weird entries in badexp file, ignoring!')
log.warning(' '.join(np.unique(badexp['BAD'][~ok])))
m = badexp[mb]['BAD'] == 'True'
exps['QUALITY'][me[m]] = 'bad'
return exps
[docs]
def read_exp(fn):
"""Read tile & exposure ETC statistics file from filename.
This function works around some fits string issues in old versions
of astropy.
Parameters
----------
fn : str
Returns
-------
tiles, exps
numpy arrays for the tile and exposure ETC files
"""
return astropy.table.Table.read(fn)
def parse(options=None):
import argparse
parser = argparse.ArgumentParser(
description='Collect ETC statistics from spectra.',
epilog='EXAMPLE: %(prog)s directory outfile')
parser.add_argument('directory', type=str,
help='directory to scan for spectra')
parser.add_argument('outfile', type=str,
help='file to write out')
parser.add_argument('--start_from', type=str, default=None,
help='etc_stats file to start from')
parser.add_argument('--offlinedepth', type=str,
help='offline depth file to use')
parser.add_argument('--offlinetiles', type=str,
help='offline tile file to use')
parser.add_argument('--mtldone', type=str,
help='mtl done file to use')
parser.add_argument('--badexp', type=str,
help='bad exposure file to use')
parser.add_argument('--config', type=str, default=None,
help='configuration file to use')
if options is None:
args = parser.parse_args()
else:
args = parser.parse_args(options)
return args
def main(args):
import desisurvey.config
_ = desisurvey.config.Configuration(args.config)
res = scan_directory(args.directory, start_from=args.start_from,
offlinedepth=args.offlinedepth,
offlinetiles=args.offlinetiles,
mtldone=args.mtldone, badexp=args.badexp)
if res is not None:
tiles, exps = res
write_exp(exps, args.outfile)
else:
raise ValueError('Could not collect ETC files.')