"""
Main program module for executable plotSETI.
Facilitates the automation of 2 large functions:
find_event_pipeline()
plot_event_pipeline()
"""
import sys
import os
import glob
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import matplotlib
from blimpy import __version__ as BLIMPY_VERSION
from turbo_seti.find_event.find_event_pipeline import find_event_pipeline
from turbo_seti.find_event.plot_event_pipeline import plot_event_pipeline
from turbo_seti.find_doppler.turbo_seti_version import TURBO_SETI_VERSION
# This module lives in the find_event directory; the version module lives in
# the sibling find_doppler directory, which is appended to the import path.
CURDIR = os.path.dirname(os.path.abspath(__file__))
UPDIR = os.path.dirname(CURDIR)
sys.path.append(UPDIR + "/find_doppler")

# Names of the 3 standard intermediate files (created in the output directory).
NAME_CSVF = "found_event_table.csv"
NAME_H5_LIST = "list_h5_files.txt"
NAME_DAT_LIST = "list_dat_files.txt"

# Return codes handed back by main() and execute_pipelines().
RETURN_NORMAL, RETURN_ERROR = 0, 1
# Epilogue text passed to ArgumentParser(epilog=...) in main().
# The parser uses RawDescriptionHelpFormatter, so this layout is shown as written.
# Keep the stated defaults in step with the add_argument() calls in main():
# --filter_threshold defaults to 3.
HELP_EPILOGUE = \
"""
Optional Filtering Parameters
--------------------------------------
The following parameters can be used to prune hits from the dat files,
regardless of the filter threshold value:
* min_drift_rate (Hz/s)
* max_drift_rate (Hz/s)
* min_snr
Filter Threshold (ON-OFF tables)
--------------------------------------
1 : Select all top hits from the DAT files.
2 : Select only those top hits that are in at least one ON file AND not in any OFF files.
3 : Select only those top hits that are in all ON files AND not in any OFF files.
Default: 3.
Complex Cadences (--cadence=complex)
------------------------------------
All input .h5/.dat file pairs where the file header source_name fails to match
the --source_name parameter value are bypassed. In this way, source_name matches are
similar to ON files and non-matches are similar to OFF files.
Using a --filter_threshold value of 2 means that a top hit must be in
at least one of the matched files to qualify as an event.
Specifying the default --filter_threshold value of 3 indicates that a top hit must be in all
matched files to be an event.
Lists of h5 and dat files used internally
-----------------------------------------
Internally, plotSETI uses one text-file-resident list of h5 files
and another for the corresponding dat files.
Normally, these 2 lists are generated internally.
However, if parameter --h5dat_lists is set to 2 file paths separated by spaces
(one text file for h5s, one text file for dats), then those 2 list files:
* Will be checked for existence and consistency.
* Will be used for internal list processing.
E.g. plotSETI --h5dat_lists /dir_a/list_h5_files.txt /dir_b/list_dat_files.txt --out_dir .....
tells plotSETI that there exists a list of h5 files in /dir_a/list_h5_files.txt
and a list of dat files in /dir_b/list_dat_files.txt
If --h5dat_lists is absent (default), plotSETI will internally generate the 2 list files.
"""
def clean_event_stuff(path_out_dir):
    """
    Remove artifacts of a previous run from the output directory.

    Deletes every ``*.png`` file, every ``list*.txt`` file, and the
    event-table CSV file (NAME_CSVF) if it is present.

    Parameters
    ----------
    path_out_dir : str
        Output path of directory holding old artifacts.

    Returns
    -------
    None.
    """
    # Plots are removed first, then the list files.
    for pattern in ("*.png", "list*.txt"):
        for stale_path in glob.glob(f"{path_out_dir}/{pattern}"):
            os.remove(stale_path)

    # The CSV has a fixed name, so no globbing is needed.
    stale_csv = f"{path_out_dir}/{NAME_CSVF}"
    if os.path.exists(stale_csv):
        os.remove(stale_csv)
def count_text_lines(path_list_file):
    """
    Count the text lines in a file.

    Parameters
    ----------
    path_list_file : str
        Path of file containing a list of text lines.

    Returns
    -------
    int
        Count of text lines.
    """
    # The context manager closes the handle even if reading fails;
    # iterating the file counts lines without holding them all in memory.
    with open(path_list_file, "r", encoding="utf-8") as fh_list:
        return sum(1 for _ in fh_list)
def make_lists(path_h5_dir, path_h5_list, path_dat_dir, path_dat_list):
    """
    Write one list file of .h5 paths and one list file of .dat paths.

    Each list file holds one path per line, in sorted order.
    The h5 list is written first; if no h5 files are found,
    the dat list is not written at all.

    Parameters
    ----------
    path_h5_dir : str
        Directory where the h5 files reside.
    path_h5_list : str
        Path of output list of h5 files.
    path_dat_dir : str
        Directory where the dat files reside.
    path_dat_list : str
        Path of output list of dat files.

    Returns
    -------
    int
        Number in cadence : Success.
        0 : Failure (no h5 files, no dat files, or the counts differ).
    """
    print(f"plotSETI: Directory of h5 files: {path_h5_dir}")
    print(f"plotSETI: Directory of dat files: {path_dat_dir}")

    # Write the h5 list.
    with open(path_h5_list, "w", encoding="utf-8") as h5_out:
        h5_paths = sorted(glob.glob(f"{path_h5_dir}/*.h5"))
        h5_out.writelines(f"{one_path}\n" for one_path in h5_paths)
    count_h5 = len(h5_paths)
    if count_h5 < 1:
        print("\n*** plotSETI: No h5 files found!")
        return 0
    print(f"plotSETI: Found {count_h5} h5 files.")

    # Write the dat list.
    with open(path_dat_list, "w", encoding="utf-8") as dat_out:
        dat_paths = sorted(glob.glob(f"{path_dat_dir}/*.dat"))
        dat_out.writelines(f"{one_path}\n" for one_path in dat_paths)
    count_dat = len(dat_paths)
    if count_dat < 1:
        print("\n*** plotSETI: No dat files found!")
        return 0
    print(f"plotSETI: Found {count_dat} dat files.")

    # Every h5 file needs a dat partner, so the counts must agree.
    if count_h5 == count_dat:
        return count_h5
    print("\n*** plotSETI: Count of dat files must = count of h5 files!")
    return 0
def main(args=None):
    """
    This is the entry point to the plotSETI executable.

    Parses the command line, handles --version and the basic h5 directory
    checks, then hands the parsed parameters to execute_pipelines().

    Parameters
    ----------
    args : list of str, optional
        Command-line arguments, excluding the program name.
        When None (default), argparse reads them from sys.argv.

    Returns
    -------
    int
        RETURN_NORMAL after --version or when the h5 directory was not given,
        RETURN_ERROR when the h5 directory does not exist,
        otherwise the value returned by execute_pipelines().
    """
    # Create an option parser to get command-line input/arguments
    parser = ArgumentParser(description="plotSETI - post-search event-plot utility, version {}."
                                        .format(TURBO_SETI_VERSION),
                            formatter_class=RawDescriptionHelpFormatter,
                            epilog=HELP_EPILOGUE)

    parser.add_argument("h5_dir", type=str, default="", nargs="?",
                        help="Path to the directory holding the set of .h5 files")
    parser.add_argument("-d", "--dat_dir", dest="dat_dir", type=str, default=None,
                        help="Path to the directory holding the set of .dat files. Default: h5_path. ")
    parser.add_argument("-o", "--out_dir", dest="out_dir", type=str, default="./",
                        help="Path to the output directory. Default: current directory (.).")
    parser.add_argument("--h5dat_lists", type=str, nargs="+", required=False,
                        help="User-supplied paths to lists of h5 files and dat files. Default: None supplied; will be internally-generated.")
    parser.add_argument("-f", "--filter_threshold", dest="filter_threshold", type=int,
                        choices=[1, 2, 3], default=3,
                        help="Specification for how strict the top hit filtering will be.")
    parser.add_argument("-z", "--plot_offset", dest="plot_offset", default=False, action="store_true",
                        help="Plot offset lines from signal? Default: False.")
    # The three thresholds are numeric; type=float converts the command-line
    # text so the pipeline does not receive strings. An absent option stays None.
    parser.add_argument("-s", "--snr_threshold", dest="snr_threshold", type=float, default=None,
                        help="The SNR below which signals will be discarded.")
    parser.add_argument("-m", "--min_drift_rate", dest="min_drift_rate", type=float, default=None,
                        help="The minimum drift rate below which signals will be discarded.")
    parser.add_argument("-M", "--max_drift_rate", dest="max_drift_rate", type=float, default=None,
                        help="The maximum drift rate above which signals will be discarded.")
    parser.add_argument("-c", "--cadence", dest="cadence", type=str,
                        choices=["on", "off", "complex"], default="on",
                        help="Input file cadence FIRST file: on source, off source, complex cadence. Default: on.")
    parser.add_argument("-n", "--source_name", dest="source_name", type=str, default="",
                        help="Complex cadence source name. Don't set this for on or off cadences.")
    # NOTE: default=True combined with store_true means this value is always True.
    parser.add_argument("-e", "--erase_old_files", dest="erase_old", default=True, action="store_true",
                        help="Erase pre-existing *.png, *.csv, and list*.txt files in the output directory. Default: True.")
    parser.add_argument("-v", "--version", dest="show_version", default=False, action="store_true",
                        help="Show the turbo_seti and blimpy versions and exit.")
    parser.add_argument("--debug", default=False, action="store_true",
                        help="Turn on debug tracing (developer).")

    # parse_args(None) falls back to sys.argv, so one call covers both cases.
    args = parser.parse_args(args)

    if args.show_version:
        print("turbo_seti: {}".format(TURBO_SETI_VERSION))
        print("blimpy: {}".format(BLIMPY_VERSION))
        return RETURN_NORMAL

    if args.h5_dir == "":
        print("\nThe .h5 directory must be specified!\n")
        # Print the help directly rather than shelling out to "plotSETI -h",
        # which only works when the executable is installed and on the PATH.
        parser.print_help()
        # Return code kept as RETURN_NORMAL for backward compatibility.
        return RETURN_NORMAL

    if not os.path.exists(args.h5_dir):
        print("\nThe .h5 directory {} does not exist!\n".format(args.h5_dir))
        return RETURN_ERROR

    # The dat files default to living alongside the h5 files.
    if args.dat_dir is None:
        args.dat_dir = args.h5_dir

    return execute_pipelines(args)
def execute_pipelines(args):
    """
    Interface to the pipeline functions, called by main().

    Prepares the output directory and the h5/dat list files, runs
    find_event_pipeline(), and then runs plot_event_pipeline() on the result.

    Parameters
    ----------
    args : argparse.Namespace
        Parsed command-line parameters, as produced by the parser in main().

    Returns
    -------
    int
        RETURN_NORMAL : Both pipelines were run.
        RETURN_ERROR : The list files were unusable or no events were produced.

    Raises
    ------
    SystemExit
        If a complex cadence is requested without a source_name.
    """
    # Setup some parameter values for find_event_pipeline().
    complex_cadence = args.cadence == "complex"
    if complex_cadence and len(args.source_name) < 1:
        print("\n*** plotSETI: Complex cadence requires a source_name.")
        sys.exit(RETURN_ERROR)

    h5_dir = os.path.abspath(args.h5_dir) + "/"
    dat_dir = os.path.abspath(args.dat_dir) + "/"
    out_dir = os.path.abspath(args.out_dir) + "/"
    # makedirs creates missing parent directories too and tolerates an
    # already-existing directory.
    os.makedirs(out_dir, exist_ok=True)

    offset = "auto" if args.plot_offset else 0

    # Establish output pathnames,
    path_csvf = out_dir + NAME_CSVF
    # NOTE(review): this runs before the user-supplied lists are checked, so
    # user lists named list*.txt inside out_dir would be removed here.
    clean_event_stuff(out_dir)

    # Make the h5 and dat lists.
    # Default to auto-generation?
    SZ_user_list = 0 if args.h5dat_lists is None else len(args.h5dat_lists)
    if args.debug:
        print(f"DEBUG h5dats_list: #{SZ_user_list} {args.h5dat_lists}")

    if SZ_user_list == 0:  # Default to auto-generation.
        path_h5_list = out_dir + NAME_H5_LIST
        path_dat_list = out_dir + NAME_DAT_LIST
        number_in_cadence = make_lists(h5_dir, path_h5_list, dat_dir, path_dat_list)
        if number_in_cadence == 0:
            return RETURN_ERROR
    else:  # User-specified lists
        # Exactly 2 usable paths are required: the h5 list, then the dat list.
        if SZ_user_list != 2 or None in args.h5dat_lists:
            print(f"\n*** plotSETI: h5dat_lists had {SZ_user_list} elements; must be 2 (one for h5 and one for dat)!")
            return RETURN_ERROR
        path_h5_list, path_dat_list = args.h5dat_lists

        # Check the list of h5 files.
        if not os.path.exists(path_h5_list):
            print(f"\n*** plotSETI: File {path_h5_list} does not exist!")
            return RETURN_ERROR
        N_h5 = count_text_lines(path_h5_list)
        print(f"plotSETI: Found {N_h5} h5 files.")

        # Check the list of dat files.
        if not os.path.exists(path_dat_list):
            print(f"\n*** plotSETI: File {path_dat_list} does not exist!")
            return RETURN_ERROR
        N_dat = count_text_lines(path_dat_list)
        print(f"plotSETI: Found {N_dat} dat files.")

        # Make sure that the lists are of the same size.
        if N_h5 != N_dat:
            print("\n*** plotSETI: Count of dat files must = count of h5 files!")
            return RETURN_ERROR
        number_in_cadence = N_h5

    # Keyword arguments shared by both cadence styles.
    pipeline_kwargs = {
        "filter_threshold": args.filter_threshold,
        "number_in_cadence": number_in_cadence,
        "sortby_tstart": True,
        "check_zero_drift": False,
        "SNR_cut": args.snr_threshold,
        "min_drift_rate": args.min_drift_rate,
        "max_drift_rate": args.max_drift_rate,
        "user_validation": False,
        "csv_name": path_csvf,
        "saving": True,
    }
    if complex_cadence:
        # Complex cadence: the source name is passed; on_off_first is left
        # at the pipeline's own default.
        pipeline_kwargs["on_source_complex_cadence"] = args.source_name
    else:
        # Simple cadence: tell the pipeline whether the first file is ON or OFF.
        pipeline_kwargs["on_source_complex_cadence"] = False
        pipeline_kwargs["on_off_first"] = "ON" if args.cadence == "on" else "OFF"

    # Run find_event_pipeline()
    df_check = find_event_pipeline(path_dat_list, path_h5_list, **pipeline_kwargs)
    if df_check is None:
        print("\n*** plotSETI: No events produced in find_event_pipeline()!")
        return RETURN_ERROR

    # Make the plots for all of the HDF5/DAT file pairs in batch mode.
    matplotlib.use("agg", force=True)
    plot_event_pipeline(path_csvf,
                        path_h5_list,
                        plot_dir=out_dir,
                        filter_spec=args.filter_threshold,
                        offset=offset,
                        user_validation=False)

    print(f"\nplotSETI: Plots are stored in directory {out_dir}.")
    return RETURN_NORMAL
if __name__ == "__main__":
    # Start the show, and hand main()'s return code back to the shell
    # so that a RETURN_ERROR result becomes a non-zero exit status.
    sys.exit(main())