Source code for turbo_seti.find_event.plot_dat

from os import mkdir
from os.path import dirname
import gc
import logging
logger_plot_event_name = 'plot_dat'
logger_plot_event = logging.getLogger(logger_plot_event_name)
logger_plot_event.setLevel(logging.INFO)

# Plotting packages import
import matplotlib
import matplotlib.pyplot as plt
matplotlib.use('agg')

# Math/Science package imports
import numpy as np
from astropy.time import Time
import pandas as pd

# BL imports
import blimpy as bl
from blimpy.utils import rebin
from . import find_event
from . import plot_event

# preliminary plot arguments
fontsize=16
font = {'family' : 'DejaVu Sans',
'size' : fontsize}
MAX_IMSHOW_POINTS = (4096, 1268)

[docs]def plot_dat(dat_list_string, fils_list_string, candidate_event_table_string, 
                                outdir=None, check_zero_drift=False, alpha=1, color='black', window=None):
    """
    Makes a plot similar to the one produced by 
    plot_candidate_events, but also includes the hits 
    detected, in addition to the candidate signal.
    
    Calls `plot_hit_candidate` and `make_plot`
    
    Arguments
    ----------
    dat_list_string : str
        List of .dat files in the cadence.
    fils_list_string : str
        List of filterbank or .h5 files in the cadence.
    candidate_event_table_string : str
        The string name of a .csv file that contains the
        list of events at a given filter level, created as
        output from find_event_pipeline.py. 
    outdir : str, optional
        Path to the directory where the plots will be saved to. 
        The default is None, which will result in the plots being 
        saved to the directory where the .dat file are located.
    check_zero_drift : bool, optional
         A True/False flag that tells the program whether to
        include hits that have a drift rate of 0 Hz/s. Earth-
        based RFI tends to have no drift rate, while signals
        from the sky are expected to have non-zero drift rates.
        The default is False.
    outdir : str, optional
        Path to the directory where the plots will be saved to. 
        The default is None, which will result in the plots being 
        saved to the directory the .dat file are located.
    alpha : float, optional
        The opacity of the overlayed hit plot. This should 
        be between 0 and 1, with 0 being invisible, and 1
        being the default opacity. This is passed into 
        matplotlib.pyplot function. 
    color : str, optional
        Allows for the specification of the color of the overlayed
        hits. The default is 'black'.
    window : tuple, optional
        Sets the start and stop frequencies of the plot, in MHz.
        The input takes the form of a tuple: (start, stop). And 
        assumes that the start is less than the stop. If given, the 
        resulting plot will range exactly between the start/stop
        frequencies. The default is None, which will result in 
        a plot of the entire range of hits detected.
    """
    #read candidate events into dataframe
    candidate_event_dataframe = pd.read_csv(candidate_event_table_string)
    
    #read in dat files
    dat_file_list = []
    for file in pd.read_csv(dat_list_string, encoding='utf-8', header=None, chunksize=1):
        dat_file_list.append(file.iloc[0,0])
        
    # put all hits into a single dataframe
    all_hits = []
    for dat in dat_file_list:
        frame = find_event.read_dat(dat)
        all_hits.append(frame)
    all_hits_frame = pd.concat(all_hits)
    
    #change the min/max frequency if specified
    if window != None:
        f_min = min(window)
        f_max = max(window)
        keep = np.where((all_hits_frame["Freq"] >= f_min) & (all_hits_frame["Freq"] <= f_max))
        all_hits_frame = all_hits_frame.iloc[keep]
        
        #keep only the candidates within the window 
        keep = np.where((candidate_event_dataframe["Freq"] > f_min) & (candidate_event_dataframe["Freq"] < f_max))
        candidate_event_dataframe = candidate_event_dataframe.iloc[keep]
        
    #remove hits with a drift rate of zero
    if not check_zero_drift:
        keep = np.where(all_hits_frame["DriftRate"] != 0)
        all_hits_frame = all_hits_frame.iloc[keep]
        
    #read in fil files
    fil_file_list = []
    for file in pd.read_csv(fils_list_string, encoding='utf-8', header=None, chunksize=1):
        fil_file_list.append(file.iloc[0,0])
        
    #obtaining source names
    source_name_list = []
    for fil in fil_file_list:
        wf = bl.Waterfall(fil, load_data=False)
        source_name = wf.container.header["source_name"]
        source_name_list.append(source_name)
        print("plot_all_dat: source_name={}".format(source_name))
        
    n_events = len(candidate_event_dataframe)
    if n_events == 0:
        # check to see if there are any hits detected
        if len(all_hits_frame) == 0:
            print("There are no hits in this range. This will make 0 .png files")
        else:
            # plot just the hits as there will be no candidate
            print("This will make 1 .png file")
            plot_hit_candidate(dat_file_list, 
                               fil_file_list, 
                               source_name_list,
                               all_hits_frame,
                               outdir=outdir,
                               check_zero_drift=check_zero_drift, 
                               alpha=alpha, 
                               color=color,
                               window=window)
    else:
        #plot the hits and candidate(s)
        print("This will make %s .png files"%n_events)
        for i in range(n_events):
            candidate = candidate_event_dataframe.iloc[i]
            plot_hit_candidate(dat_file_list, 
                               fil_file_list, 
                               source_name_list,
                               all_hits_frame,
                               candidate,
                               outdir=outdir,
                               check_zero_drift=check_zero_drift, 
                               alpha=alpha, 
                               color=color,
                               window=window)
        
[docs]def plot_hit_candidate(dat_file_list, fil_file_list, source_name_list, all_hits_frame, candidate=None, check_zero_drift=False, 
                 outdir=None, alpha=1, color='black', window=None):
    """ 
    Parameters
    ----------
    dat_file_list : list
        A Python list that contains a series of 
        strings corresponding to the filenames of .dat
        files, each on a new line, that corresponds to 
        the .dat files created when running turboSETI 
        candidate search on the .h5 or .fil files below
    fil_file_list : list
        A Python list that contains a series of 
        strings corresponding to the filenames of .dat
        files, each on a new line, that corresponds to 
        the cadence used to create the .csv file used 
        for event_csv_string.
    source_name_list : list
        A Python list that contains a series of strings
        corresponding to the source names of the 
        cadence in chronological (descending through 
        the plot pannels) cadence.
    all_hits_frame : dict
        A pandas dataframe contining information about 
        all the hits detected. The necessary data 
        includes the start and stop frequencies, the drift
        rate, and the source name. This dataframe is 
        generated in plot_all_hit_and_candidates above. 
    candidate : dict, optional
        A single row from a pandas dataframe containing
        information about one of the candidate signals
        detected. Contains information about the candidate 
        signal to be plotted. The necessary data includes 
        the start and stop frequencies, the drift rate, 
        and the source name. The dataframe the candiate
        comes from is generated in plot_all_hit_and_candidates
        above as `candidate_event_dataframe`. The default is None.
    check_zero_drift : bool, optional
         A True/False flag that tells the program whether to
        include hits that have a drift rate of 0 Hz/s. Earth-
        based RFI tends to have no drift rate, while signals
        from the sky are expected to have non-zero drift rates.
        The default is False.
    outdir : str, optional
        Path to the directory where the plots will be saved to. 
        The default is None, which will result in the plots being 
        saved to the directory the .dat file are located.
    alpha : float, optional
        The opacity of the overlayed hit plot. This should 
        be between 0 and 1, with 0 being invisible, and 1
        being the default opacity. This is passed into 
        matplotlib.pyplot function. 
    color : str, optional
        Allows for the specification of the color of the overlayed
        hits. The default is 'black'.
    window : tuple, optional
        Sets the start and stop frequencies of the plot, in MHz.
        The input takes the form of a tuple: (start, stop). And 
        assumes that the start is less than the stop. The 
        resulting plot will range exactly between the start/stop
        frequencies. The default is None, which will result in 
        a plot of the entire range of hits detected.
    """
    #set plot boundaries based on the contents of the file
    freq_range = np.max(all_hits_frame["Freq"]) - np.min(all_hits_frame["Freq"])
    filter_level = "f0"
    
    # total range all hits fall between 
    f_min = np.min(all_hits_frame["Freq"])
    f_max = np.max(all_hits_frame["Freq"])
    
    
    fil1 = bl.Waterfall(fil_file_list[0], load_data=False)
    t0 = fil1.header["tstart"]
    t_elapsed = Time(fil1.header['tstart'], format='mjd').unix - Time(t0, format='mjd').unix
    bandwidth = 2.4 * abs(freq_range)/1e6 * t_elapsed
    bandwidth = np.max((bandwidth, 500./1e6))
    
    # Get start and stop frequencies based on midpoint and bandwidth
    f_start, f_stop = np.sort((f_min - (bandwidth/2),  f_max + (bandwidth/2)))
    mid_f = 0.5*(f_start + f_stop)
    
    #if given a window to plot in, set boundaries accordingly
    if window is not None:
        f_start = min(window)
        f_stop  = max(window)
        mid_f   = 0.5*(f_start + f_stop) 
    
    # plugging some code from make_waterfall_plots
    global logger_plot_event

    # prepare for plotting
    matplotlib.rc('font', **font)

    # set up the sub-plots
    n_plots = len(fil_file_list)
    fig = plt.subplots(n_plots, sharex=True, sharey=True,figsize=(10, 2*n_plots))

    # get directory path for storing PNG files
    dirpath = dirname(fil_file_list[0]) + '/'

    # read in data for the first panel
    max_load = bl.calcload.calc_max_load(fil_file_list[0])
    print('plot_dat plot_hit_candidate: max_load={} is required for {}'.format(max_load, fil_file_list[0]))
    fil1 = bl.Waterfall(fil_file_list[0], f_start=f_start, f_stop=f_stop, max_load=max_load)
    t0 = fil1.header['tstart']
    dummy, plot_data1 = fil1.grab_data()

    # rebin data to plot correctly with fewer points
    dec_fac_x, dec_fac_y = 1, 1
    if plot_data1.shape[0] > MAX_IMSHOW_POINTS[0]:
        dec_fac_x = plot_data1.shape[0] / MAX_IMSHOW_POINTS[0]
    if plot_data1.shape[1] > MAX_IMSHOW_POINTS[1]:
        dec_fac_y =  int(np.ceil(plot_data1.shape[1] /  MAX_IMSHOW_POINTS[1]))
    plot_data1 = rebin(plot_data1, dec_fac_x, dec_fac_y)
    
    subplots = []
    del fil1, dummy, plot_data1
    gc.collect()
    
    on_source_name = source_name_list[0]
    f_candidate = mid_f
    
    if candidate is not None:
        on_source_name = candidate["Source"]
        f_candidate = candidate["Freq"]
    
    for i, filename in enumerate(fil_file_list):
        subplot = plt.subplot(n_plots, 1, i+1)
        subplots.append(subplot)
        
        #read in the data
        max_load = bl.calcload.calc_max_load(filename)
        print('plot_event make_waterfall_plots: max_load={} is required for {}'.format(max_load, filename))
        wf = bl.Waterfall(filename, f_start=f_start, f_stop=f_stop, max_load=max_load)
        
        this_plot = plot_event.plot_waterfall(wf, 
                                              source_name_list[i], 
                                              f_start, 
                                              f_stop)
        
        make_plot(dat_file_list[i], fil_file_list[i],
                 f_start, f_stop, t0, candidate, check_zero_drift=check_zero_drift, alpha=alpha, color=color)
        
        #more code from make_waterfall_plots
        # Title the full plot
        if i == 0:
            plot_title = "%s \n MJD:%5.5f" % (on_source_name, t0)

            plt.title(plot_title)
        # Format full plot
        if i < len(fil_file_list)-1:
            plt.xticks(np.linspace(f_start, f_stop, num=4), ['','','',''])

        del wf
        gc.collect()        

    # More overall plot formatting, axis labelling
    factor = 1e6
    units = 'Hz'

    #ax = plt.gca()
    #ax.get_xaxis().get_major_formatter().set_useOffset(False)
    xloc = np.linspace(f_start, f_stop, 5)
    xticks = [round(loc_freq) for loc_freq in (xloc - mid_f)*factor]
    if np.max(xticks) > 1000:
        xticks = [xt/1000 for xt in xticks]
        units = 'kHz'
    plt.xticks(xloc, xticks)
    plt.xlabel("Relative Frequency [%s] from %f MHz"%(units,mid_f),fontdict=font)

    # Add colorbar
    cax = fig[0].add_axes([0.94, 0.11, 0.03, 0.77])
    fig[0].colorbar(this_plot,cax=cax,label='Normalized Power (Arbitrary Units)')

    # Adjust plots
    plt.subplots_adjust(hspace=0,wspace=0)

    # save the figures
    if outdir is not None:
        dirpath = outdir
        
    # make note if the plot contains a candidate
    cand = ""
    if candidate is not None:
        cand = "_candidate"
    path_png = dirpath + filter_level + '_' + on_source_name + cand + '_freq_' "{:0.6f}".format(f_candidate) + ".png"
    plt.savefig(path_png, bbox_inches='tight', transparent=False)
    logger_plot_event.debug('make_waterfall_plots: Saved file {}'.format(path_png))

    # close all figure windows
    plt.close('all')

[docs]def make_plot(dat, fil, f_start, f_stop, t0, candidate=None, check_zero_drift=False, alpha=1, color='black'):
    """
    Parameters
    ----------
    dat : str
        The .dat file containing information about the hits detected.
    fil : str
        Filterbank or h5 file corresponding to the .dat file.
    f_start : float
        Start frequency, in MHz.
    f_stop : float
        Stop frequency, in MHz.
    t0 : float
        Start time of the candate event in mjd units.
    candidate : dict, optional
        A single row from a pandas dataframe containing
        information about one of the candidate signals
        detected. Contains information about the candidate 
        signal to be plotted. The necessary data includes 
        the start and stop frequencies, the drift rate, 
        and the source name. The dataframe the candiate
        comes from is generated in plot_all_hit_and_candidates
        above as `candidate_event_dataframe`. The default is None.
    check_zero_drift : bool, optional
         A True/False flag that tells the program whether to
        include hits that have a drift rate of 0 Hz/s. Earth-
        based RFI tends to have no drift rate, while signals
        from the sky are expected to have non-zero drift rates.
        The default is False.
    alpha : float, optional
        The opacity of the overlayed hit plot. This should 
        be between 0 and 1, with 0 being invisible, and 1
        being the default opacity. This is passed into 
        matplotlib.pyplot function. 
    color : str, optional
        Allows for the specification of the color of the overlayed
        hits. The default is 'black'.

    """
    wf = bl.Waterfall(fil, f_start, f_stop)
    hit_frame = find_event.read_dat(dat)
    
    #select just the hits within the frequency range
    if len(hit_frame) > 0:
        keep = np.where((hit_frame['Freq'] > f_start) & (hit_frame['Freq'] < f_stop))
        hit_frame = hit_frame.iloc[keep]
    
    # plot the estimated candidate line 
    if candidate is not None:
        t_elapsed = Time(wf.header['tstart'], format='mjd').unix - Time(t0, format='mjd').unix
        t_duration = (wf.n_ints_in_file - 1) * wf.header['tsamp']
        f_event = candidate["Freq"] + candidate["DriftRate"] / 1e6 * t_elapsed
        drift_rate = candidate["DriftRate"]
        # any mistakes will likely come from this line
        plot_event.overlay_drift(f_event, f_start, f_stop, drift_rate, t_duration)
    
    if len(hit_frame) == 0:
        # there are no hits detected in this dat file 
        return
    
    if not check_zero_drift:
        hit_frame = hit_frame[hit_frame["DriftRate"] != 0]
    
    #f_mid = 0.5 * (f_start + f_stop)
    t_duration = (wf.n_ints_in_file - 1) * wf.header["tsamp"]
    
    #plot all the hits 
    bandwidth = 500./1e6
    half_bandwidth = bandwidth/2.0
    for i in range(len(hit_frame)):
        hit = hit_frame.iloc[i]
        
        f_mid = hit["Freq"]
        drift_rate = hit["DriftRate"]
        f_event = f_mid 
        start, stop = np.sort((f_mid - (half_bandwidth),  f_mid + (half_bandwidth)))
        
        plot_event.overlay_drift(f_event, start, stop, drift_rate, t_duration, offset=0, alpha=alpha, color=color)