Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # coding: utf-8
- # I used this notebook to establish the routines ...
- # ## Read output from BNT (.*mat*) and write structure into database
- from __future__ import print_function # Python 2.x
- import sys
- import os
- import time
- from villus_helpers.BNT_helpers import *
- # base params:
- BNT_mat = r'N:\horsto\klusta_pipeline\villus\EXPORT\auto_input_file_vars.mat'
- BNT_mat = "/".join(BNT_mat.split("\\"))
- if not os.path.isfile(BNT_mat):
- print('Not a file! Trying to convert to linux compatible format...\n')
- # change to linux compatible format ...
- BNT_mat = "\\mnt\\N" + "".join(BNT_mat.split("N:"))
- BNT_mat = "/".join(BNT_mat.split('\\'))
- if not os.path.isfile(BNT_mat):
- print('Not a file: {}\n'.format(BNT_mat))
- sys.exit()
# Remaining imports, grouped stdlib / third-party / local.
# NOTE: sys and os are already imported at the top of the file; the original
# duplicate imports were dropped.
import argparse
import json
import math
from fnmatch import fnmatch

import numpy as np
import pandas as pd
import psycopg2
import cPickle
import h5py
from scipy.io import *
from tqdm import tqdm

from villus_helpers.general import *
from villus_helpers.psql_start import *
from villus_helpers.write2tables import *

# register pickle type to retrieve binary data from database
# (cast_pickle comes from the villus_helpers imports above)
psycopg2.extensions.register_type(
    psycopg2.extensions.new_type(
        psycopg2.BINARY.values, 'BINARY-PICKLE', cast_pickle))

# Get current path of this script (normalize Windows backslashes):
script_path = os.path.dirname(sys.argv[0])
script_path = script_path.replace('\\', '/')

# get parameters to connect to database:
params = config(script_path, 'postgresql', 'hotte')
db_status = test_connect(params)
if not db_status:
    print('\n\n\n\nCould not establish database link. Skipping all psql connects.\n\n\n\n')

# Wait until the .mat file size is stable (i.e. MATLAB/BNT has finished
# writing it) before loading: probe the size, sleep, probe again.
# BUGFIX: the original comment/message claimed "30 seconds" while the code
# sleeps 10 — messages now match the actual interval.
status_mat_file = False
while not status_mat_file:
    size_mod = os.path.getsize(BNT_mat)
    time.sleep(10)  # wait 10 seconds between the two size probes
    size_mod_new = os.path.getsize(BNT_mat)
    if size_mod == size_mod_new:
        print('File size did not change - loading file!')
        status_mat_file = True
    else:
        print('File size has changed ... waiting another 10 seconds ...')

# read .mat (struct_as_record keeps MATLAB structs as numpy record arrays)
BNT_mat_dict = loadmat(BNT_mat, struct_as_record=True)
# Purge pass: for every cluster in allStatistics, delete any stale screen /
# session / spike rows for its (animal, user, session, tetrode) key so the
# main loop below can re-insert fresh entries.
all_purged_screen = []
all_purged_session = []
all_purged_spikes = []
print('Purging tetrodes and sessions from database ...')
for no in tqdm(xrange(len(BNT_mat_dict['allStatistics'][0]))):
    stats = BNT_mat_dict['allStatistics'][0][no]
    path_name = str(stats['Path'][0]).replace('\\', '/')
    if not os.path.isdir(path_name):
        # change to linux compatible format (drive letter -> /mnt/<letter>)
        if 'N:' in path_name:
            path_name = '/mnt/N' + path_name.replace('N:', '')
        elif 'L:' in path_name:
            path_name = '/mnt/L' + path_name.replace('L:', '')
        else:
            print('Sorry, drive not found - check folder name constructor ({})'.format(path_name))
            sys.exit()
    session_name = str(stats['Basename'][0])
    tetrode_no = str(stats['Tetrode'][0][0])
    animal_id, n_drive_user, status, session_ts, analysis_ts = \
        distill_params_root_table_BNT(path_name, session_name)
    # Purge from tetrodes_tb_bnt and sessions_tb_bnt; purge_entries_bnt keys
    # on (animal_id, n_drive_user, session_ts, tetrode_no, session_name).
    deleted_screen, deleted_session, deleted_spikes = purge_entries_bnt(
        animal_id, n_drive_user, session_ts, tetrode_no, session_name, params)
    if deleted_screen:
        all_purged_screen.append(deleted_screen[0])
    if deleted_spikes:
        all_purged_spikes.append(deleted_spikes[0])
    if deleted_session:
        all_purged_session.append(deleted_session[0])
# BUGFIX: original format string was missing the space in "{}Spike(s)".
print('Done. ({} Screen(s), {} Spike(s) and {} Session(s))'.format(
    len(all_purged_screen), len(all_purged_spikes), len(all_purged_session)))
# ### Prepare writing to database ...
# Mapping from the score field names found in the .mat allStatistics struct
# to the corresponding column names in BNT_tb_screen.
all_scores_labels = {
    'informationContentWholeRecording': 'InformationContent_bnt',
    'hdPeakRateWholeRecording': 'tc_stats_hd_peakrate_bnt',
    'gridStats_OrientationWholeRecording': 'gridstats_orientation_bnt',
    'numFieldsWholeRecording': 'numFields_bnt',
    'meanDirectionWholeRecording': 'tc_stats_mean_direction_bnt',
    'meanRateOutsideFieldsWholeRecording': 'meanrate_outside_fields_bnt',
    'gridScoreWholeRecording': 'grid_score_bnt',
    'peakRateWholeRecording': 'peak_rate_bnt',
    'gridStats_SpacingWholeRecording': 'gridstats_spacing_bnt',
    'borderScoreWholeRecording': 'borderscore_bnt',
    'informationRateWholeRecording': 'InformationRate_bnt',
    'speedScore': 'speedscore_bnt',
    'peakDirectionWholeRecording': 'tc_stats_peakdirection_bnt',
    'mvlWholeRecording': 'tc_stats_mvl_bnt',
    'coherenceWholeRecording': 'coherence_bnt',
    'gridStats_EllipseWholeRecording': 'gridstats_ellipse_bnt',
    'fieldMainWholeRecording': 'fieldmain_bnt',
    'sparsityWholeRecording': 'sparsity_bnt',
    'gridStats_EllipseThetaWholeRecording': 'gridstats_ellipse_theta_bnt',
    'calbindinWholeRecording': 'calbindin_bnt',
    'thetaStrengthWholeRecording': 'theta_strength_bnt',
    'selectivityWholeRecording': 'selectivity_bnt',
    'meanRateWholeRecording': 'mean_rate_bnt',
    'thetaMeanPhaseWholeRecording': 'theta_mean_phase_bnt',
    'stabilityHalfWholeRecording': 'stability_half_bnt',
    'angularStabilityWholeRecording': 'angular_stability_bnt',
    'calbindinDistanceWholeRecording': 'calbindin_dist_bnt',
}
# Most recent additions: stabilityHalf-, angularStability- and
# calbindinDistance- WholeRecording.

# Index into cellsData; starts at -1 and is incremented before first use.
counter_cell_array = -1
# Main write-out loop: for every cluster listed in allStatistics, (re)create
# the meta/tetrode/session/cluster rows, collect the BNT scores, extract
# tracking / ratemap / autocorrelation / HD / spike data from the matching
# cellsData entry, and write everything to BNT_tb_screen.
# counter_cell_array tracks the matching cellsData entry; it only advances
# when the allStatistics entry was non-empty (advance flag).
for no in xrange(len(BNT_mat_dict['allStatistics'][0])):
    advance = True  # advance in the cellsData cell array for this entry?
    try:
        stats = BNT_mat_dict['allStatistics'][0][no]
        print('\n\nAnalyzing {}/{} (File {})'.format(
            no + 1, len(BNT_mat_dict['allStatistics'][0]),
            " - ".join(BNT_mat.split("/")[-2:])))
        path_name = str(stats['Path'][0]).replace('\\', '/')
        if not os.path.isdir(path_name):
            # change to linux compatible format (drive letter -> /mnt/<letter>)
            if 'N:' in path_name:
                path_name = '/mnt/N' + path_name.replace('N:', '')
            elif 'L:' in path_name:
                path_name = '/mnt/L' + path_name.replace('L:', '')
            else:
                # Unknown drive letter: keep going; downstream lookups will fail.
                print('Path not found! Running into error here ...')
        session_name = str(stats['Basename'][0])
        tetrode_no = str(stats['Tetrode'][0][0])
        cluster_no = str(stats['Cell'][0][0])
        animal_id, n_drive_user, status, session_ts, analysis_ts = \
            distill_params_root_table_BNT(path_name, session_name)
        print('Path {} | Session {} | Tetrode {} | Cell {}'.format(
            path_name, session_name, tetrode_no, cluster_no))
        print('Animal ID {} | N Drive user {} | Session timestamp {}'.format(
            animal_id, n_drive_user, session_ts))
        if not status:
            print('Session file not found. Skipping.')
            continue
        # Create the meta row once per (user, animal, session):
        meta_rows = count_meta_BNT(n_drive_user, animal_id, session_ts, params)
        if meta_rows == 0:
            entry_id = to_meta_tb_BNT(analysis_ts, n_drive_user, animal_id,
                                      session_ts, params)
            print('New meta_tb entry created. Session timestamp: {}'.format(
                entry_id[0].strftime("%B %d, %Y at %H-%M-%S")))
        # write to tetrodes_tb BNT:
        entry_id = to_tetrodes_tb_BNT(tetrode_no, analysis_ts, path_name,
                                      n_drive_user, animal_id, session_ts, params)
        print('Entry tetrodes_tb_bnt: {}'.format(entry_id))
        # write to sessions_tb BNT:
        entry_id = to_sessions_tb_BNT(session_name, analysis_ts, n_drive_user,
                                      animal_id, session_ts, params)
        print('Entry sessions_tb_bnt: {}'.format(entry_id))
        # Write out cluster:
        entry_id = to_clusters_tb_BNT(cluster_no, analysis_ts, tetrode_no,
                                      path_name, n_drive_user, animal_id,
                                      session_ts, params)
        print('Entry clusters_tb_bnt: {}'.format(entry_id))
        # Collect score values for "BNT_tb_screen"; missing or empty scores
        # become NaN. The dtype fields are the same for every record entry,
        # so entry 0 is used to probe field availability.
        all_scores = dict()
        for score in all_scores_labels:
            if score in BNT_mat_dict['allStatistics'][0][0].dtype.fields.keys():
                try:
                    value = stats[score]
                    if len(value) > 1:
                        all_scores[all_scores_labels[score]] = value
                    elif len(value[0]) > 1:
                        all_scores[all_scores_labels[score]] = value[0]
                    else:
                        all_scores[all_scores_labels[score]] = value[0][0]
                except IndexError:
                    # Empty MATLAB array: this entry has no cellsData record,
                    # so do not advance counter_cell_array.
                    print('Empty: {}'.format(score))
                    all_scores[all_scores_labels[score]] = np.nan
                    advance = False
            else:
                print('Not found: {}'.format(score))
                all_scores[all_scores_labels[score]] = np.nan
        if advance:
            counter_cell_array += 1
        cell_data = BNT_mat_dict['cellsData'][counter_cell_array][0]
        # Sanity check: the cellsData unit label ("T<t>C<c>") must match the
        # allStatistics tetrode/cluster numbers.
        # BUGFIX: the original compared str against int (never equal), applied
        # "not" to only one comparison, and the message was a bare string
        # expression that was never printed — the check was a no-op.
        unit_label = str(cell_data['unitLabel'][0][0][0])
        tetrode_no_mat = int(unit_label.split("C")[0].split("T")[1])
        cluster_no_mat = int(unit_label.split("C")[1])
        if int(tetrode_no) != tetrode_no_mat or int(cluster_no) != cluster_no_mat:
            print('Labels do not match!')
            sys.exit()
        epoch = cell_data['epochs'][0][0][0][0][0]
        # Spiketimes with interpolated tracking position per spike:
        try:
            spiketimes_tracking_session_bnt = pd.DataFrame(
                epoch['spikes2Pos'][0],
                columns=['time', 'correct_x_inter', 'correct_y_inter'])
            spike_no_bnt = len(spiketimes_tracking_session_bnt)
            cluster_no_spiketimes_tracking_bnt = to_spiketimes_tracking_table_BNT(
                cluster_no, analysis_ts, tetrode_no, session_name, path_name,
                n_drive_user, animal_id, session_ts,
                spiketimes_tracking_session_bnt, spike_no_bnt, params)
            print('Entered into to_spiketimes_tracking_table_BNT: {}'.format(
                cluster_no_spiketimes_tracking_bnt))
        except KeyError:
            cluster_no_spiketimes_tracking_bnt = to_spiketimes_tracking_table_BNT(
                cluster_no, analysis_ts, tetrode_no, session_name, path_name,
                n_drive_user, animal_id, session_ts, [np.nan], np.nan, params)
            print('No spiketimes + tracking for this cluster found.')
        # Tracking (whole session position samples):
        try:
            tracking_session_bnt = pd.DataFrame(
                epoch['pos'][0],
                columns=['time', 'correct_x_inter', 'correct_y_inter'])
            session_name_tracking_bnt = to_tracking_table_bnt(
                analysis_ts, session_name, n_drive_user, animal_id, session_ts,
                tracking_session_bnt, params)
            print('Entered into tracking_tb_BNT: {}'.format(session_name_tracking_bnt))
        except KeyError:
            session_name_tracking_bnt = to_tracking_table_bnt(
                analysis_ts, session_name, n_drive_user, animal_id, session_ts,
                [np.nan], params)
            print('No tracking data found')
        # Maps are transposed to match the Klusta orientation.
        try:
            masked_ratemap_bnt = epoch['map'][0]['z'][0][0].T
        except KeyError:
            print('No ratemap entry found')
            masked_ratemap_bnt = [np.nan]
        try:
            autocorr_bnt = epoch['aCorr'][0].T
        except KeyError:
            print('No spatial autocorrelation entry found')
            autocorr_bnt = [np.nan]
        try:
            occupancy_map_bnt = epoch['map'][0]['time'][0][0].T
        except KeyError:
            print('No occupancy map entry found')
            occupancy_map_bnt = [np.nan]
        # HD tuning curve; the first bin is appended to close the circle,
        # and the bin centers are converted from degrees to radians.
        try:
            hist_angle_smooth_bnt = epoch['hdValues'][0]
            hist_angle_smooth_bnt = np.append(hist_angle_smooth_bnt,
                                              hist_angle_smooth_bnt[0])
            bins_angle_center_bnt = np.radians(epoch['hdAngles_deg'][0][0])
        except KeyError:
            print('No HD tuning values found')
            hist_angle_smooth_bnt = [np.nan]
            bins_angle_center_bnt = [np.nan]
        # Spike times of this cluster:
        try:
            spiketimes_cluster_bnt = epoch['spikes'][0].squeeze()
        except KeyError:
            print('No spikes for this cluster found.')
            spiketimes_cluster_bnt = [np.nan]
        # BNT analysis parameters (the 'p' struct), as a one-column DataFrame:
        try:
            param_dict = {}
            for field in BNT_mat_dict['p'].dtype.fields.keys():
                param_dict[field] = BNT_mat_dict['p'][field][0][0].squeeze()
            params_bnt = pd.DataFrame.from_dict(param_dict, orient='index')
        except KeyError:
            print('No parameters found!')
            params_bnt = [np.nan]
        # delete any old screen entry, then write the fresh one:
        entry_id = delete_BNT(cluster_no, tetrode_no, session_name, n_drive_user,
                              animal_id, session_ts, params)
        print('Deleted from BNT_tb_screen [(Cluster, Tetrode, Session)]: {}'.format(entry_id))
        entry_id = to_BNT_table(cluster_no, analysis_ts, tetrode_no, session_name,
                                path_name, n_drive_user, animal_id, session_ts,
                                all_scores, masked_ratemap_bnt, autocorr_bnt,
                                occupancy_map_bnt, hist_angle_smooth_bnt,
                                bins_angle_center_bnt, spiketimes_cluster_bnt,
                                params_bnt, params)
        print('Entry BNT_tb_screen: {}'.format(entry_id))
        if entry_id is None:
            # BUGFIX: the original message was truncated ('... for ').
            print('No entry could be created for cluster {} (tetrode {}, '
                  'session {})'.format(cluster_no, tetrode_no, session_name))
            sys.exit()
    except IndexError:
        print('Index Error! continuing ...')
        continue
Add Comment
Please, Sign In to add comment