import glob
import os

import matplotlib.pyplot as plt
import mne
import numpy as np
import pandas as pd
from mne.time_frequency import tfr_array_morlet
from scipy.io import wavfile
from scipy.signal import resample

from utils import text_order, add_phonetic_info

mne.utils.set_log_level(None)
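# Example: align word/phoneme annotations with the raw MEG recording of one
# MASC subject, rebuild the audio volume from the stimulus wav files, and
# sanity-check the audio/MEG alignment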
data_path = '/home/jrk/data/cnrs/masc/example_subject/raw'
ID2TEXT = {0: 'lw1',
           1: 'cable_spool_fort',
           2: 'easy_money',
           3: 'The_Black_Willow'}
texts = {'The_Black_Willow': 6 * 2,
         'cable_spool_fort': 3 * 2,
         'easy_money': 4 * 2,
         'lw1': 2 * 2}
subject = 'A0253'
tmin = -2.
tmax = 2.
hpf = None
lpf = 60.
time_decim = 40
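# texts maps each story to its number of wav files; hpf/lpf are the filter
# band applied below, while tmin, tmax and time_decim look like parameters
# for a later epoching step (they are unused in this snippet)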
# get the story order for this subject to figure out the text name
# (this example processes only the first text)
logfile = glob.glob("%s/%s*.log" % (data_path, subject))
assert len(logfile) == 1
logfile = logfile[0]
text_orders = text_order(logfile)
text_number = 0
text_name = ID2TEXT[int(text_orders[text_number])]
# get speech timing information
info_fname = './timing_info/%s-json_timing_wPhonemes.csv' % text_name
annot = pd.read_csv(info_fname)
# add phoneme features (just for the example)
annot = add_phonetic_info(annot)
# only keep events where p2fa successfully found the timing in
# the wav file and it is a normal sentence trial
not_nan_idx = pd.notnull(annot['start'].values)
annot = annot[not_nan_idx].query("start != 0")
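# annot now has one row per word/phoneme, with 'start'/'stop' in seconds
# relative to the corresponding wav file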
# load data
raw_fname = glob.glob("%s/%s_MASC*_%s_*[0-9].con" % (
    data_path, subject, text_number + 1))
assert len(raw_fname) == 1
raw_fname = raw_fname[0]
raw = mne.io.read_raw_kit(raw_fname, preload=True, slope='+')
# get triggers sent to MEG (onset of each wav file)
raw_events = mne.find_events(raw, min_duration=0.002)
# only keep triggers from the audio -- not button responses
raw_events = raw_events[raw_events[:, 2] > 8]
raw.pick_types(meg=True)
raw.filter(hpf, lpf)
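# NB: find_events was run before pick_types(meg=True) because it reads the
# trigger (stim) channels, which the MEG-only pick discards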
# shift the word/phoneme timings of each wav file into MEG sample indices
for wav_number in range(texts[text_name]):
    wav_rows = annot.query("wav_file == @wav_number").index
    # convert seconds to samples, then add the sample index of the
    # trigger marking this wav file's onset in the MEG recording
    annot.loc[wav_rows, ['start', 'stop']] = (
        annot.loc[wav_rows, ['start', 'stop']] * raw.info['sfreq']
        + raw_events[wav_number][0])
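# annot 'start'/'stop' are now expressed in MEG samples, aligned with raw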
# compute audio power
raw_volume = np.zeros(raw.n_times)
for wav_number in range(texts[text_name]):
    print(wav_number)
    wav_fname = os.path.join(data_path, '../../wav_files',
                             '%s_%i.wav' % (text_name, wav_number))
    fs, data = wavfile.read(wav_fname)
    # wavelet decomposition: power at 100 log-spaced frequencies
    # between 100 Hz and 1 kHz, decimated by 10
    tfr = tfr_array_morlet(data[None, None].astype(float), fs,
                           np.logspace(2, 3, 100),
                           decim=10, output='power')
    # volume: total power across frequencies
    volume = tfr[0][0].sum(0)
    # resample to the MEG sampling frequency (the factor 10 undoes
    # the wavelet decimation when computing the target length)
    volume_decim = resample(volume, int(
        10. * len(volume) / fs * raw.info['sfreq']))
    # insert at the wav file's onset in the MEG recording
    start = raw_events[wav_number, 0]
    raw_volume[start:start + len(volume_decim)] = volume_decim
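# raw_volume is the audio power envelope on the MEG time axis, nonzero only
# while a wav file was playing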
# check alignment: average volume around each voiced-phoneme onset
voiced = annot.loc[annot.phonation == 'v', 'start'].values.astype(int)
sounds = np.array([raw_volume[voiced + t] for t in range(-500, 2000)])
# plot mean volume with regard to voicing onset (index 500 is t=0)
plt.plot(np.mean(sounds, 1))
plt.axvline(520)
plt.show()
- # Correlation between meg and volume
- Y = raw._data
- Y -= Y.mean(1, keepdims=True)
- Y -= Y.std(1, keepdims=True)
- coef = list()
- for t in range(0, 500, 10):
- X = np.r_[np.diff(np.roll(raw_volume, t)), 0]
- coef.append(X[None] @ Y.T)
- coef = np.squeeze(coef)
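# coef has shape (n_lags, n_channels): the projection of each lagged volume
# derivative onto every z-scored MEG channel; a peak at a plausible auditory
# latency would indicate that audio and MEG are well aligned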