import argparse

import numpy as np
import pandas as pd
from tqdm import tqdm
import statsmodels.api as sm
from statsmodels.formula.api import ols
from statsmodels.stats.anova import anova_lm
#from pygam import PoissonGAM, LinearGAM, GammaGAM
#from pygam import l as linear_term
#from pygam import s as spline_term

from parameters import DATAPATH, MINRATE, NPUPILAREABINS, NGAMSPLINES
from util import load_data, unbiased_variance, times_to_counts, _resample_data, filter_units, sort_data


def arousal_rate_matrix(spk_rates, pupil_area, **kwargs):
    """
    Get a matrix of firing rate histograms for different levels of arousal.

    Parameters
    ----------
    spk_rates : np.ndarray
        Firing rates, one value per time point.
    pupil_area : np.ndarray
        Pupil area values aligned with ``spk_rates``.
    **kwargs
        Forwarded to ``get_binned_rates``; must contain the key ``'nbins'``
        (number of firing-rate histogram bins).

    Returns
    -------
    rate_bins : np.ndarray
        Firing-rate histogram bin edges (length ``nbins + 1``).
    area_bins : np.ndarray
        Pupil-area bin edges, as returned by ``get_binned_rates``.
    rate_mat : np.ndarray
        Matrix of shape ``(n_rate_bins, n_area_bins)``; each column is the
        normalized firing-rate histogram for one pupil-area bin.
    """
    # NOTE(review): get_binned_rates is neither defined nor imported in this
    # file -- presumably it lives in `util`; confirm and import it there.
    area_bins, binned_area, binned_rates = get_binned_rates(spk_rates, pupil_area, **kwargs)
    # eps on the upper edge so the maximum rate falls inside the last bin
    rate_bins = np.linspace(spk_rates.min(), spk_rates.max() + np.finfo('float').eps,
                            kwargs['nbins'] + 1)

    # Fill pupil area rate matrix: one density histogram per area bin,
    # rescaled by the number of rate bins
    rate_mat = np.zeros((len(rate_bins) - 1, len(area_bins) - 1))
    for bin_i, rates in enumerate(binned_rates):
        rate_hist = np.histogram(rates, bins=rate_bins, density=True)[0] / (len(rate_bins) - 1)
        rate_mat[:, bin_i] += rate_hist
    return rate_bins, area_bins, rate_mat


if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument('e_name')
    parser.add_argument('spk_type')
    args = parser.parse_args()

    # Load pupil and spike data for the experiment and keep only units
    # whose firing rate clears MINRATE
    df_pupil = load_data('pupil', [args.e_name])
    df_spikes = load_data('spikes', [args.e_name])
    df_spikes = filter_units(df_spikes, MINRATE)
    df = pd.merge(df_pupil, df_spikes, on=['m', 's', 'e'])

    seriess = []
    for idx, row in tqdm(df.iterrows(), total=len(df)):
        data = {
            'm': row['m'],
            's': row['s'],
            'e': row['e'],
            'u': row['u'],
        }
        if args.spk_type == 'ratio':
            # Burst/tonic ratio; eps in the numerator avoids a 0/0 ratio
            # when both counts are zero (a zero tonic count alone still
            # yields inf)
            row = times_to_counts(row, ['tonicspk', 'burstspk'], t0='pupil_tpts')
            row['ratio_counts'] = (row['burstspk_counts'] + np.finfo('float').eps) / row['tonicspk_counts']
        else:
            row = times_to_counts(row, [args.spk_type], t0='pupil_tpts')
        row = _resample_data(row, ['pupil_area'], t0='pupil_tpts')

        # Get pupil area gradient
        row['pupil_gradient'] = np.gradient(row['pupil_area'])

        spk_counts = row['{}_counts'.format(args.spk_type)]
        data['rate_max'] = spk_counts.max()
        data['rate_min'] = spk_counts.min()

        # Tuning of spike counts to pupil size and to its rate of change
        for var in ['area', 'gradient']:
            regressor = row['pupil_{}'.format(var)]
            data['{}_max'.format(var)] = regressor.max()
            data['{}_min'.format(var)] = regressor.min()

            if var == 'area':
                # Bin rates across the central 95% of pupil sizes
                bin_min, bin_max = np.percentile(regressor, [2.5, 97.5])
                #bin_min, bin_max = regressor.min(), regressor.max()
                bins = np.linspace(bin_min, bin_max, NPUPILAREABINS + 1)
            elif var == 'gradient':
                # Symmetric bins around zero (constriction vs. dilation),
                # NPUPILAREABINS on each side => 2 * NPUPILAREABINS intervals
                bin_max = np.percentile(np.abs(regressor), 97.5)
                bins_neg = np.linspace(-1 * bin_max, 0, NPUPILAREABINS + 1)
                bins_pos = np.linspace(0, bin_max, NPUPILAREABINS + 1)
                bins = np.concatenate([bins_neg[:-1], bins_pos])

            binned_counts = sort_data(spk_counts, regressor, bins=bins)
            data[f'{var}_means'] = np.array([counts.mean() for counts in binned_counts])

            # ANOVA (linear OLS with categorical pupil size)
            # BUG FIX: clip to the actual number of intervals, len(bins) - 1.
            # The gradient bins span 2 * NPUPILAREABINS intervals, so the old
            # clip(1, NPUPILAREABINS) collapsed all dilation bins into one
            # category; for 'area' the two clips are equivalent.
            digitized_regressor = np.digitize(regressor, bins=bins).clip(1, len(bins) - 1)
            df_anova = pd.DataFrame(np.column_stack([spk_counts, digitized_regressor]),
                                    columns=['count', 'bin'])
            anova_model = ols('count ~ C(bin)', data=df_anova).fit()  # use formula to specify model
            data['{}_p'.format(var)] = anova_model.f_pvalue

            # Linear model (linear OLS with sorted categorical pupil size):
            # slope of the sorted per-bin means vs. their rank, i.e. a
            # monotone measure of tuning depth
            mean_counts = np.array([counts.mean() for counts in binned_counts if len(counts) > 0])
            mean_counts.sort()
            y = mean_counts
            X = sm.add_constant(np.arange(len(mean_counts)))  # use design matrix to specify model
            linear_model = sm.OLS(y, X).fit()
            intercept, slope = linear_model.params
            data['{}_slope'.format(var)] = slope

            """
            # Poisson GAM model for rates
            gam_model = PoissonGAM(spline_term(0, n_splines=NGAMSPLINES), fit_intercept=True)
            # Search over parameter controlling smoothness (lambda) for best fit
            gam_model.gridsearch(regressor[..., np.newaxis], spk_counts, keep_best=True, progress=False)
            data['{}_fit'.format(var)] = gam_model.predict(gam_model.generate_X_grid(term=0))
            data['{}_dev'.format(var)] = gam_model.score(regressor[..., np.newaxis], spk_counts)
            """

            # Get rate variance in pupil area bins (reuse binned_counts
            # computed above; the original re-ran sort_data with identical
            # arguments)
            data['{}_var'.format(var)] = np.array([unbiased_variance(rates) for rates in binned_counts])

        seriess.append(pd.Series(data=data))

    df_tuning = pd.DataFrame(seriess)
    filename = 'sizetuning_{}_{}.pkl'.format(args.e_name, args.spk_type)
    df_tuning.to_pickle(DATAPATH + filename)