|
@@ -0,0 +1,129 @@
|
|
|
+import os, json, h5py, time
|
|
|
+import numpy as np
|
|
|
+from scipy import signal
|
|
|
+
|
|
|
+
|
|
|
def pack(session_path):
    """
    Pack independent tracking datasets into a single HDF5 file.

    File has the following structure:

    /raw
        /positions - raw positions from .csv
        /events    - raw events from .csv
        /sounds    - raw sounds from .csv
    /processed
        /timeline   - matrix of [time, x, y, speed] sampled at 100Hz,
                      positions smoothed with a gaussian kernel
        /trial_idxs - matrix of trial indices to timeline
        /sound_idxs - matrix of sound indices to timeline

    Each dataset has an attribute 'headers' with the description of columns.

    Parameters:
        session_path - directory containing one <session>.json parameters file
                       and positions.csv / events.csv / sounds.csv

    Returns the path of the created .h5 file.

    Raises FileNotFoundError if no .json parameters file is present.
    """
    json_files = [x for x in os.listdir(session_path) if x.endswith('.json')]
    if not json_files:
        raise FileNotFoundError('No .json parameters file found in %s' % session_path)
    params_file = json_files[0]

    with open(os.path.join(session_path, params_file)) as json_file:
        parameters = json.load(json_file)

    # splitext (not split('.')) so dotted session names keep their full stem
    h5name = os.path.join(session_path, '%s.h5' % os.path.splitext(params_file)[0])
    with h5py.File(h5name, 'w') as f:  # overwrite mode

        # -------- save raw data ------------
        raw = f.create_group('raw')
        raw.attrs['parameters'] = json.dumps(parameters)

        for ds_name in ['positions', 'events', 'sounds']:
            filename = os.path.join(session_path, '%s.csv' % ds_name)
            with open(filename) as ff:
                headers = ff.readline()  # first line holds column names
            data = np.loadtxt(filename, delimiter=',', skiprows=1)

            ds = raw.create_dataset(ds_name, data=data)
            ds.attrs['headers'] = headers

        # TODO - saving contours! and get file names from the config

        # -------- save processed ------------
        proc = f.create_group('processed')

        positions = np.array(f['raw']['positions'])

        # TODO remove outliers - position jumps over 20cm?

        # Resample irregular raw positions onto a uniform 100 Hz timeline
        # by nearest-neighbour lookup (raw samples assumed time-ordered).
        time_freq = 100  # Hz
        s_start, s_end = positions[0, 0], positions[-1, 0]
        times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))
        pos_at_freq = np.zeros((len(times), 3))

        curr_idx = 0
        for i, t in enumerate(times):
            # advance while the next raw sample is strictly closer to t;
            # 'while' (not 'if') keeps up when raw sampling exceeds 100 Hz
            while curr_idx < len(positions) - 1 and \
                    np.abs(t - positions[curr_idx, 0]) > np.abs(t - positions[curr_idx + 1, 0]):
                curr_idx += 1
            pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])

        # make time relative to session start
        pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]

        # smooth x/y with a gaussian kernel spanning ~0.5 s at 100 Hz
        width = 50  # 50 points = 0.5 sec at 100Hz
        # signal.windows.gaussian: signal.gaussian was removed in SciPy 1.13
        kernel = signal.windows.gaussian(width, std=width / 7.2)

        x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()
        y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()

        # instantaneous speed; last sample duplicated to match timeline length
        dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))
        dt = np.diff(pos_at_freq[:, 0])
        speed = np.concatenate([dx / dt, [dx[-1] / dt[-1]]])

        timeline = proc.create_dataset(
            'timeline',
            data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))
        timeline.attrs['headers'] = 'time, x, y, speed'

        # save trials - events are assumed to come as (start, end) row pairs
        events = np.array(f['raw']['events'])
        events[:, 0] = events[:, 0] - s_start

        t_count = len(np.unique(events[events[:, -1] != 0][:, -2]))
        trials = np.zeros((t_count, 6))
        for i in range(t_count):
            # nearest timeline indices for trial start / end times
            t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2 * i][0])).argmin()
            t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2 * i + 1][0])).argmin()
            # end-event code > 1 marks a failed trial — TODO confirm convention
            state = 0 if events[2 * i + 1][-1] > 1 else 1

            trials[i] = (t_start_idx, t_end_idx,
                         events[2 * i][1], events[2 * i][2], events[2 * i][3], state)

        trial_idxs = proc.create_dataset('trial_idxs', data=trials)
        trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'

        # save sounds - map each sound event onto the timeline
        sounds = np.array(f['raw']['sounds'])
        sounds[:, 0] = sounds[:, 0] - s_start

        sound_idxs = np.zeros((len(sounds), 2))
        left_idx = 0
        for i in range(len(sounds)):
            # walk forward while the distance to this sound time keeps shrinking
            # (sounds assumed time-ordered, so the scan never restarts)
            delta = 10 ** 5
            while left_idx < len(pos_at_freq) and \
                    np.abs(sounds[i][0] - pos_at_freq[left_idx, 0]) < delta:
                delta = np.abs(sounds[i][0] - pos_at_freq[left_idx, 0])
                left_idx += 1

            # NOTE(review): left_idx ends one past the closest sample —
            # preserved as-is since downstream data may depend on it; verify.
            sound_idxs[i] = (left_idx, sounds[i][1])

        sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)
        sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'

    return h5name
|