Bläddra i källkod

little refactoring to modules

asobolev 2 år sedan
förälder
incheckning
b7e47c20ee
7 ändrade filer med 239 tillägg och 19 borttagningar
  1. 0 0
      modules/performance.py
  2. 129 0
      modules/postprocessing.py
  3. 109 18
      performance.ipynb
  4. 0 0
      sorting/mcsHDF2dat.ipynb
  5. 0 0
      sorting/ss-klusta.ipynb
  6. 0 0
      sorting/ss-neurosuite.ipynb
  7. 1 1
      utils.ipynb

functions.py → modules/performance.py


+ 129 - 0
modules/postprocessing.py

@@ -0,0 +1,129 @@
+import os, json, h5py, time
+import numpy as np
+from scipy import signal
+
+
+def pack(session_path):
+    """
+    Pack independent tracking datasets into a single HDF5 file.
+    
+    File has the following structure:
+    
+    /raw
+        /positions      - raw positions from .csv
+        /events         - raw events from .csv
+        /sounds         - raw sounds from .csv
+    /processed
+        /timeline       - matrix of [time, x, y, speed] resampled at 100Hz (see time_freq below), positions smoothed with a gaussian kernel
+        /trial_idxs     - matrix of trial indices to timeline
+        /sound_idxs     - matrix of sound indices to timeline
+        
+    each dataset has an attribute 'headers' with the description of columns.
+
+    session_path: directory with the session .json config and the
+    positions/events/sounds .csv files. Returns the path to the
+    created (overwritten) HDF5 file.
+    """
+    # the session folder is expected to contain exactly one .json config;
+    # its basename also names the output .h5 file
+    params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]
+
+    with open(os.path.join(session_path, params_file)) as json_file:
+        parameters = json.load(json_file)
+    
+    h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])
+    with h5py.File(h5name, 'w') as f:  # overwrite mode
+
+
+        # -------- save raw data ------------
+        raw = f.create_group('raw')
+        raw.attrs['parameters'] = json.dumps(parameters)
+
+        # each CSV: first line is the header, rest is a numeric matrix
+        for ds_name in ['positions', 'events', 'sounds']:
+            filename = os.path.join(session_path, '%s.csv' % ds_name)
+            with open(filename) as ff:
+                headers = ff.readline()
+            data = np.loadtxt(filename, delimiter=',', skiprows=1)
+
+            ds = raw.create_dataset(ds_name, data=data)
+            ds.attrs['headers'] = headers
+        
+        # TODO - saving contours! and get file names from the config
+#         with open(os.path.join(session_path, '%s.csv' % 'contours')) as ff:
+#             data = ff.readlines()
+        
+#         headers = data[0]   # skip headers line
+#         contours = [[(x.split(':')[0], x.split(':')[1]) for x in contour.split(',')] for contour in data[1:]]
+#         contours = [np.array(contour) for contour in contours]
+
+        # -------- save processed ------------
+        proc = f.create_group('processed')
+
+        positions = np.array(f['raw']['positions'])
+
+        # TODO remove outliers - position jumps over 20cm?
+        #diffs_x = np.diff(positions[:, 1])
+        #diffs_y = np.diff(positions[:, 2])
+        #dists = np.sqrt(diffs_x**2 + diffs_y**2)
+        #np.where(dists > 0.2 / pixel_size)[0]
+
+        # convert timeline to 100 Hz
+        time_freq = 100  # at 100Hz
+        s_start, s_end = positions[:, 0][0], positions[:, 0][-1]
+        times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))
+        pos_at_freq = np.zeros((len(times), 3))
+
+        # nearest-neighbor resampling: both `times` and positions[:, 0] are
+        # monotonically increasing, so a single forward pointer suffices
+        curr_idx = 0
+        for i, t in enumerate(times):
+            if curr_idx < len(positions) - 1 and \
+                np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):
+                curr_idx += 1
+            pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])
+
+        # make time from session start
+        pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]
+
+        width = 50  # 50 samples ~= 0.5 sec at 100Hz
+        # NOTE(review): signal.gaussian was deprecated and later removed from
+        # scipy.signal — newer SciPy needs scipy.signal.windows.gaussian.
+        # std = width/7.2 presumably tapers the kernel near zero at the
+        # window edges — TODO confirm rationale for 7.2.
+        kernel = signal.gaussian(width, std=(width) / 7.2)
+
+        x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()
+        y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()
+
+        # speed: per-sample displacement / dt; last sample duplicated to keep length
+        dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))
+        dt = np.diff(pos_at_freq[:, 0])
+        speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])
+
+        timeline = proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))
+        timeline.attrs['headers'] = 'time, x, y, speed'
+
+        # save trials
+        events = np.array(f['raw']['events'])
+        events[:, 0] = events[:, 0] - s_start
+
+        # assumes events come in (start, end) pairs per trial, trial id in
+        # column -2 and an event code in column -1 — TODO confirm CSV layout
+        t_count = len(np.unique(events[events[:, -1] != 0][:, -2]))
+        trials = np.zeros((t_count, 6))
+        for i in range(t_count):
+            t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()
+            t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()
+            state = 0 if events[2*i + 1][-1] > 1 else 1  # presumably code > 1 marks a failed trial — TODO confirm
+
+            trials[i] = (t_start_idx, t_end_idx, events[2*i][1], events[2*i][2], events[2*i][3], state)
+
+        trial_idxs = proc.create_dataset('trial_idxs', data=trials)
+        trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'
+
+        # save sounds
+        sounds = np.array(f['raw']['sounds'])
+        sounds[:, 0] = sounds[:, 0] - s_start
+
+        # map each sound onset to a timeline index by walking forward while
+        # the time distance keeps shrinking (delta starts as a huge sentinel).
+        # NOTE(review): the loop increments left_idx once more after reaching
+        # the minimum distance, so the stored index is one PAST the nearest
+        # timeline sample — consider argmin as used for trials above.
+        sound_idxs = np.zeros((len(sounds), 2))
+        left_idx = 0
+        delta = 10**5
+        for i in range(len(sounds)):
+            while left_idx < len(pos_at_freq) and \
+                    np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:
+                delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])
+                left_idx += 1
+
+            sound_idxs[i] = (left_idx, sounds[i][1])
+            delta = 10**5
+
+        sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)
+        sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'
+        
+    return h5name

+ 109 - 18
performance.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "870d90fd",
    "metadata": {},
    "outputs": [],
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "d4832744",
    "metadata": {},
    "outputs": [],
@@ -38,14 +38,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "id": "b10243bb",
    "metadata": {},
    "outputs": [],
    "source": [
     "# session paths\n",
     "source = '/home/sobolev/nevermind/Andrey/data'\n",
-    "source = 'Z:\\\\Andrey\\\\data'\n",
+    "#source = 'Z:\\\\Andrey\\\\data'\n",
     "animal = '008229'\n",
     "\n",
     "# single sessions\n",
@@ -57,24 +57,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
    "id": "0e732c48",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "['008229_hippoSIT_2022-05-09_15-55-58',\n",
-       " '008229_hippoSIT_2022-05-09_21-01-15',\n",
-       " '008229_hippoSIT_2022-05-10_09-02-35',\n",
-       " '008229_hippoSIT_2022-05-10_14-01-46',\n",
-       " '008229_hippoSIT_2022-05-11_13-55-55',\n",
-       " '008229_hippoSIT_2022-05-12_15-36-57',\n",
-       " '008229_hippoSIT_2022-05-13_09-48-00',\n",
-       " '008229_hippoSIT_2022-05-13_15-17-12']"
+       "['008229_hippoSIT_2022-06-01_11-08-51',\n",
+       " '008229_hippoSIT_2022-06-01_16-09-26',\n",
+       " '008229_hippoSIT_2022-06-02_15-26-01',\n",
+       " '008229_hippoSIT_2022-06-02_20-42-08',\n",
+       " '008229_hippoSIT_2022-06-03_09-18-49',\n",
+       " '008229_hippoSIT_2022-06-08_10-57-40',\n",
+       " '008229_hippoSIT_2022-06-08_15-52-52',\n",
+       " '008229_hippoSIT_2022-06-09_09-25-49']"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -555,7 +555,9 @@
    "cell_type": "code",
    "execution_count": 49,
    "id": "4a3898f0",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "data": {
@@ -604,7 +606,96 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "424eb706",
+   "id": "dcd46d5b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f506ea51",
+   "metadata": {},
+   "source": [
+    "## Multiple island performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9bf8003f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['008229_hippoSIT_2022-06-09_14-15-49']"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# session paths\n",
+    "source = '/home/sobolev/nevermind/Andrey/data'\n",
+    "#source = 'Z:\\\\Andrey\\\\data'\n",
+    "animal = '008229'\n",
+    "\n",
+    "# single sessions\n",
+    "sessions = ['008229_hippoSIT_2022-06-09_14-15-49']\n",
+    "\n",
+    "# or list of sessions\n",
+    "#sessions = get_sessions_list(os.path.join(source, animal), animal)[-1:]\n",
+    "sessions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "63a0928f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "session = sessions[0]\n",
+    "s_path = os.path.join(source, animal, session)\n",
+    "h5name = os.path.join(s_path, session + '.h5')\n",
+    "jsname = os.path.join(s_path, session + '.json')\n",
+    "\n",
+    "# loading session configuration\n",
+    "with open(jsname, 'r') as f:\n",
+    "    cfg = json.load(f)\n",
+    "\n",
+    "with h5py.File(h5name, 'r') as f:\n",
+    "    tl = np.array(f['processed']['timeline'])  # time, X, Y, speed\n",
+    "    trial_idxs = np.array(f['processed']['trial_idxs']) # idx start, idx end, X, Y, R, trial result (idx to tl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a8f36fe3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([60.00033188, -0.2735146 ,  0.06554206,  0.10341734])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tl[int(trial_idxs[0][1])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c9ee1ea",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -612,7 +703,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -626,7 +717,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,

pipeline/mcsHDF2dat.ipynb → sorting/mcsHDF2dat.ipynb


pipeline/ss-klusta.ipynb → sorting/ss-klusta.ipynb


pipeline/ss-neurosuite.ipynb → sorting/ss-neurosuite.ipynb


+ 1 - 1
utils.ipynb

@@ -204,7 +204,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,