Bläddra i källkod

little refactoring to modules

asobolev 2 år sedan
förälder
incheckning
b7e47c20ee
7 ändrade filer med 239 tillägg och 19 borttagningar
  1. 0 0
      modules/performance.py
  2. 129 0
      modules/postprocessing.py
  3. 109 18
      performance.ipynb
  4. 0 0
      sorting/mcsHDF2dat.ipynb
  5. 0 0
      sorting/ss-klusta.ipynb
  6. 0 0
      sorting/ss-neurosuite.ipynb
  7. 1 1
      utils.ipynb

functions.py → modules/performance.py


+ 129 - 0
modules/postprocessing.py

@@ -0,0 +1,129 @@
+import os, json, h5py, time
+import numpy as np
+from scipy import signal
+
+
+def pack(session_path):
+    """
+    Pack independent tracking datasets into a single HDF5 file.
+    
+    File has the following structure:
+    
+    /raw
+        /positions      - raw positions from .csv
+        /events         - raw events from .csv
+        /sounds         - raw sounds from .csv
+    /processed
+        /timeline       - matrix of [time, x, y, speed] resampled at 100Hz (see time_freq below), positions smoothed with a gaussian kernel
+        /trial_idxs     - matrix of trial indices to timeline
+        /sound_idxs     - matrix of sound indices to timeline
+        
+    each dataset has an attribute 'headers' with the description of columns.
+
+    session_path: directory with the session .json config and the
+    positions/events/sounds .csv files. Returns the path to the
+    created (overwritten) HDF5 file.
+    """
+    # the session folder is expected to contain exactly one .json config;
+    # its basename also names the output .h5 file
+    params_file = [x for x in os.listdir(session_path) if x.endswith('.json')][0]
+
+    with open(os.path.join(session_path, params_file)) as json_file:
+        parameters = json.load(json_file)
+    
+    h5name = os.path.join(session_path, '%s.h5' % params_file.split('.')[0])
+    with h5py.File(h5name, 'w') as f:  # overwrite mode
+
+
+        # -------- save raw data ------------
+        raw = f.create_group('raw')
+        raw.attrs['parameters'] = json.dumps(parameters)
+
+        # each CSV: first line is the header, rest is a numeric matrix
+        for ds_name in ['positions', 'events', 'sounds']:
+            filename = os.path.join(session_path, '%s.csv' % ds_name)
+            with open(filename) as ff:
+                headers = ff.readline()
+            data = np.loadtxt(filename, delimiter=',', skiprows=1)
+
+            ds = raw.create_dataset(ds_name, data=data)
+            ds.attrs['headers'] = headers
+        
+        # TODO - saving contours! and get file names from the config
+#         with open(os.path.join(session_path, '%s.csv' % 'contours')) as ff:
+#             data = ff.readlines()
+        
+#         headers = data[0]   # skip headers line
+#         contours = [[(x.split(':')[0], x.split(':')[1]) for x in contour.split(',')] for contour in data[1:]]
+#         contours = [np.array(contour) for contour in contours]
+
+        # -------- save processed ------------
+        proc = f.create_group('processed')
+
+        positions = np.array(f['raw']['positions'])
+
+        # TODO remove outliers - position jumps over 20cm?
+        #diffs_x = np.diff(positions[:, 1])
+        #diffs_y = np.diff(positions[:, 2])
+        #dists = np.sqrt(diffs_x**2 + diffs_y**2)
+        #np.where(dists > 0.2 / pixel_size)[0]
+
+        # convert timeline to 100 Hz
+        time_freq = 100  # at 100Hz
+        s_start, s_end = positions[:, 0][0], positions[:, 0][-1]
+        times = np.linspace(s_start, s_end, int((s_end - s_start) * time_freq))
+        pos_at_freq = np.zeros((len(times), 3))
+
+        # nearest-neighbor resampling: both `times` and positions[:, 0] are
+        # monotonically increasing, so a single forward pointer suffices
+        curr_idx = 0
+        for i, t in enumerate(times):
+            if curr_idx < len(positions) - 1 and \
+                np.abs(t - positions[:, 0][curr_idx]) > np.abs(t - positions[:, 0][curr_idx + 1]):
+                curr_idx += 1
+            pos_at_freq[i] = (t, positions[curr_idx][1], positions[curr_idx][2])
+
+        # make time from session start
+        pos_at_freq[:, 0] = pos_at_freq[:, 0] - pos_at_freq[0][0]
+
+        width = 50  # 50 samples ~= 0.5 sec at 100Hz
+        # NOTE(review): signal.gaussian was deprecated and later removed from
+        # scipy.signal — newer SciPy needs scipy.signal.windows.gaussian.
+        # std = width/7.2 presumably tapers the kernel near zero at the
+        # window edges — TODO confirm rationale for 7.2.
+        kernel = signal.gaussian(width, std=(width) / 7.2)
+
+        x_smooth = np.convolve(pos_at_freq[:, 1], kernel, 'same') / kernel.sum()
+        y_smooth = np.convolve(pos_at_freq[:, 2], kernel, 'same') / kernel.sum()
+
+        # speed: per-sample displacement / dt; last sample duplicated to keep length
+        dx = np.sqrt(np.square(np.diff(x_smooth)) + np.square(np.diff(y_smooth)))
+        dt = np.diff(pos_at_freq[:, 0])
+        speed = np.concatenate([dx/dt, [dx[-1]/dt[-1]]])
+
+        timeline = proc.create_dataset('timeline', data=np.column_stack([pos_at_freq[:, 0], x_smooth, y_smooth, speed]))
+        timeline.attrs['headers'] = 'time, x, y, speed'
+
+        # save trials
+        events = np.array(f['raw']['events'])
+        events[:, 0] = events[:, 0] - s_start
+
+        # assumes events come in (start, end) pairs per trial, trial id in
+        # column -2 and an event code in column -1 — TODO confirm CSV layout
+        t_count = len(np.unique(events[events[:, -1] != 0][:, -2]))
+        trials = np.zeros((t_count, 6))
+        for i in range(t_count):
+            t_start_idx = (np.abs(pos_at_freq[:, 0] - events[2*i][0])).argmin()
+            t_end_idx = (np.abs(pos_at_freq[:, 0] - events[2*i + 1][0])).argmin()
+            state = 0 if events[2*i + 1][-1] > 1 else 1  # presumably code > 1 marks a failed trial — TODO confirm
+
+            trials[i] = (t_start_idx, t_end_idx, events[2*i][1], events[2*i][2], events[2*i][3], state)
+
+        trial_idxs = proc.create_dataset('trial_idxs', data=trials)
+        trial_idxs.attrs['headers'] = 't_start_idx, t_end_idx, target_x, target_y, target_r, fail_or_success'
+
+        # save sounds
+        sounds = np.array(f['raw']['sounds'])
+        sounds[:, 0] = sounds[:, 0] - s_start
+
+        # map each sound onset to a timeline index by walking forward while
+        # the time distance keeps shrinking (delta starts as a huge sentinel).
+        # NOTE(review): the loop increments left_idx once more after reaching
+        # the minimum distance, so the stored index is one PAST the nearest
+        # timeline sample — consider argmin as used for trials above.
+        sound_idxs = np.zeros((len(sounds), 2))
+        left_idx = 0
+        delta = 10**5
+        for i in range(len(sounds)):
+            while left_idx < len(pos_at_freq) and \
+                    np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx]) < delta:
+                delta = np.abs(sounds[i][0] - pos_at_freq[:, 0][left_idx])
+                left_idx += 1
+
+            sound_idxs[i] = (left_idx, sounds[i][1])
+            delta = 10**5
+
+        sound_idxs = proc.create_dataset('sound_idxs', data=sound_idxs)
+        sound_idxs.attrs['headers'] = 'timeline_idx, sound_id'
+        
+    return h5name

+ 109 - 18
performance.ipynb

@@ -2,7 +2,7 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 3,
    "id": "870d90fd",
    "metadata": {},
    "outputs": [],
@@ -27,7 +27,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 4,
    "id": "d4832744",
    "metadata": {},
    "outputs": [],
@@ -38,14 +38,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 10,
+   "execution_count": 5,
    "id": "b10243bb",
    "metadata": {},
    "outputs": [],
    "source": [
     "# session paths\n",
     "source = '/home/sobolev/nevermind/Andrey/data'\n",
-    "source = 'Z:\\\\Andrey\\\\data'\n",
+    "#source = 'Z:\\\\Andrey\\\\data'\n",
     "animal = '008229'\n",
     "\n",
     "# single sessions\n",
@@ -57,24 +57,24 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 11,
+   "execution_count": 8,
    "id": "0e732c48",
    "metadata": {},
    "outputs": [
     {
      "data": {
       "text/plain": [
-       "['008229_hippoSIT_2022-05-09_15-55-58',\n",
-       " '008229_hippoSIT_2022-05-09_21-01-15',\n",
-       " '008229_hippoSIT_2022-05-10_09-02-35',\n",
-       " '008229_hippoSIT_2022-05-10_14-01-46',\n",
-       " '008229_hippoSIT_2022-05-11_13-55-55',\n",
-       " '008229_hippoSIT_2022-05-12_15-36-57',\n",
-       " '008229_hippoSIT_2022-05-13_09-48-00',\n",
-       " '008229_hippoSIT_2022-05-13_15-17-12']"
+       "['008229_hippoSIT_2022-06-01_11-08-51',\n",
+       " '008229_hippoSIT_2022-06-01_16-09-26',\n",
+       " '008229_hippoSIT_2022-06-02_15-26-01',\n",
+       " '008229_hippoSIT_2022-06-02_20-42-08',\n",
+       " '008229_hippoSIT_2022-06-03_09-18-49',\n",
+       " '008229_hippoSIT_2022-06-08_10-57-40',\n",
+       " '008229_hippoSIT_2022-06-08_15-52-52',\n",
+       " '008229_hippoSIT_2022-06-09_09-25-49']"
       ]
      },
-     "execution_count": 11,
+     "execution_count": 8,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -555,7 +555,9 @@
    "cell_type": "code",
    "execution_count": 49,
    "id": "4a3898f0",
-   "metadata": {},
+   "metadata": {
+    "scrolled": true
+   },
    "outputs": [
     {
      "data": {
@@ -604,7 +606,96 @@
   {
    "cell_type": "code",
    "execution_count": null,
-   "id": "424eb706",
+   "id": "dcd46d5b",
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  },
+  {
+   "cell_type": "markdown",
+   "id": "f506ea51",
+   "metadata": {},
+   "source": [
+    "## Multiple island performance"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "9bf8003f",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "['008229_hippoSIT_2022-06-09_14-15-49']"
+      ]
+     },
+     "execution_count": 1,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "# session paths\n",
+    "source = '/home/sobolev/nevermind/Andrey/data'\n",
+    "#source = 'Z:\\\\Andrey\\\\data'\n",
+    "animal = '008229'\n",
+    "\n",
+    "# single sessions\n",
+    "sessions = ['008229_hippoSIT_2022-06-09_14-15-49']\n",
+    "\n",
+    "# or list of sessions\n",
+    "#sessions = get_sessions_list(os.path.join(source, animal), animal)[-1:]\n",
+    "sessions"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "id": "63a0928f",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "session = sessions[0]\n",
+    "s_path = os.path.join(source, animal, session)\n",
+    "h5name = os.path.join(s_path, session + '.h5')\n",
+    "jsname = os.path.join(s_path, session + '.json')\n",
+    "\n",
+    "# loading session configuration\n",
+    "with open(jsname, 'r') as f:\n",
+    "    cfg = json.load(f)\n",
+    "\n",
+    "with h5py.File(h5name, 'r') as f:\n",
+    "    tl = np.array(f['processed']['timeline'])  # time, X, Y, speed\n",
+    "    trial_idxs = np.array(f['processed']['trial_idxs']) # idx start, idx end, X, Y, R, trial result (idx to tl)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "id": "a8f36fe3",
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([60.00033188, -0.2735146 ,  0.06554206,  0.10341734])"
+      ]
+     },
+     "execution_count": 12,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "tl[int(trial_idxs[0][1])]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "id": "8c9ee1ea",
    "metadata": {},
    "outputs": [],
    "source": []
@@ -612,7 +703,7 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python 3",
+   "display_name": "Python 3 (ipykernel)",
    "language": "python",
    "name": "python3"
   },
@@ -626,7 +717,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.8"
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,

pipeline/mcsHDF2dat.ipynb → sorting/mcsHDF2dat.ipynb


pipeline/ss-klusta.ipynb → sorting/ss-klusta.ipynb


pipeline/ss-neurosuite.ipynb → sorting/ss-neurosuite.ipynb


+ 1 - 1
utils.ipynb

@@ -204,7 +204,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.8.5"
+   "version": "3.8.10"
   }
  },
  "nbformat": 4,