pandas_bridge.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618
  1. # -*- coding: utf-8 -*-
  2. """
  3. Bridge to the pandas library.
  4. :copyright: Copyright 2014-2016 by the Elephant team, see `doc/authors.rst`.
  5. :license: Modified BSD, see LICENSE.txt for details.
  6. """
  7. from __future__ import division, print_function, unicode_literals
  8. import numpy as np
  9. import pandas as pd
  10. import warnings
  11. import quantities as pq
  12. from elephant.neo_tools import (extract_neo_attributes, get_all_epochs,
  13. get_all_events, get_all_spiketrains)
  14. warnings.simplefilter('once', DeprecationWarning)
  15. warnings.warn("pandas_bridge module will be removed in Elephant v0.8.x",
  16. DeprecationWarning)
  17. def _multiindex_from_dict(inds):
  18. """Given a dictionary, return a `pandas.MultiIndex`.
  19. Parameters
  20. ----------
  21. inds : dict
  22. A dictionary where the keys are annotations or attribute names and
  23. the values are the corresponding annotation or attribute value.
  24. Returns
  25. -------
  26. pandas MultiIndex
  27. """
  28. names, indexes = zip(*sorted(inds.items()))
  29. return pd.MultiIndex.from_tuples([indexes], names=names)
  30. def _sort_inds(obj, axis=0):
  31. """Put the indexes and index levels of a pandas object in sorted order.
  32. Paramters
  33. ---------
  34. obj : pandas Series, DataFrame, Panel, or Panel4D
  35. The object whose indexes should be sorted.
  36. axis : int, list, optional, 'all'
  37. The axis whose indexes should be sorted. Default is 0.
  38. Can also be a list of indexes, in which case all of those axes
  39. are sorted. If 'all', sort all indexes.
  40. Returns
  41. -------
  42. pandas Series, DataFrame, Panel, or Panel4D
  43. A copy of the object with indexes sorted.
  44. Indexes are sorted in-place.
  45. """
  46. if axis == 'all':
  47. return _sort_inds(obj, axis=range(obj.ndim))
  48. if hasattr(axis, '__iter__'):
  49. for iax in axis:
  50. obj = _sort_inds(obj, iax)
  51. return obj
  52. obj = obj.reorder_levels(sorted(obj.axes[axis].names), axis=axis)
  53. return obj.sort_index(level=0, axis=axis, sort_remaining=True)
  54. def _extract_neo_attrs_safe(obj, parents=True, child_first=True):
  55. """Given a neo object, return a dictionary of attributes and annotations.
  56. This is done in a manner that is safe for `pandas` indexes.
  57. Parameters
  58. ----------
  59. obj : neo object
  60. parents : bool, optional
  61. Also include attributes and annotations from parent neo
  62. objects (if any).
  63. child_first : bool, optional
  64. If True (default True), values of child attributes are used
  65. over parent attributes in the event of a name conflict.
  66. If False, parent attributes are used.
  67. This parameter does nothing if `parents` is False.
  68. Returns
  69. -------
  70. dict
  71. A dictionary where the keys are annotations or attribute names and
  72. the values are the corresponding annotation or attribute value.
  73. """
  74. res = extract_neo_attributes(obj, skip_array=True, skip_none=True,
  75. parents=parents, child_first=child_first)
  76. for key, value in res.items():
  77. res[key] = _convert_value_safe(value)
  78. key2 = _convert_value_safe(key)
  79. if key2 is not key:
  80. res[key2] = res.pop(key)
  81. return res
  82. def _convert_value_safe(value):
  83. """Convert `neo` values to a value compatible with `pandas`.
  84. Some types and dtypes used with neo are not safe to use with pandas in some
  85. or all situations.
  86. `quantities.Quantity` don't follow the normal python rule that values
  87. with that are equal should have the same hash, making it fundamentally
  88. incompatible with `pandas`.
  89. On python 3, `pandas` coerces `S` dtypes to bytes, which are not always
  90. safe to use.
  91. Parameters
  92. ----------
  93. value : any
  94. Value to convert (if it has any known issues).
  95. Returns
  96. -------
  97. any
  98. `value` or a version of value with potential problems fixed.
  99. """
  100. if hasattr(value, 'dimensionality'):
  101. return (value.magnitude.tolist(), str(value.dimensionality))
  102. if hasattr(value, 'dtype') and value.dtype.kind == 'S':
  103. return value.astype('U').tolist()
  104. if hasattr(value, 'tolist'):
  105. return value.tolist()
  106. if hasattr(value, 'decode') and not hasattr(value, 'encode'):
  107. return value.decode('UTF8')
  108. return value
  109. def spiketrain_to_dataframe(spiketrain, parents=True, child_first=True):
  110. """Convert a `neo.SpikeTrain` to a `pandas.DataFrame`.
  111. The `pandas.DataFrame` object has a single column, with each element
  112. being the spike time converted to a `float` value in seconds.
  113. The column heading is a `pandas.MultiIndex` with one index
  114. for each of the scalar attributes and annotations. The `index`
  115. is the spike number.
  116. Parameters
  117. ----------
  118. spiketrain : neo SpikeTrain
  119. The SpikeTrain to convert.
  120. parents : bool, optional
  121. Also include attributes and annotations from parent neo
  122. objects (if any).
  123. Returns
  124. -------
  125. pandas DataFrame
  126. A DataFrame containing the spike times from `spiketrain`.
  127. Notes
  128. -----
  129. The index name is `spike_number`.
  130. Attributes that contain non-scalar values are skipped. So are
  131. annotations or attributes containing a value of `None`.
  132. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  133. and annotations of that type are converted to a tuple where the first
  134. element is the scalar value and the second is the string representation of
  135. the units.
  136. """
  137. attrs = _extract_neo_attrs_safe(spiketrain,
  138. parents=parents, child_first=child_first)
  139. columns = _multiindex_from_dict(attrs)
  140. times = spiketrain.magnitude
  141. times = pq.Quantity(times, spiketrain.units).rescale('s').magnitude
  142. times = times[np.newaxis].T
  143. index = pd.Index(np.arange(len(spiketrain)), name='spike_number')
  144. pdobj = pd.DataFrame(times, index=index, columns=columns)
  145. return _sort_inds(pdobj, axis=1)
  146. def event_to_dataframe(event, parents=True, child_first=True):
  147. """Convert a `neo.core.Event` to a `pandas.DataFrame`.
  148. The `pandas.DataFrame` object has a single column, with each element
  149. being the event label from the `event.label` attribute.
  150. The column heading is a `pandas.MultiIndex` with one index
  151. for each of the scalar attributes and annotations. The `index`
  152. is the time stamp from the `event.times` attribute.
  153. Parameters
  154. ----------
  155. event : neo Event
  156. The Event to convert.
  157. parents : bool, optional
  158. Also include attributes and annotations from parent neo
  159. objects (if any).
  160. child_first : bool, optional
  161. If True (default True), values of child attributes are used
  162. over parent attributes in the event of a name conflict.
  163. If False, parent attributes are used.
  164. This parameter does nothing if `parents` is False.
  165. Returns
  166. -------
  167. pandas DataFrame
  168. A DataFrame containing the labels from `event`.
  169. Notes
  170. -----
  171. If the length of event.times and event.labels are not the same,
  172. the longer will be truncated to the length of the shorter.
  173. The index name is `times`.
  174. Attributes that contain non-scalar values are skipped. So are
  175. annotations or attributes containing a value of `None`.
  176. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  177. and annotations of that type are converted to a tuple where the first
  178. element is the scalar value and the second is the string representation of
  179. the units.
  180. """
  181. attrs = _extract_neo_attrs_safe(event,
  182. parents=parents, child_first=child_first)
  183. columns = _multiindex_from_dict(attrs)
  184. times = event.times.rescale('s').magnitude
  185. labels = event.labels.astype('U')
  186. times = times[:len(labels)]
  187. labels = labels[:len(times)]
  188. index = pd.Index(times, name='times')
  189. pdobj = pd.DataFrame(labels[np.newaxis].T, index=index, columns=columns)
  190. return _sort_inds(pdobj, axis=1)
  191. def epoch_to_dataframe(epoch, parents=True, child_first=True):
  192. """Convert a `neo.core.Epoch` to a `pandas.DataFrame`.
  193. The `pandas.DataFrame` object has a single column, with each element
  194. being the epoch label from the `epoch.label` attribute.
  195. The column heading is a `pandas.MultiIndex` with one index
  196. for each of the scalar attributes and annotations. The `index`
  197. is a `pandas.MultiIndex`, with the first index being the time stamp from
  198. the `epoch.times` attribute and the second being the duration from
  199. the `epoch.durations` attribute.
  200. Parameters
  201. ----------
  202. epoch : neo Epoch
  203. The Epoch to convert.
  204. parents : bool, optional
  205. Also include attributes and annotations from parent neo
  206. objects (if any).
  207. child_first : bool, optional
  208. If True (default True), values of child attributes are used
  209. over parent attributes in the event of a name conflict.
  210. If False, parent attributes are used.
  211. This parameter does nothing if `parents` is False.
  212. Returns
  213. -------
  214. pandas DataFrame
  215. A DataFrame containing the labels from `epoch`.
  216. Notes
  217. -----
  218. If the length of `epoch.times`, `epoch.duration`, and `epoch.labels` are
  219. not the same, the longer will be truncated to the length of the shortest.
  220. The index names for `epoch.times` and `epoch.durations` are `times` and
  221. `durations`, respectively.
  222. Attributes that contain non-scalar values are skipped. So are
  223. annotations or attributes containing a value of `None`.
  224. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  225. and annotations of that type are converted to a tuple where the first
  226. element is the scalar value and the second is the string representation of
  227. the units.
  228. """
  229. attrs = _extract_neo_attrs_safe(epoch,
  230. parents=parents, child_first=child_first)
  231. columns = _multiindex_from_dict(attrs)
  232. times = epoch.times.rescale('s').magnitude
  233. durs = epoch.durations.rescale('s').magnitude
  234. labels = epoch.labels.astype('U')
  235. minlen = min([len(durs), len(times), len(labels)])
  236. index = pd.MultiIndex.from_arrays([times[:minlen], durs[:minlen]],
  237. names=['times', 'durations'])
  238. pdobj = pd.DataFrame(labels[:minlen][np.newaxis].T,
  239. index=index, columns=columns)
  240. return _sort_inds(pdobj, axis='all')
  241. def _multi_objs_to_dataframe(container, conv_func, get_func,
  242. parents=True, child_first=True):
  243. """Convert one or more of a given `neo` object to a `pandas.DataFrame`.
  244. The objects can be any list, dict, or other iterable or mapping containing
  245. the object, as well as any neo object that can hold the object.
  246. Objects are searched recursively, so the objects can be nested (such as a
  247. list of blocks).
  248. The column heading is a `pandas.MultiIndex` with one index
  249. for each of the scalar attributes and annotations of the respective
  250. object.
  251. Parameters
  252. ----------
  253. container : list, tuple, iterable, dict, neo container object
  254. The container for the objects to convert.
  255. parents : bool, optional
  256. Also include attributes and annotations from parent neo
  257. objects (if any).
  258. child_first : bool, optional
  259. If True (default True), values of child attributes are used
  260. over parent attributes in the event of a name conflict.
  261. If False, parent attributes are used.
  262. This parameter does nothing if `parents` is False.
  263. Returns
  264. -------
  265. pandas DataFrame
  266. A DataFrame containing the converted objects.
  267. Attributes that contain non-scalar values are skipped. So are
  268. annotations or attributes containing a value of `None`.
  269. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  270. and annotations of that type are converted to a tuple where the first
  271. element is the scalar value and the second is the string representation of
  272. the units.
  273. """
  274. res = pd.concat([conv_func(obj, parents=parents, child_first=child_first)
  275. for obj in get_func(container)], axis=1)
  276. return _sort_inds(res, axis=1)
  277. def multi_spiketrains_to_dataframe(container,
  278. parents=True, child_first=True):
  279. """Convert one or more `neo.SpikeTrain` objects to a `pandas.DataFrame`.
  280. The objects can be any list, dict, or other iterable or mapping containing
  281. spiketrains, as well as any neo object that can hold spiketrains:
  282. `neo.Block`, `neo.ChannelIndex`, `neo.Unit`, and `neo.Segment`.
  283. Objects are searched recursively, so the objects can be nested (such as a
  284. list of blocks).
  285. The `pandas.DataFrame` object has one column for each spiketrain, with each
  286. element being the spike time converted to a `float` value in seconds.
  287. columns are padded to the same length with `NaN` values.
  288. The column heading is a `pandas.MultiIndex` with one index
  289. for each of the scalar attributes and annotations of the respective
  290. spiketrain. The `index` is the spike number.
  291. Parameters
  292. ----------
  293. container : list, tuple, iterable, dict,
  294. neo Block, neo Segment, neo Unit, neo ChannelIndex
  295. The container for the spiketrains to convert.
  296. parents : bool, optional
  297. Also include attributes and annotations from parent neo
  298. objects (if any).
  299. child_first : bool, optional
  300. If True (default True), values of child attributes are used
  301. over parent attributes in the event of a name conflict.
  302. If False, parent attributes are used.
  303. This parameter does nothing if `parents` is False.
  304. Returns
  305. -------
  306. pandas DataFrame
  307. A DataFrame containing the spike times from `container`.
  308. Notes
  309. -----
  310. The index name is `spike_number`.
  311. Attributes that contain non-scalar values are skipped. So are
  312. annotations or attributes containing a value of `None`.
  313. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  314. and annotations of that type are converted to a tuple where the first
  315. element is the scalar value and the second is the string representation of
  316. the units.
  317. """
  318. return _multi_objs_to_dataframe(container,
  319. spiketrain_to_dataframe,
  320. get_all_spiketrains,
  321. parents=parents, child_first=child_first)
  322. def multi_events_to_dataframe(container, parents=True, child_first=True):
  323. """Convert one or more `neo.Event` objects to a `pandas.DataFrame`.
  324. The objects can be any list, dict, or other iterable or mapping containing
  325. events, as well as any neo object that can hold events:
  326. `neo.Block` and `neo.Segment`. Objects are searched recursively, so the
  327. objects can be nested (such as a list of blocks).
  328. The `pandas.DataFrame` object has one column for each event, with each
  329. element being the event label. columns are padded to the same length with
  330. `NaN` values.
  331. The column heading is a `pandas.MultiIndex` with one index
  332. for each of the scalar attributes and annotations of the respective
  333. event. The `index` is the time stamp from the `event.times` attribute.
  334. Parameters
  335. ----------
  336. container : list, tuple, iterable, dict, neo Block, neo Segment
  337. The container for the events to convert.
  338. parents : bool, optional
  339. Also include attributes and annotations from parent neo
  340. objects (if any).
  341. child_first : bool, optional
  342. If True (default True), values of child attributes are used
  343. over parent attributes in the event of a name conflict.
  344. If False, parent attributes are used.
  345. This parameter does nothing if `parents` is False.
  346. Returns
  347. -------
  348. pandas DataFrame
  349. A DataFrame containing the labels from `container`.
  350. Notes
  351. -----
  352. If the length of event.times and event.labels are not the same for any
  353. individual event, the longer will be truncated to the length of the
  354. shorter for that event. Between events, lengths can differ.
  355. The index name is `times`.
  356. Attributes that contain non-scalar values are skipped. So are
  357. annotations or attributes containing a value of `None`.
  358. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  359. and annotations of that type are converted to a tuple where the first
  360. element is the scalar value and the second is the string representation of
  361. the units.
  362. """
  363. return _multi_objs_to_dataframe(container,
  364. event_to_dataframe, get_all_events,
  365. parents=parents, child_first=child_first)
  366. def multi_epochs_to_dataframe(container, parents=True, child_first=True):
  367. """Convert one or more `neo.Epoch` objects to a `pandas.DataFrame`.
  368. The objects can be any list, dict, or other iterable or mapping containing
  369. epochs, as well as any neo object that can hold epochs:
  370. `neo.Block` and `neo.Segment`. Objects are searched recursively, so the
  371. objects can be nested (such as a list of blocks).
  372. The `pandas.DataFrame` object has one column for each epoch, with each
  373. element being the epoch label. columns are padded to the same length with
  374. `NaN` values.
  375. The column heading is a `pandas.MultiIndex` with one index
  376. for each of the scalar attributes and annotations of the respective
  377. epoch. The `index` is a `pandas.MultiIndex`, with the first index being
  378. the time stamp from the `epoch.times` attribute and the second being the
  379. duration from the `epoch.durations` attribute.
  380. Parameters
  381. ----------
  382. container : list, tuple, iterable, dict, neo Block, neo Segment
  383. The container for the epochs to convert.
  384. parents : bool, optional
  385. Also include attributes and annotations from parent neo
  386. objects (if any).
  387. child_first : bool, optional
  388. If True (default True), values of child attributes are used
  389. over parent attributes in the event of a name conflict.
  390. If False, parent attributes are used.
  391. This parameter does nothing if `parents` is False.
  392. Returns
  393. -------
  394. pandas DataFrame
  395. A DataFrame containing the labels from `container`.
  396. Notes
  397. -----
  398. If the length of `epoch.times`, `epoch.duration`, and `epoch.labels` are
  399. not the same for any individual epoch, the longer will be truncated to the
  400. length of the shorter for that epoch. Between epochs, lengths can differ.
  401. The index level names for `epoch.times` and `epoch.durations` are
  402. `times` and `durations`, respectively.
  403. Attributes that contain non-scalar values are skipped. So are
  404. annotations or attributes containing a value of `None`.
  405. `quantity.Quantities` types are incompatible with `pandas`, so attributes
  406. and annotations of that type are converted to a tuple where the first
  407. element is the scalar value and the second is the string representation of
  408. the units.
  409. """
  410. return _multi_objs_to_dataframe(container,
  411. epoch_to_dataframe, get_all_epochs,
  412. parents=parents, child_first=child_first)
  413. def slice_spiketrain(pdobj, t_start=None, t_stop=None):
  414. """Slice a `pandas.DataFrame`, changing indices appropriately.
  415. Values outside the sliced range are converted to `NaN` values.
  416. Slicing happens over columns.
  417. This sets the `t_start` and `t_stop` column indexes to be the new values.
  418. Otherwise it is the same as setting values outside the range to `NaN`.
  419. Parameters
  420. ----------
  421. pdobj : pandas DataFrame
  422. The DataFrame to slice.
  423. t_start : float, optional.
  424. If specified, the returned DataFrame values less than this set
  425. to `NaN`.
  426. Default is `None` (do not use this argument).
  427. t_stop : float, optional.
  428. If specified, the returned DataFrame values greater than this set
  429. to `NaN`.
  430. Default is `None` (do not use this argument).
  431. Returns
  432. -------
  433. pdobj : scalar, pandas Series, DataFrame, or Panel
  434. The returned data type is the same as the type of `pdobj`
  435. Notes
  436. -----
  437. The order of the index and/or column levels of the returned object may
  438. differ from the order of the original.
  439. If `t_start` or `t_stop` is specified, all columns indexes will be changed
  440. to the respective values, including those already within the new range.
  441. If `t_start` or `t_stop` is not specified, those column indexes will not
  442. be changed.
  443. Returns a copy, even if `t_start` and `t_stop` are both `None`.
  444. """
  445. if t_start is None and t_stop is None:
  446. return pdobj.copy()
  447. if t_stop is not None:
  448. pdobj[pdobj > t_stop] = np.nan
  449. pdobj = pdobj.T.reset_index(level='t_stop')
  450. pdobj['t_stop'] = t_stop
  451. pdobj = pdobj.set_index('t_stop', append=True).T
  452. pdobj = _sort_inds(pdobj, axis=1)
  453. if t_start is not None:
  454. pdobj[pdobj < t_start] = np.nan
  455. pdobj = pdobj.T.reset_index(level='t_start')
  456. pdobj['t_start'] = t_start
  457. pdobj = pdobj.set_index('t_start', append=True).T
  458. pdobj = _sort_inds(pdobj, axis=1)
  459. return pdobj