Advertisement
Guest User

Untitled

a guest
Jul 20th, 2017
55
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 13.53 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
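  7. "In some very particular cases, the current version of `pandas.to_timedelta()` silently overflows instead of raising an `OverflowError`: with `unit='s'`, float values just above the `int64` nanosecond limit wrap around to negative timedeltas, whereas with `unit='us'` the equivalent values raise as expected. This is demonstrated below."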
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 1,
  13. "metadata": {
  14. "collapsed": true
  15. },
  16. "outputs": [],
  17. "source": [
  18. "import pandas as pd\n",
  19. "import numpy as np"
  20. ]
  21. },
  22. {
  23. "cell_type": "code",
  24. "execution_count": 2,
  25. "metadata": {},
  26. "outputs": [
  27. {
  28. "name": "stdout",
  29. "output_type": "stream",
  30. "text": [
  31. "pandas version: 0.20.1\n",
  32. " numpy version: 1.12.1\n"
  33. ]
  34. }
  35. ],
  36. "source": [
  37. "print('pandas version: %s' % pd.__version__)\n",
  38. "print(' numpy version: %s' % np.__version__)"
  39. ]
  40. },
  41. {
  42. "cell_type": "markdown",
  43. "metadata": {},
  44. "source": [
  45. "# Function to create an array of adjacent floats (smallest possible increments) around a given value"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 3,
  51. "metadata": {
  52. "collapsed": true
  53. },
  54. "outputs": [],
  55. "source": [
  56. "def float_array_with_smallest_increments(initial_float, N_points_in_one_direction): \n",
  57. " floats_upward = [initial_float, ]\n",
  58. " floats_downward = [initial_float, ]\n",
  59. " for i in range(N_points_in_one_direction):\n",
  60. " floats_upward.append(np.nextafter(floats_upward[-1] , int_max))\n",
  61. " floats_downward.append(np.nextafter(floats_downward[-1] , int_min)) \n",
  62. " return np.array(floats_downward[::-1] + floats_upward[1:])"
  63. ]
  64. },
  65. {
  66. "cell_type": "code",
  67. "execution_count": 4,
  68. "metadata": {},
  69. "outputs": [],
  70. "source": [
  71. "int_min = np.iinfo(np.int64).min\n",
  72. "int_max = np.iinfo(np.int64).max"
  73. ]
  74. },
  75. {
  76. "cell_type": "markdown",
  77. "metadata": {},
  78. "source": [
  79. "# Test overflow of `to_timedelta()` using seconds "
  80. ]
  81. },
  82. {
  83. "cell_type": "code",
  84. "execution_count": 5,
  85. "metadata": {},
  86. "outputs": [
  87. {
  88. "name": "stdout",
  89. "output_type": "stream",
  90. "text": [
  91. "9223372036.85476684570312500000\n",
  92. "9223372036.85476875305175781250\n",
  93. "9223372036.85477066040039062500\n",
  94. "9223372036.85477256774902343750\n",
  95. "9223372036.85477447509765625000\n",
  96. "9223372036.85477638244628906250\n",
  97. "9223372036.85477828979492187500\n",
  98. "9223372036.85478019714355468750\n",
  99. "9223372036.85478210449218750000\n",
  100. "9223372036.85478401184082031250\n",
  101. "9223372036.85478591918945312500\n"
  102. ]
  103. }
  104. ],
  105. "source": [
  106. "seconds_as_floats = float_array_with_smallest_increments(int_max/1e9, 5)\n",
  107. "\n",
  108. "for v in np.nditer(seconds_as_floats):\n",
  109. " print('%.20f' % v)"
  110. ]
  111. },
  112. {
  113. "cell_type": "code",
  114. "execution_count": 6,
  115. "metadata": {},
  116. "outputs": [
  117. {
  118. "data": {
  119. "text/plain": [
  120. "TimedeltaIndex([ '106751 days 23:47:16.854767',\n",
  121. " '106751 days 23:47:16.854769',\n",
  122. " '106751 days 23:47:16.854771',\n",
  123. " '106751 days 23:47:16.854773',\n",
  124. " '106751 days 23:47:16.854774',\n",
  125. " '-106752 days +00:12:43.145224',\n",
  126. " '-106752 days +00:12:43.145226',\n",
  127. " '-106752 days +00:12:43.145228',\n",
  128. " '-106752 days +00:12:43.145230',\n",
  129. " '-106752 days +00:12:43.145232',\n",
  130. " '-106752 days +00:12:43.145234'],\n",
  131. " dtype='timedelta64[ns]', freq=None)"
  132. ]
  133. },
  134. "execution_count": 6,
  135. "metadata": {},
  136. "output_type": "execute_result"
  137. }
  138. ],
  139. "source": [
  140. "pd.to_timedelta(seconds_as_floats, unit='s')"
  141. ]
  142. },
  143. {
  144. "cell_type": "markdown",
  145. "metadata": {},
  146. "source": [
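  147. "**It overflows without raising! From the sixth value onward the result silently wraps around to negative timedeltas instead of raising an `OverflowError`.**"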
  148. ]
  149. },
  150. {
  151. "cell_type": "markdown",
  152. "metadata": {},
  153. "source": [
  154. "# Test overflow of `to_timedelta()` using microseconds "
  155. ]
  156. },
  157. {
  158. "cell_type": "code",
  159. "execution_count": 7,
  160. "metadata": {},
  161. "outputs": [
  162. {
  163. "name": "stdout",
  164. "output_type": "stream",
  165. "text": [
  166. "9223372036854766.00000000000000000000\n",
  167. "9223372036854768.00000000000000000000\n",
  168. "9223372036854770.00000000000000000000\n",
  169. "9223372036854772.00000000000000000000\n",
  170. "9223372036854774.00000000000000000000\n",
  171. "9223372036854776.00000000000000000000\n",
  172. "9223372036854778.00000000000000000000\n",
  173. "9223372036854780.00000000000000000000\n",
  174. "9223372036854782.00000000000000000000\n",
  175. "9223372036854784.00000000000000000000\n",
  176. "9223372036854786.00000000000000000000\n"
  177. ]
  178. }
  179. ],
  180. "source": [
  181. "microseconds_as_floats = float_array_with_smallest_increments(int_max/1e3, 5)\n",
  182. "\n",
  183. "for v in np.nditer(microseconds_as_floats):\n",
  184. " print('%.20f' % v)"
  185. ]
  186. },
  187. {
  188. "cell_type": "code",
  189. "execution_count": 8,
  190. "metadata": {},
  191. "outputs": [
  192. {
  193. "ename": "OverflowError",
  194. "evalue": "Python int too large to convert to C long",
  195. "output_type": "error",
  196. "traceback": [
  197. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  198. "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
  199. "\u001b[0;32m<ipython-input-8-190654f2ef57>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  200. "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 80\u001b[0m errors=errors, name=arg.name)\n\u001b[1;32m 81\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m raise TypeError('arg must be a string, timedelta, list, tuple, '\n",
  201. "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_convert_listlike\u001b[0;34m(arg, unit, box, errors, name)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m value = tslib.array_to_timedelta64(_ensure_object(arg),\n\u001b[0;32m--> 164\u001b[0;31m unit=unit, errors=errors)\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timedelta64[ns]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  202. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58701)\u001b[0;34m()\u001b[0m\n",
  203. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58408)\u001b[0;34m()\u001b[0m\n",
  204. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
  205. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
  206. "\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
  207. ]
  208. }
  209. ],
  210. "source": [
  211. "pd.to_timedelta(microseconds_as_floats, unit='us')"
  212. ]
  213. },
  214. {
  215. "cell_type": "markdown",
  216. "metadata": {},
  217. "source": [
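  218. "**It correctly raises an `OverflowError`.** The next two cells show that the first four values convert without error, while the single value at index 5 raises on its own."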
  219. ]
  220. },
  221. {
  222. "cell_type": "code",
  223. "execution_count": 9,
  224. "metadata": {},
  225. "outputs": [
  226. {
  227. "data": {
  228. "text/plain": [
  229. "TimedeltaIndex(['106751 days 23:47:16.854766', '106751 days 23:47:16.854768',\n",
  230. " '106751 days 23:47:16.854770', '106751 days 23:47:16.854772'],\n",
  231. " dtype='timedelta64[ns]', freq=None)"
  232. ]
  233. },
  234. "execution_count": 9,
  235. "metadata": {},
  236. "output_type": "execute_result"
  237. }
  238. ],
  239. "source": [
  240. "pd.to_timedelta(microseconds_as_floats[0:4], unit='us')"
  241. ]
  242. },
  243. {
  244. "cell_type": "code",
  245. "execution_count": 10,
  246. "metadata": {},
  247. "outputs": [
  248. {
  249. "ename": "OverflowError",
  250. "evalue": "Python int too large to convert to C long",
  251. "output_type": "error",
  252. "traceback": [
  253. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  254. "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
  255. "\u001b[0;32m<ipython-input-10-9bedf031e2a6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  256. "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;31m# ...so it must be a scalar value. Return scalar.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m return _coerce_scalar_to_timedelta_type(arg, unit=unit,\n\u001b[0;32m---> 89\u001b[0;31m box=box, errors=errors)\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  257. "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_coerce_scalar_to_timedelta_type\u001b[0;34m(r, unit, box, errors)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtslib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_to_timedelta64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  258. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:62190)\u001b[0;34m()\u001b[0m\n",
  259. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
  260. "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
  261. "\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
  262. ]
  263. }
  264. ],
  265. "source": [
  266. "pd.to_timedelta(microseconds_as_floats[5], unit='us')"
  267. ]
  268. }
  269. ],
  270. "metadata": {
  271. "kernelspec": {
  272. "display_name": "Python 2",
  273. "language": "python",
  274. "name": "python2"
  275. },
  276. "language_info": {
  277. "codemirror_mode": {
  278. "name": "ipython",
  279. "version": 2
  280. },
  281. "file_extension": ".py",
  282. "mimetype": "text/x-python",
  283. "name": "python",
  284. "nbconvert_exporter": "python",
  285. "pygments_lexer": "ipython2",
  286. "version": "2.7.12"
  287. }
  288. },
  289. "nbformat": 4,
  290. "nbformat_minor": 1
  291. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement