Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
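- "In some very particular cases, `pandas.to_timedelta()` (pandas 0.20.1 is used here) does not raise an `OverflowError` when it should: float input with `unit='s'` that lies just above the int64 nanosecond limit silently wraps around to negative timedeltas, whereas values of the same magnitude passed with `unit='us'` raise as expected. This is demonstrated below."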
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "import numpy as np"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "pandas version: 0.20.1\n",
- " numpy version: 1.12.1\n"
- ]
- }
- ],
- "source": [
- "print('pandas version: %s' % pd.__version__)\n",
- "print(' numpy version: %s' % np.__version__)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Function to create floats separated by the smallest representable increment"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "def float_array_with_smallest_increments(initial_float, N_points_in_one_direction):\n",
- "    \"\"\"Return the floats adjacent to `initial_float`, in ascending order.\n",
- "\n",
- "    Steps `N_points_in_one_direction` times downward and upward from\n",
- "    `initial_float` with `np.nextafter`, so consecutive entries differ by the\n",
- "    smallest representable increment. The returned array holds\n",
- "    2 * N_points_in_one_direction + 1 values with `initial_float` in the middle.\n",
- "    \"\"\"\n",
- "    # Step towards +/-inf rather than towards module-level int64 limits: the\n",
- "    # function then does not depend on names defined in another cell, and the\n",
- "    # step direction stays correct for start values at or beyond those limits.\n",
- "    floats_upward = [initial_float]\n",
- "    floats_downward = [initial_float]\n",
- "    for _ in range(N_points_in_one_direction):\n",
- "        floats_upward.append(np.nextafter(floats_upward[-1], np.inf))\n",
- "        floats_downward.append(np.nextafter(floats_downward[-1], -np.inf))\n",
- "    return np.array(floats_downward[::-1] + floats_upward[1:])"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [],
- "source": [
- "int_min = np.iinfo(np.int64).min\n",
- "int_max = np.iinfo(np.int64).max"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Test overflow of `to_timedelta()` using seconds "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "9223372036.85476684570312500000\n",
- "9223372036.85476875305175781250\n",
- "9223372036.85477066040039062500\n",
- "9223372036.85477256774902343750\n",
- "9223372036.85477447509765625000\n",
- "9223372036.85477638244628906250\n",
- "9223372036.85477828979492187500\n",
- "9223372036.85478019714355468750\n",
- "9223372036.85478210449218750000\n",
- "9223372036.85478401184082031250\n",
- "9223372036.85478591918945312500\n"
- ]
- }
- ],
- "source": [
- "seconds_as_floats = float_array_with_smallest_increments(int_max / 1e9, 5)\n",
- "\n",
- "# Show each neighbouring float with 20 decimals.\n",
- "for seconds_value in seconds_as_floats:\n",
- "    print('%.20f' % seconds_value)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "TimedeltaIndex([ '106751 days 23:47:16.854767',\n",
- " '106751 days 23:47:16.854769',\n",
- " '106751 days 23:47:16.854771',\n",
- " '106751 days 23:47:16.854773',\n",
- " '106751 days 23:47:16.854774',\n",
- " '-106752 days +00:12:43.145224',\n",
- " '-106752 days +00:12:43.145226',\n",
- " '-106752 days +00:12:43.145228',\n",
- " '-106752 days +00:12:43.145230',\n",
- " '-106752 days +00:12:43.145232',\n",
- " '-106752 days +00:12:43.145234'],\n",
- " dtype='timedelta64[ns]', freq=None)"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd.to_timedelta(seconds_as_floats, unit='s')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "**It overflows without raising: the last six values wrap around to negative timedeltas!**"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Test overflow of `to_timedelta()` using microseconds "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "9223372036854766.00000000000000000000\n",
- "9223372036854768.00000000000000000000\n",
- "9223372036854770.00000000000000000000\n",
- "9223372036854772.00000000000000000000\n",
- "9223372036854774.00000000000000000000\n",
- "9223372036854776.00000000000000000000\n",
- "9223372036854778.00000000000000000000\n",
- "9223372036854780.00000000000000000000\n",
- "9223372036854782.00000000000000000000\n",
- "9223372036854784.00000000000000000000\n",
- "9223372036854786.00000000000000000000\n"
- ]
- }
- ],
- "source": [
- "microseconds_as_floats = float_array_with_smallest_increments(int_max / 1e3, 5)\n",
- "\n",
- "# Show each neighbouring float with 20 decimals.\n",
- "for microseconds_value in microseconds_as_floats:\n",
- "    print('%.20f' % microseconds_value)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "ename": "OverflowError",
- "evalue": "Python int too large to convert to C long",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-8-190654f2ef57>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 80\u001b[0m errors=errors, name=arg.name)\n\u001b[1;32m 81\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mis_list_like\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 82\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0m_convert_listlike\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0munit\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbox\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mbox\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0merrors\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0merrors\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 83\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0mgetattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0marg\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'ndim'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;34m>\u001b[0m \u001b[0;36m1\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 84\u001b[0m raise TypeError('arg must be a string, timedelta, list, tuple, '\n",
- "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_convert_listlike\u001b[0;34m(arg, unit, box, errors, name)\u001b[0m\n\u001b[1;32m 162\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 163\u001b[0m value = tslib.array_to_timedelta64(_ensure_object(arg),\n\u001b[0;32m--> 164\u001b[0;31m unit=unit, errors=errors)\n\u001b[0m\u001b[1;32m 165\u001b[0m \u001b[0mvalue\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mvalue\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'timedelta64[ns]'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mFalse\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 166\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58701)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.array_to_timedelta64 (pandas/_libs/tslib.c:58408)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
- ]
- }
- ],
- "source": [
- "pd.to_timedelta(microseconds_as_floats, unit='us')"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
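- "**It correctly raises an OverflowError.** The next two cells show that the first four values on their own still convert without error, while the single value at index 5 raises."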
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 9,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "TimedeltaIndex(['106751 days 23:47:16.854766', '106751 days 23:47:16.854768',\n",
- " '106751 days 23:47:16.854770', '106751 days 23:47:16.854772'],\n",
- " dtype='timedelta64[ns]', freq=None)"
- ]
- },
- "execution_count": 9,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "pd.to_timedelta(microseconds_as_floats[0:4], unit='us')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 10,
- "metadata": {},
- "outputs": [
- {
- "ename": "OverflowError",
- "evalue": "Python int too large to convert to C long",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mOverflowError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-10-9bedf031e2a6>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpd\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_timedelta\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmicroseconds_as_floats\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m5\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'us'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36mto_timedelta\u001b[0;34m(arg, unit, box, errors)\u001b[0m\n\u001b[1;32m 87\u001b[0m \u001b[0;31m# ...so it must be a scalar value. Return scalar.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 88\u001b[0m return _coerce_scalar_to_timedelta_type(arg, unit=unit,\n\u001b[0;32m---> 89\u001b[0;31m box=box, errors=errors)\n\u001b[0m\u001b[1;32m 90\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 91\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/Users/chwala-c/anaconda/lib/python2.7/site-packages/pandas/core/tools/timedeltas.pyc\u001b[0m in \u001b[0;36m_coerce_scalar_to_timedelta_type\u001b[0;34m(r, unit, box, errors)\u001b[0m\n\u001b[1;32m 132\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 133\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 134\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtslib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mconvert_to_timedelta64\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mr\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0munit\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 135\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 136\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0merrors\u001b[0m \u001b[0;34m==\u001b[0m \u001b[0;34m'raise'\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:62190)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.convert_to_timedelta64 (pandas/_libs/tslib.c:61660)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;32mpandas/_libs/tslib.pyx\u001b[0m in \u001b[0;36mpandas._libs.tslib.cast_from_unit (pandas/_libs/tslib.c:68471)\u001b[0;34m()\u001b[0m\n",
- "\u001b[0;31mOverflowError\u001b[0m: Python int too large to convert to C long"
- ]
- }
- ],
- "source": [
- "pd.to_timedelta(microseconds_as_floats[5], unit='us')"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 2",
- "language": "python",
- "name": "python2"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 2
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython2",
- "version": "2.7.12"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 1
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement