Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "3.5.1 (default, Dec 26 2015, 18:11:22) \n",
- "[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)]\n",
- "Pandas: 0.18.1\n"
- ]
- }
- ],
- "source": [
- "# Version info\n",
- "import sys\n",
- "print(sys.version)\n",
- "print('Pandas: ', pd.__version__)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 1,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "# Sample data\n",
- "data=\"\"\"01, home #sweet home\n",
- "01, #happy #life \n",
- "02, #world peace\n",
- "03, #all are one\n",
- "\"\"\""
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 2,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": [
- "import pandas as pd\n",
- "from io import StringIO"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {
- "collapsed": false
- },
- "outputs": [],
- "source": [
- "df = pd.read_csv(StringIO(data), \n",
- " header=None, \n",
- " names=['col1', 'col2'],\n",
- " index_col=0)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th>col2</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>col1</th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>home #sweet home</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>#happy #life</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <td>#world peace</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <td>#all are one</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " col2\n",
- "col1 \n",
- "1 home #sweet home\n",
- "1 #happy #life \n",
- "2 #world peace\n",
- "3 #all are one"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- " <thead>\n",
- " <tr style=\"text-align: right;\">\n",
- " <th></th>\n",
- " <th></th>\n",
- " <th>0</th>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>col1</th>\n",
- " <th>match</th>\n",
- " <th></th>\n",
- " </tr>\n",
- " </thead>\n",
- " <tbody>\n",
- " <tr>\n",
- " <th rowspan=\"3\" valign=\"top\">1</th>\n",
- " <th>0</th>\n",
- " <td>s</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>0</th>\n",
- " <td>h</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>1</th>\n",
- " <td>l</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>2</th>\n",
- " <th>0</th>\n",
- " <td>w</td>\n",
- " </tr>\n",
- " <tr>\n",
- " <th>3</th>\n",
- " <th>0</th>\n",
- " <td>a</td>\n",
- " </tr>\n",
- " </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- " 0\n",
- "col1 match \n",
- "1 0 s\n",
- " 0 h\n",
- " 1 l\n",
- "2 0 w\n",
- "3 0 a"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# This should work (will only output the first character of each match)\n",
- "df['col2'].str.extractall('#(\\S)')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {
- "collapsed": false
- },
- "outputs": [
- {
- "ename": "AssertionError",
- "evalue": "1 columns passed, passed data had 6 columns",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
- "\u001b[0;32m<ipython-input-8-4792d3b6c94f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# This won't work due to a bug\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'col2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'(#\\S+)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36mextractall\u001b[0;34m(self, pat, flags)\u001b[0m\n\u001b[1;32m 1619\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr_extractall\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1620\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1621\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mstr_extractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_orig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1622\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1623\u001b[0m _shared_docs['find'] = (\"\"\"\n",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36mstr_extractall\u001b[0;34m(arr, pat, flags)\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 717\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_named_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fields\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0marrays\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_to_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_to_arrays\u001b[0;34m(data, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5350\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5351\u001b[0m return _list_to_arrays(data, columns, coerce_float=coerce_float,\n\u001b[0;32m-> 5352\u001b[0;31m dtype=dtype)\n\u001b[0m\u001b[1;32m 5353\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5354\u001b[0m return _list_of_dict_to_arrays(data, columns,\n",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_list_to_arrays\u001b[0;34m(data, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5429\u001b[0m \u001b[0mcontent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_object_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5430\u001b[0m return _convert_object_array(content, columns, dtype=dtype,\n\u001b[0;32m-> 5431\u001b[0;31m coerce_float=coerce_float)\n\u001b[0m\u001b[1;32m 5432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5433\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_convert_object_array\u001b[0;34m(content, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5487\u001b[0m \u001b[0;31m# caller's responsibility to check for this...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5488\u001b[0m raise AssertionError('%d columns passed, passed data had %s '\n\u001b[0;32m-> 5489\u001b[0;31m 'columns' % (len(columns), len(content)))\n\u001b[0m\u001b[1;32m 5490\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5491\u001b[0m \u001b[0;31m# provide soft conversion of object dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mAssertionError\u001b[0m: 1 columns passed, passed data had 6 columns"
- ]
- }
- ],
- "source": [
- "# This won't work due to a bug\n",
- "df['col2'].str.extractall('(#\\S+)')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "collapsed": true
- },
- "outputs": [],
- "source": []
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.5.1"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 0
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement