Advertisement
Guest User

Untitled

a guest
Jul 24th, 2016
57
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 12.80 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 7,
  6. "metadata": {
  7. "collapsed": false
  8. },
  9. "outputs": [
  10. {
  11. "name": "stdout",
  12. "output_type": "stream",
  13. "text": [
  14. "3.5.1 (default, Dec 26 2015, 18:11:22) \n",
  15. "[GCC 4.2.1 Compatible Apple LLVM 7.0.2 (clang-700.1.81)]\n",
  16. "Pandas: 0.18.1\n"
  17. ]
  18. }
  19. ],
  20. "source": [
  21. "# Version info (run this after the `import pandas as pd` cell below; `pd` is undefined on a fresh kernel)\n",
  22. "import sys\n",
  23. "print(sys.version)\n",
  24. "print('Pandas: ', pd.__version__)"
  25. ]
  26. },
  27. {
  28. "cell_type": "code",
  29. "execution_count": 1,
  30. "metadata": {
  31. "collapsed": true
  32. },
  33. "outputs": [],
  34. "source": [
  35. "# Sample data\n",
  36. "data=\"\"\"01, home #sweet home\n",
  37. "01, #happy #life \n",
  38. "02, #world peace\n",
  39. "03, #all are one\n",
  40. "\"\"\""
  41. ]
  42. },
  43. {
  44. "cell_type": "code",
  45. "execution_count": 2,
  46. "metadata": {
  47. "collapsed": true
  48. },
  49. "outputs": [],
  50. "source": [
  51. "import pandas as pd\n",
  52. "from io import StringIO"
  53. ]
  54. },
  55. {
  56. "cell_type": "code",
  57. "execution_count": 3,
  58. "metadata": {
  59. "collapsed": false
  60. },
  61. "outputs": [],
  62. "source": [
  63. "df = pd.read_csv(StringIO(data), \n",
  64. " header=None, \n",
  65. " names=['col1', 'col2'],\n",
  66. " index_col=0)"
  67. ]
  68. },
  69. {
  70. "cell_type": "code",
  71. "execution_count": 4,
  72. "metadata": {
  73. "collapsed": false
  74. },
  75. "outputs": [
  76. {
  77. "data": {
  78. "text/html": [
  79. "<div>\n",
  80. "<table border=\"1\" class=\"dataframe\">\n",
  81. " <thead>\n",
  82. " <tr style=\"text-align: right;\">\n",
  83. " <th></th>\n",
  84. " <th>col2</th>\n",
  85. " </tr>\n",
  86. " <tr>\n",
  87. " <th>col1</th>\n",
  88. " <th></th>\n",
  89. " </tr>\n",
  90. " </thead>\n",
  91. " <tbody>\n",
  92. " <tr>\n",
  93. " <th>1</th>\n",
  94. " <td>home #sweet home</td>\n",
  95. " </tr>\n",
  96. " <tr>\n",
  97. " <th>1</th>\n",
  98. " <td>#happy #life</td>\n",
  99. " </tr>\n",
  100. " <tr>\n",
  101. " <th>2</th>\n",
  102. " <td>#world peace</td>\n",
  103. " </tr>\n",
  104. " <tr>\n",
  105. " <th>3</th>\n",
  106. " <td>#all are one</td>\n",
  107. " </tr>\n",
  108. " </tbody>\n",
  109. "</table>\n",
  110. "</div>"
  111. ],
  112. "text/plain": [
  113. " col2\n",
  114. "col1 \n",
  115. "1 home #sweet home\n",
  116. "1 #happy #life \n",
  117. "2 #world peace\n",
  118. "3 #all are one"
  119. ]
  120. },
  121. "execution_count": 4,
  122. "metadata": {},
  123. "output_type": "execute_result"
  124. }
  125. ],
  126. "source": [
  127. "df"
  128. ]
  129. },
  130. {
  131. "cell_type": "code",
  132. "execution_count": 5,
  133. "metadata": {
  134. "collapsed": false
  135. },
  136. "outputs": [
  137. {
  138. "data": {
  139. "text/html": [
  140. "<div>\n",
  141. "<table border=\"1\" class=\"dataframe\">\n",
  142. " <thead>\n",
  143. " <tr style=\"text-align: right;\">\n",
  144. " <th></th>\n",
  145. " <th></th>\n",
  146. " <th>0</th>\n",
  147. " </tr>\n",
  148. " <tr>\n",
  149. " <th>col1</th>\n",
  150. " <th>match</th>\n",
  151. " <th></th>\n",
  152. " </tr>\n",
  153. " </thead>\n",
  154. " <tbody>\n",
  155. " <tr>\n",
  156. " <th rowspan=\"3\" valign=\"top\">1</th>\n",
  157. " <th>0</th>\n",
  158. " <td>s</td>\n",
  159. " </tr>\n",
  160. " <tr>\n",
  161. " <th>0</th>\n",
  162. " <td>h</td>\n",
  163. " </tr>\n",
  164. " <tr>\n",
  165. " <th>1</th>\n",
  166. " <td>l</td>\n",
  167. " </tr>\n",
  168. " <tr>\n",
  169. " <th>2</th>\n",
  170. " <th>0</th>\n",
  171. " <td>w</td>\n",
  172. " </tr>\n",
  173. " <tr>\n",
  174. " <th>3</th>\n",
  175. " <th>0</th>\n",
  176. " <td>a</td>\n",
  177. " </tr>\n",
  178. " </tbody>\n",
  179. "</table>\n",
  180. "</div>"
  181. ],
  182. "text/plain": [
  183. " 0\n",
  184. "col1 match \n",
  185. "1 0 s\n",
  186. " 0 h\n",
  187. " 1 l\n",
  188. "2 0 w\n",
  189. "3 0 a"
  190. ]
  191. },
  192. "execution_count": 5,
  193. "metadata": {},
  194. "output_type": "execute_result"
  195. }
  196. ],
  197. "source": [
  198. "# This works, but the group (\\S) captures only the first character after each '#'\n",
  199. "df['col2'].str.extractall('#(\\S)')"
  200. ]
  201. },
  202. {
  203. "cell_type": "code",
  204. "execution_count": 8,
  205. "metadata": {
  206. "collapsed": false
  207. },
  208. "outputs": [
  209. {
  210. "ename": "AssertionError",
  211. "evalue": "1 columns passed, passed data had 6 columns",
  212. "output_type": "error",
  213. "traceback": [
  214. "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
  215. "\u001b[0;31mAssertionError\u001b[0m Traceback (most recent call last)",
  216. "\u001b[0;32m<ipython-input-8-4792d3b6c94f>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;31m# This won't work due to a bug\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mdf\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'col2'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mstr\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mextractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'(#\\S+)'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
  217. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36mextractall\u001b[0;34m(self, pat, flags)\u001b[0m\n\u001b[1;32m 1619\u001b[0m \u001b[0;34m@\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mstr_extractall\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1620\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mextractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1621\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mstr_extractall\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_orig\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mpat\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mflags\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mflags\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 1622\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 1623\u001b[0m _shared_docs['find'] = (\"\"\"\n",
  218. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/strings.py\u001b[0m in \u001b[0;36mstr_extractall\u001b[0;34m(arr, pat, flags)\u001b[0m\n\u001b[1;32m 714\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 715\u001b[0m \u001b[0mindex\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 716\u001b[0;31m \u001b[0mresult\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mDataFrame\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mmatch_list\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindex\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 717\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mresult\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 718\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  219. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, data, index, columns, dtype, copy)\u001b[0m\n\u001b[1;32m 261\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mcom\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mis_named_tuple\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mand\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 262\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_fields\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 263\u001b[0;31m \u001b[0marrays\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_to_arrays\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcolumns\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdtype\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mdtype\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 264\u001b[0m \u001b[0mcolumns\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0m_ensure_index\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mcolumns\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 265\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  220. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_to_arrays\u001b[0;34m(data, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5350\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5351\u001b[0m return _list_to_arrays(data, columns, coerce_float=coerce_float,\n\u001b[0;32m-> 5352\u001b[0;31m dtype=dtype)\n\u001b[0m\u001b[1;32m 5353\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mcollections\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mMapping\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5354\u001b[0m return _list_of_dict_to_arrays(data, columns,\n",
  221. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_list_to_arrays\u001b[0;34m(data, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5429\u001b[0m \u001b[0mcontent\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mto_object_array\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mT\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5430\u001b[0m return _convert_object_array(content, columns, dtype=dtype,\n\u001b[0;32m-> 5431\u001b[0;31m coerce_float=coerce_float)\n\u001b[0m\u001b[1;32m 5432\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5433\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
  222. "\u001b[0;32m/Users/kristof/Development/virtualenv/jupyter/lib/python3.5/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36m_convert_object_array\u001b[0;34m(content, columns, coerce_float, dtype)\u001b[0m\n\u001b[1;32m 5487\u001b[0m \u001b[0;31m# caller's responsibility to check for this...\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5488\u001b[0m raise AssertionError('%d columns passed, passed data had %s '\n\u001b[0;32m-> 5489\u001b[0;31m 'columns' % (len(columns), len(content)))\n\u001b[0m\u001b[1;32m 5490\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5491\u001b[0m \u001b[0;31m# provide soft conversion of object dtypes\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
  223. "\u001b[0;31mAssertionError\u001b[0m: 1 columns passed, passed data had 6 columns"
  224. ]
  225. }
  226. ],
  227. "source": [
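  228. "# Raises AssertionError on pandas 0.18.1: a single capture group that matches more than one character appears to be split into one column per character\n",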
  229. "df['col2'].str.extractall('(#\\S+)')"
  230. ]
  231. },
  232. {
  233. "cell_type": "code",
  234. "execution_count": null,
  235. "metadata": {
  236. "collapsed": true
  237. },
  238. "outputs": [],
  239. "source": []
  240. }
  241. ],
  242. "metadata": {
  243. "kernelspec": {
  244. "display_name": "Python 3",
  245. "language": "python",
  246. "name": "python3"
  247. },
  248. "language_info": {
  249. "codemirror_mode": {
  250. "name": "ipython",
  251. "version": 3
  252. },
  253. "file_extension": ".py",
  254. "mimetype": "text/x-python",
  255. "name": "python",
  256. "nbconvert_exporter": "python",
  257. "pygments_lexer": "ipython3",
  258. "version": "3.5.1"
  259. }
  260. },
  261. "nbformat": 4,
  262. "nbformat_minor": 0
  263. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement