Guest User

Untitled

a guest
Jan 18th, 2019
103
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 11.06 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "#import the libraries\n",
  10. "import numpy as np\n",
  11. "import pandas as pd"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 2,
  17. "metadata": {
  18. "scrolled": false
  19. },
  20. "outputs": [
  21. {
  22. "data": {
  23. "text/plain": [
  24. "0 4\n",
  25. "1 7\n",
  26. "2 3\n",
  27. "3 0\n",
  28. "4 9\n",
  29. "5 1\n",
  30. "dtype: int64"
  31. ]
  32. },
  33. "execution_count": 2,
  34. "metadata": {},
  35. "output_type": "execute_result"
  36. }
  37. ],
  38. "source": [
  39. "#define a Series with no explicit index\n",
  40. "ser = pd.Series([4,7,3,0,9,1])\n",
  41. "ser"
  42. ]
  43. },
  44. {
  45. "cell_type": "markdown",
  46. "metadata": {},
  47. "source": [
  48. "The `values` attribute is used to extract the actual data stored in the Series as a NumPy array."
  49. ]
  50. },
  51. {
  52. "cell_type": "code",
  53. "execution_count": 3,
  54. "metadata": {
  55. "scrolled": true
  56. },
  57. "outputs": [
  58. {
  59. "data": {
  60. "text/plain": [
  61. "array([4, 7, 3, 0, 9, 1], dtype=int64)"
  62. ]
  63. },
  64. "execution_count": 3,
  65. "metadata": {},
  66. "output_type": "execute_result"
  67. }
  68. ],
  69. "source": [
  70. "ser.values"
  71. ]
  72. },
  73. {
  74. "cell_type": "markdown",
  75. "metadata": {},
  76. "source": [
  77. "The `index` attribute returns the index object of the Series."
  78. ]
  79. },
  80. {
  81. "cell_type": "code",
  82. "execution_count": 4,
  83. "metadata": {},
  84. "outputs": [
  85. {
  86. "data": {
  87. "text/plain": [
  88. "RangeIndex(start=0, stop=6, step=1)"
  89. ]
  90. },
  91. "execution_count": 4,
  92. "metadata": {},
  93. "output_type": "execute_result"
  94. }
  95. ],
  96. "source": [
  97. "ser.index"
  98. ]
  99. },
  100. {
  101. "cell_type": "code",
  102. "execution_count": 5,
  103. "metadata": {},
  104. "outputs": [
  105. {
  106. "data": {
  107. "text/plain": [
  108. "3"
  109. ]
  110. },
  111. "execution_count": 5,
  112. "metadata": {},
  113. "output_type": "execute_result"
  114. }
  115. ],
  116. "source": [
  117. "#The index can be given as an arithmetic expression; it is evaluated before the lookup\n",
  118. "ser[1*2]"
  119. ]
  120. },
  121. {
  122. "cell_type": "code",
  123. "execution_count": 6,
  124. "metadata": {},
  125. "outputs": [
  126. {
  127. "data": {
  128. "text/plain": [
  129. "1"
  130. ]
  131. },
  132. "execution_count": 6,
  133. "metadata": {},
  134. "output_type": "execute_result"
  135. }
  136. ],
  137. "source": [
  138. "ser[2+3]"
  139. ]
  140. },
  141. {
  142. "cell_type": "code",
  143. "execution_count": 7,
  144. "metadata": {
  145. "scrolled": true
  146. },
  147. "outputs": [
  148. {
  149. "data": {
  150. "text/plain": [
  151. "A 4\n",
  152. "B 2\n",
  153. "C 6\n",
  154. "dtype: int64"
  155. ]
  156. },
  157. "execution_count": 7,
  158. "metadata": {},
  159. "output_type": "execute_result"
  160. }
  161. ],
  162. "source": [
  163. "#Define a Series with specific index\n",
  164. "ser1 = pd.Series([4,2,6],index = ['A','B','C'])\n",
  165. "ser1"
  166. ]
  167. },
  168. {
  169. "cell_type": "code",
  170. "execution_count": 8,
  171. "metadata": {
  172. "scrolled": true
  173. },
  174. "outputs": [
  175. {
  176. "data": {
  177. "text/plain": [
  178. "Index(['A', 'B', 'C'], dtype='object')"
  179. ]
  180. },
  181. "execution_count": 8,
  182. "metadata": {},
  183. "output_type": "execute_result"
  184. }
  185. ],
  186. "source": [
  187. "ser1.index"
  188. ]
  189. },
  190. {
  191. "cell_type": "code",
  192. "execution_count": 9,
  193. "metadata": {},
  194. "outputs": [
  195. {
  196. "data": {
  197. "text/plain": [
  198. "array([4, 2, 6], dtype=int64)"
  199. ]
  200. },
  201. "execution_count": 9,
  202. "metadata": {},
  203. "output_type": "execute_result"
  204. }
  205. ],
  206. "source": [
  207. "ser1.values"
  208. ]
  209. },
  210. {
  211. "cell_type": "markdown",
  212. "metadata": {},
  213. "source": [
  214. "You can manipulate the values directly by specifying the index and the new value."
  215. ]
  216. },
  217. {
  218. "cell_type": "code",
  219. "execution_count": 10,
  220. "metadata": {},
  221. "outputs": [
  222. {
  223. "name": "stdout",
  224. "output_type": "stream",
  225. "text": [
  226. "A 10\n",
  227. "B 2\n",
  228. "C 6\n",
  229. "dtype: int64\n"
  230. ]
  231. }
  232. ],
  233. "source": [
  234. "ser1['A'] = 10\n",
  235. "print(ser1)"
  236. ]
  237. },
  238. {
  239. "cell_type": "markdown",
  240. "metadata": {},
  241. "source": [
  242. "You can select multiple elements at once by passing a list of index labels. The output will be in the order in which the labels appear in the list."
  243. ]
  244. },
  245. {
  246. "cell_type": "code",
  247. "execution_count": 11,
  248. "metadata": {},
  249. "outputs": [
  250. {
  251. "data": {
  252. "text/plain": [
  253. "C 6\n",
  254. "A 10\n",
  255. "dtype: int64"
  256. ]
  257. },
  258. "execution_count": 11,
  259. "metadata": {},
  260. "output_type": "execute_result"
  261. }
  262. ],
  263. "source": [
  264. "ser1[['C','A']]"
  265. ]
  266. },
  267. {
  268. "cell_type": "code",
  269. "execution_count": 12,
  270. "metadata": {},
  271. "outputs": [
  272. {
  273. "data": {
  274. "text/plain": [
  275. "A 10\n",
  276. "C 6\n",
  277. "dtype: int64"
  278. ]
  279. },
  280. "execution_count": 12,
  281. "metadata": {},
  282. "output_type": "execute_result"
  283. }
  284. ],
  285. "source": [
  286. "#select values that are greater than 5 from Series \n",
  287. "ser1[ser1>5]"
  288. ]
  289. },
  290. {
  291. "cell_type": "code",
  292. "execution_count": 13,
  293. "metadata": {},
  294. "outputs": [
  295. {
  296. "data": {
  297. "text/plain": [
  298. "A 100\n",
  299. "B 4\n",
  300. "C 36\n",
  301. "dtype: int64"
  302. ]
  303. },
  304. "execution_count": 13,
  305. "metadata": {},
  306. "output_type": "execute_result"
  307. }
  308. ],
  309. "source": [
  310. "#multiply the Series by itself, element by element\n",
  311. "ser1 * ser1"
  312. ]
  313. },
  314. {
  315. "cell_type": "code",
  316. "execution_count": 14,
  317. "metadata": {},
  318. "outputs": [
  319. {
  320. "data": {
  321. "text/plain": [
  322. "A 20\n",
  323. "B 12\n",
  324. "C 16\n",
  325. "dtype: int64"
  326. ]
  327. },
  328. "execution_count": 14,
  329. "metadata": {},
  330. "output_type": "execute_result"
  331. }
  332. ],
  333. "source": [
  334. "#add a scalar to every element; this returns a new Series and leaves ser1 unchanged\n",
  335. "ser1 + 10"
  336. ]
  337. },
  338. {
  339. "cell_type": "markdown",
  340. "metadata": {},
  341. "source": [
  342. "To check whether a particular index label is present, use the `in` operator. Remember that Python is case-sensitive, so the label must be written exactly as it appears in the index."
  343. ]
  344. },
  345. {
  346. "cell_type": "code",
  347. "execution_count": 15,
  348. "metadata": {
  349. "scrolled": true
  350. },
  351. "outputs": [
  352. {
  353. "data": {
  354. "text/plain": [
  355. "False"
  356. ]
  357. },
  358. "execution_count": 15,
  359. "metadata": {},
  360. "output_type": "execute_result"
  361. }
  362. ],
  363. "source": [
  364. "'a' in ser1"
  365. ]
  366. },
  367. {
  368. "cell_type": "code",
  369. "execution_count": 16,
  370. "metadata": {
  371. "scrolled": true
  372. },
  373. "outputs": [
  374. {
  375. "data": {
  376. "text/plain": [
  377. "True"
  378. ]
  379. },
  380. "execution_count": 16,
  381. "metadata": {},
  382. "output_type": "execute_result"
  383. }
  384. ],
  385. "source": [
  386. "'A' in ser1"
  387. ]
  388. },
  389. {
  390. "cell_type": "code",
  391. "execution_count": 17,
  392. "metadata": {},
  393. "outputs": [
  394. {
  395. "data": {
  396. "text/plain": [
  397. "Mumbai 4.0\n",
  398. "Delhi 6.0\n",
  399. "Srinagar NaN\n",
  400. "Kolkata 3.0\n",
  401. "Bangalore 8.0\n",
  402. "dtype: float64"
  403. ]
  404. },
  405. "execution_count": 17,
  406. "metadata": {},
  407. "output_type": "execute_result"
  408. }
  409. ],
  410. "source": [
  411. "index = ['Mumbai','Delhi','Srinagar','Kolkata','Bangalore']\n",
  412. "ser2 = pd.Series([4,6,np.NaN,3,8],index)\n",
  413. "ser2"
  414. ]
  415. },
  416. {
  417. "cell_type": "markdown",
  418. "metadata": {},
  419. "source": [
  420. "Pandas uses 'NaN' as its standard representation for missing or unknown values. Handling missing values is an important part of data analysis: an analyst often needs to deal with them to get a complete set of data points before performing further analysis. The following functions, `pd.isnull` and `pd.notnull`, detect which values in our Series object are missing and which are not."
  421. ]
  422. },
  423. {
  424. "cell_type": "code",
  425. "execution_count": 18,
  426. "metadata": {
  427. "scrolled": true
  428. },
  429. "outputs": [
  430. {
  431. "data": {
  432. "text/plain": [
  433. "Mumbai False\n",
  434. "Delhi False\n",
  435. "Srinagar True\n",
  436. "Kolkata False\n",
  437. "Bangalore False\n",
  438. "dtype: bool"
  439. ]
  440. },
  441. "execution_count": 18,
  442. "metadata": {},
  443. "output_type": "execute_result"
  444. }
  445. ],
  446. "source": [
  447. "pd.isnull(ser2)"
  448. ]
  449. },
  450. {
  451. "cell_type": "code",
  452. "execution_count": 19,
  453. "metadata": {
  454. "scrolled": true
  455. },
  456. "outputs": [
  457. {
  458. "data": {
  459. "text/plain": [
  460. "Mumbai True\n",
  461. "Delhi True\n",
  462. "Srinagar False\n",
  463. "Kolkata True\n",
  464. "Bangalore True\n",
  465. "dtype: bool"
  466. ]
  467. },
  468. "execution_count": 19,
  469. "metadata": {},
  470. "output_type": "execute_result"
  471. }
  472. ],
  473. "source": [
  474. "pd.notnull(ser2)"
  475. ]
  476. },
  477. {
  478. "cell_type": "markdown",
  479. "metadata": {},
  480. "source": [
  481. "Now we move ahead to manipulate two distinct Series. When we apply an arithmetic operator to two Series, the values are aligned by index label and the operation is performed on the labels common to both. The result contains every label from either Series; labels present in only one Series, or whose value is missing, appear as NaN."
  482. ]
  483. },
  484. {
  485. "cell_type": "code",
  486. "execution_count": 20,
  487. "metadata": {},
  488. "outputs": [],
  489. "source": [
  490. "index = ['Delhi','Mumbai','Goa','Bangalore']\n",
  491. "ser3 = pd.Series([5,7,3,1],index)"
  492. ]
  493. },
  494. {
  495. "cell_type": "code",
  496. "execution_count": 21,
  497. "metadata": {
  498. "scrolled": true
  499. },
  500. "outputs": [
  501. {
  502. "data": {
  503. "text/plain": [
  504. "Bangalore 9.0\n",
  505. "Delhi 11.0\n",
  506. "Goa NaN\n",
  507. "Kolkata NaN\n",
  508. "Mumbai 11.0\n",
  509. "Srinagar NaN\n",
  510. "dtype: float64"
  511. ]
  512. },
  513. "execution_count": 21,
  514. "metadata": {},
  515. "output_type": "execute_result"
  516. }
  517. ],
  518. "source": [
  519. "ser2 + ser3"
  520. ]
  521. },
  522. {
  523. "cell_type": "markdown",
  524. "metadata": {},
  525. "source": [
  526. "The `name` attribute allows us to give a Series a name. The index of a Series has its own `name` attribute, which can be set in the same way."
  527. ]
  528. },
  529. {
  530. "cell_type": "code",
  531. "execution_count": 22,
  532. "metadata": {},
  533. "outputs": [],
  534. "source": [
  535. "ser2.name = 'Temperature'"
  536. ]
  537. },
  538. {
  539. "cell_type": "code",
  540. "execution_count": 23,
  541. "metadata": {},
  542. "outputs": [],
  543. "source": [
  544. "ser2.index.name = 'City'"
  545. ]
  546. },
  547. {
  548. "cell_type": "code",
  549. "execution_count": 24,
  550. "metadata": {
  551. "scrolled": true
  552. },
  553. "outputs": [
  554. {
  555. "data": {
  556. "text/plain": [
  557. "City\n",
  558. "Mumbai 4.0\n",
  559. "Delhi 6.0\n",
  560. "Srinagar NaN\n",
  561. "Kolkata 3.0\n",
  562. "Bangalore 8.0\n",
  563. "Name: Temperature, dtype: float64"
  564. ]
  565. },
  566. "execution_count": 24,
  567. "metadata": {},
  568. "output_type": "execute_result"
  569. }
  570. ],
  571. "source": [
  572. "ser2"
  573. ]
  574. }
  575. ],
  576. "metadata": {
  577. "kernelspec": {
  578. "display_name": "Python 3",
  579. "language": "python",
  580. "name": "python3"
  581. },
  582. "language_info": {
  583. "codemirror_mode": {
  584. "name": "ipython",
  585. "version": 3
  586. },
  587. "file_extension": ".py",
  588. "mimetype": "text/x-python",
  589. "name": "python",
  590. "nbconvert_exporter": "python",
  591. "pygments_lexer": "ipython3",
  592. "version": "3.6.5"
  593. }
  594. },
  595. "nbformat": 4,
  596. "nbformat_minor": 2
  597. }
Add Comment
Please, Sign In to add comment