Advertisement
Guest User

Untitled

a guest
May 23rd, 2019
56
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 14.88 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "# 对产品列计数:查看产品销售量"
  8. ]
  9. },
  10. {
  11. "cell_type": "code",
  12. "execution_count": 194,
  13. "metadata": {},
  14. "outputs": [
  15. {
  16. "data": {
  17. "text/html": [
  18. "<div>\n",
  19. "<style scoped>\n",
  20. " .dataframe tbody tr th:only-of-type {\n",
  21. " vertical-align: middle;\n",
  22. " }\n",
  23. "\n",
  24. " .dataframe tbody tr th {\n",
  25. " vertical-align: top;\n",
  26. " }\n",
  27. "\n",
  28. " .dataframe thead th {\n",
  29. " text-align: right;\n",
  30. " }\n",
  31. "</style>\n",
  32. "<table border=\"1\" class=\"dataframe\">\n",
  33. " <thead>\n",
  34. " <tr style=\"text-align: right;\">\n",
  35. " <th></th>\n",
  36. " <th>姓名</th>\n",
  37. " <th>产品</th>\n",
  38. " <th>售价</th>\n",
  39. " </tr>\n",
  40. " </thead>\n",
  41. " <tbody>\n",
  42. " <tr>\n",
  43. " <th>0</th>\n",
  44. " <td>张三</td>\n",
  45. " <td>A</td>\n",
  46. " <td>20</td>\n",
  47. " </tr>\n",
  48. " <tr>\n",
  49. " <th>1</th>\n",
  50. " <td>李四</td>\n",
  51. " <td>B</td>\n",
  52. " <td>30</td>\n",
  53. " </tr>\n",
  54. " <tr>\n",
  55. " <th>2</th>\n",
  56. " <td>王五</td>\n",
  57. " <td>B</td>\n",
  58. " <td>30</td>\n",
  59. " </tr>\n",
  60. " <tr>\n",
  61. " <th>3</th>\n",
  62. " <td>赵六</td>\n",
  63. " <td>A</td>\n",
  64. " <td>20</td>\n",
  65. " </tr>\n",
  66. " <tr>\n",
  67. " <th>4</th>\n",
  68. " <td>小明</td>\n",
  69. " <td>A</td>\n",
  70. " <td>20</td>\n",
  71. " </tr>\n",
  72. " <tr>\n",
  73. " <th>5</th>\n",
  74. " <td>小红</td>\n",
  75. " <td>C</td>\n",
  76. " <td>50</td>\n",
  77. " </tr>\n",
  78. " </tbody>\n",
  79. "</table>\n",
  80. "</div>"
  81. ],
  82. "text/plain": [
  83. " 姓名 产品 售价\n",
  84. "0 张三 A 20\n",
  85. "1 李四 B 30\n",
  86. "2 王五 B 30\n",
  87. "3 赵六 A 20\n",
  88. "4 小明 A 20\n",
  89. "5 小红 C 50"
  90. ]
  91. },
  92. "execution_count": 194,
  93. "metadata": {},
  94. "output_type": "execute_result"
  95. }
  96. ],
  97. "source": [
  98. "import pandas as pd\n",
  99. "df=pd.DataFrame({\"姓名\":['张三','李四','王五','赵六','小明','小红'],\"产品\":['A','B','B','A','A','C'],\"售价\":[20,30,30,20,20,50]})\n",
  100. "df"
  101. ]
  102. },
  103. {
  104. "cell_type": "markdown",
  105. "metadata": {},
  106. "source": [
  107. "# 方法①:\n",
  108. "\n",
  109. "(1)按照分组条件[\"产品\"] (后面的[\"产品\"]仅仅为了对单列计数),增加一列出现次数'count'\n",
  110. "\n",
  111. "(2)按照分组条件[\"产品\"]去重。\n",
  112. "\n",
  113. "(3)重置索引"
  114. ]
  115. },
  116. {
  117. "cell_type": "code",
  118. "execution_count": 72,
  119. "metadata": {},
  120. "outputs": [
  121. {
  122. "data": {
  123. "text/html": [
  124. "<div>\n",
  125. "<style scoped>\n",
  126. " .dataframe tbody tr th:only-of-type {\n",
  127. " vertical-align: middle;\n",
  128. " }\n",
  129. "\n",
  130. " .dataframe tbody tr th {\n",
  131. " vertical-align: top;\n",
  132. " }\n",
  133. "\n",
  134. " .dataframe thead th {\n",
  135. " text-align: right;\n",
  136. " }\n",
  137. "</style>\n",
  138. "<table border=\"1\" class=\"dataframe\">\n",
  139. " <thead>\n",
  140. " <tr style=\"text-align: right;\">\n",
  141. " <th></th>\n",
  142. " <th>产品</th>\n",
  143. " <th>姓名</th>\n",
  144. " <th>售价</th>\n",
  145. " <th>count</th>\n",
  146. " </tr>\n",
  147. " </thead>\n",
  148. " <tbody>\n",
  149. " <tr>\n",
  150. " <th>0</th>\n",
  151. " <td>A</td>\n",
  152. " <td>张三</td>\n",
  153. " <td>20</td>\n",
  154. " <td>3</td>\n",
  155. " </tr>\n",
  156. " <tr>\n",
  157. " <th>1</th>\n",
  158. " <td>B</td>\n",
  159. " <td>李四</td>\n",
  160. " <td>30</td>\n",
  161. " <td>2</td>\n",
  162. " </tr>\n",
  163. " <tr>\n",
  164. " <th>2</th>\n",
  165. " <td>B</td>\n",
  166. " <td>王五</td>\n",
  167. " <td>30</td>\n",
  168. " <td>2</td>\n",
  169. " </tr>\n",
  170. " <tr>\n",
  171. " <th>3</th>\n",
  172. " <td>A</td>\n",
  173. " <td>赵六</td>\n",
  174. " <td>20</td>\n",
  175. " <td>3</td>\n",
  176. " </tr>\n",
  177. " <tr>\n",
  178. " <th>4</th>\n",
  179. " <td>A</td>\n",
  180. " <td>小明</td>\n",
  181. " <td>20</td>\n",
  182. " <td>3</td>\n",
  183. " </tr>\n",
  184. " <tr>\n",
  185. " <th>5</th>\n",
  186. " <td>C</td>\n",
  187. " <td>小红</td>\n",
  188. " <td>50</td>\n",
  189. " <td>1</td>\n",
  190. " </tr>\n",
  191. " </tbody>\n",
  192. "</table>\n",
  193. "</div>"
  194. ],
  195. "text/plain": [
  196. " 产品 姓名 售价 count\n",
  197. "0 A 张三 20 3\n",
  198. "1 B 李四 30 2\n",
  199. "2 B 王五 30 2\n",
  200. "3 A 赵六 20 3\n",
  201. "4 A 小明 20 3\n",
  202. "5 C 小红 50 1"
  203. ]
  204. },
  205. "execution_count": 72,
  206. "metadata": {},
  207. "output_type": "execute_result"
  208. }
  209. ],
  210. "source": [
  211. "df[\"count\"]=df.groupby([\"产品\"])['产品'].transform(len)\n",
  212. "df"
  213. ]
  214. },
  215. {
  216. "cell_type": "code",
  217. "execution_count": 73,
  218. "metadata": {},
  219. "outputs": [
  220. {
  221. "data": {
  222. "text/html": [
  223. "<div>\n",
  224. "<style scoped>\n",
  225. " .dataframe tbody tr th:only-of-type {\n",
  226. " vertical-align: middle;\n",
  227. " }\n",
  228. "\n",
  229. " .dataframe tbody tr th {\n",
  230. " vertical-align: top;\n",
  231. " }\n",
  232. "\n",
  233. " .dataframe thead th {\n",
  234. " text-align: right;\n",
  235. " }\n",
  236. "</style>\n",
  237. "<table border=\"1\" class=\"dataframe\">\n",
  238. " <thead>\n",
  239. " <tr style=\"text-align: right;\">\n",
  240. " <th></th>\n",
  241. " <th>产品</th>\n",
  242. " <th>姓名</th>\n",
  243. " <th>售价</th>\n",
  244. " <th>count</th>\n",
  245. " </tr>\n",
  246. " </thead>\n",
  247. " <tbody>\n",
  248. " <tr>\n",
  249. " <th>0</th>\n",
  250. " <td>A</td>\n",
  251. " <td>张三</td>\n",
  252. " <td>20</td>\n",
  253. " <td>3</td>\n",
  254. " </tr>\n",
  255. " <tr>\n",
  256. " <th>1</th>\n",
  257. " <td>B</td>\n",
  258. " <td>李四</td>\n",
  259. " <td>30</td>\n",
  260. " <td>2</td>\n",
  261. " </tr>\n",
  262. " <tr>\n",
  263. " <th>2</th>\n",
  264. " <td>C</td>\n",
  265. " <td>小红</td>\n",
  266. " <td>50</td>\n",
  267. " <td>1</td>\n",
  268. " </tr>\n",
  269. " </tbody>\n",
  270. "</table>\n",
  271. "</div>"
  272. ],
  273. "text/plain": [
  274. " 产品 姓名 售价 count\n",
  275. "0 A 张三 20 3\n",
  276. "1 B 李四 30 2\n",
  277. "2 C 小红 50 1"
  278. ]
  279. },
  280. "execution_count": 73,
  281. "metadata": {},
  282. "output_type": "execute_result"
  283. }
  284. ],
  285. "source": [
  286. "df=df.drop_duplicates(\"产品\")\n",
  287. "df=df.reset_index(drop=True)\n",
  288. "df"
  289. ]
  290. },
  291. {
  292. "cell_type": "markdown",
  293. "metadata": {},
  294. "source": [
  295. "# 方法②\n",
  296. "\n",
  297. "### 1统计[\"产品\"]个数(如下四种方法)\n",
  298. "(1)size\n",
  299. "\n",
  300. "(2)value_counts\n",
  301. "\n",
  302. "(3)count \n",
  303. "\n",
  304. "(4)agg(\"count\")"
  305. ]
  306. },
  307. {
  308. "cell_type": "code",
  309. "execution_count": 257,
  310. "metadata": {},
  311. "outputs": [
  312. {
  313. "data": {
  314. "text/plain": [
  315. "产品\n",
  316. "A 3\n",
  317. "B 2\n",
  318. "C 1\n",
  319. "dtype: int64"
  320. ]
  321. },
  322. "execution_count": 257,
  323. "metadata": {},
  324. "output_type": "execute_result"
  325. }
  326. ],
  327. "source": [
  328. "df1=df.groupby(['产品']).size() \n",
  329. "df1"
  330. ]
  331. },
  332. {
  333. "cell_type": "code",
  334. "execution_count": 253,
  335. "metadata": {},
  336. "outputs": [
  337. {
  338. "data": {
  339. "text/plain": [
  340. "A 3\n",
  341. "B 2\n",
  342. "C 1\n",
  343. "Name: 产品, dtype: int64"
  344. ]
  345. },
  346. "execution_count": 253,
  347. "metadata": {},
  348. "output_type": "execute_result"
  349. }
  350. ],
  351. "source": [
  352. "df1=df[\"产品\"].value_counts()\n",
  353. "df1"
  354. ]
  355. },
  356. {
  357. "cell_type": "code",
  358. "execution_count": 250,
  359. "metadata": {},
  360. "outputs": [
  361. {
  362. "data": {
  363. "text/plain": [
  364. "产品\n",
  365. "A 3\n",
  366. "B 2\n",
  367. "C 1\n",
  368. "Name: 姓名, dtype: int64"
  369. ]
  370. },
  371. "execution_count": 250,
  372. "metadata": {},
  373. "output_type": "execute_result"
  374. }
  375. ],
  376. "source": [
  377. "df1=df.groupby(['产品'])[\"姓名\"].count() \n",
  378. "df1"
  379. ]
  380. },
  381. {
  382. "cell_type": "code",
  383. "execution_count": 262,
  384. "metadata": {},
  385. "outputs": [
  386. {
  387. "data": {
  388. "text/plain": [
  389. "产品\n",
  390. "A 3\n",
  391. "B 2\n",
  392. "C 1\n",
  393. "Name: 姓名, dtype: int64"
  394. ]
  395. },
  396. "execution_count": 262,
  397. "metadata": {},
  398. "output_type": "execute_result"
  399. }
  400. ],
  401. "source": [
  402. "df1=df.groupby(['产品'])[\"姓名\"].agg(\"count\")\n",
  403. "df1"
  404. ]
  405. },
  406. {
  407. "cell_type": "markdown",
  408. "metadata": {},
  409. "source": [
  410. "### 2整理成表格(如下三种方法)\n",
  411. "(1)去掉[\"产品\"]索引+增加最后一行列名\n",
  412. "\n",
  413. "(2)去索引+改列名\n",
  414. "\n",
  415. "(2)使结果变成表格形式+去索引+改列名"
  416. ]
  417. },
  418. {
  419. "cell_type": "code",
  420. "execution_count": 258,
  421. "metadata": {},
  422. "outputs": [
  423. {
  424. "data": {
  425. "text/html": [
  426. "<div>\n",
  427. "<style scoped>\n",
  428. " .dataframe tbody tr th:only-of-type {\n",
  429. " vertical-align: middle;\n",
  430. " }\n",
  431. "\n",
  432. " .dataframe tbody tr th {\n",
  433. " vertical-align: top;\n",
  434. " }\n",
  435. "\n",
  436. " .dataframe thead th {\n",
  437. " text-align: right;\n",
  438. " }\n",
  439. "</style>\n",
  440. "<table border=\"1\" class=\"dataframe\">\n",
  441. " <thead>\n",
  442. " <tr style=\"text-align: right;\">\n",
  443. " <th></th>\n",
  444. " <th>产品</th>\n",
  445. " <th>count</th>\n",
  446. " </tr>\n",
  447. " </thead>\n",
  448. " <tbody>\n",
  449. " <tr>\n",
  450. " <th>0</th>\n",
  451. " <td>A</td>\n",
  452. " <td>3</td>\n",
  453. " </tr>\n",
  454. " <tr>\n",
  455. " <th>1</th>\n",
  456. " <td>B</td>\n",
  457. " <td>2</td>\n",
  458. " </tr>\n",
  459. " <tr>\n",
  460. " <th>2</th>\n",
  461. " <td>C</td>\n",
  462. " <td>1</td>\n",
  463. " </tr>\n",
  464. " </tbody>\n",
  465. "</table>\n",
  466. "</div>"
  467. ],
  468. "text/plain": [
  469. " 产品 count\n",
  470. "0 A 3\n",
  471. "1 B 2\n",
  472. "2 C 1"
  473. ]
  474. },
  475. "execution_count": 258,
  476. "metadata": {},
  477. "output_type": "execute_result"
  478. }
  479. ],
  480. "source": [
  481. "df1=df1.reset_index(name='count') #去掉[\"产品\"]索引+增加最后一行列名\n",
  482. "df1"
  483. ]
  484. },
  485. {
  486. "cell_type": "code",
  487. "execution_count": 265,
  488. "metadata": {},
  489. "outputs": [
  490. {
  491. "data": {
  492. "text/html": [
  493. "<div>\n",
  494. "<style scoped>\n",
  495. " .dataframe tbody tr th:only-of-type {\n",
  496. " vertical-align: middle;\n",
  497. " }\n",
  498. "\n",
  499. " .dataframe tbody tr th {\n",
  500. " vertical-align: top;\n",
  501. " }\n",
  502. "\n",
  503. " .dataframe thead th {\n",
  504. " text-align: right;\n",
  505. " }\n",
  506. "</style>\n",
  507. "<table border=\"1\" class=\"dataframe\">\n",
  508. " <thead>\n",
  509. " <tr style=\"text-align: right;\">\n",
  510. " <th></th>\n",
  511. " <th>产品</th>\n",
  512. " <th>count</th>\n",
  513. " </tr>\n",
  514. " </thead>\n",
  515. " <tbody>\n",
  516. " <tr>\n",
  517. " <th>0</th>\n",
  518. " <td>A</td>\n",
  519. " <td>3</td>\n",
  520. " </tr>\n",
  521. " <tr>\n",
  522. " <th>1</th>\n",
  523. " <td>B</td>\n",
  524. " <td>2</td>\n",
  525. " </tr>\n",
  526. " <tr>\n",
  527. " <th>2</th>\n",
  528. " <td>C</td>\n",
  529. " <td>1</td>\n",
  530. " </tr>\n",
  531. " </tbody>\n",
  532. "</table>\n",
  533. "</div>"
  534. ],
  535. "text/plain": [
  536. " 产品 count\n",
  537. "0 A 3\n",
  538. "1 B 2\n",
  539. "2 C 1"
  540. ]
  541. },
  542. "execution_count": 265,
  543. "metadata": {},
  544. "output_type": "execute_result"
  545. }
  546. ],
  547. "source": [
  548. "df1.reset_index([\"产品\"]).rename({\"姓名\":\"count\"},axis=1)"
  549. ]
  550. },
  551. {
  552. "cell_type": "code",
  553. "execution_count": 261,
  554. "metadata": {},
  555. "outputs": [
  556. {
  557. "data": {
  558. "text/html": [
  559. "<div>\n",
  560. "<style scoped>\n",
  561. " .dataframe tbody tr th:only-of-type {\n",
  562. " vertical-align: middle;\n",
  563. " }\n",
  564. "\n",
  565. " .dataframe tbody tr th {\n",
  566. " vertical-align: top;\n",
  567. " }\n",
  568. "\n",
  569. " .dataframe thead th {\n",
  570. " text-align: right;\n",
  571. " }\n",
  572. "</style>\n",
  573. "<table border=\"1\" class=\"dataframe\">\n",
  574. " <thead>\n",
  575. " <tr style=\"text-align: right;\">\n",
  576. " <th></th>\n",
  577. " <th>count</th>\n",
  578. " <th>姓名</th>\n",
  579. " </tr>\n",
  580. " </thead>\n",
  581. " <tbody>\n",
  582. " <tr>\n",
  583. " <th>0</th>\n",
  584. " <td>A</td>\n",
  585. " <td>3</td>\n",
  586. " </tr>\n",
  587. " <tr>\n",
  588. " <th>1</th>\n",
  589. " <td>B</td>\n",
  590. " <td>2</td>\n",
  591. " </tr>\n",
  592. " <tr>\n",
  593. " <th>2</th>\n",
  594. " <td>C</td>\n",
  595. " <td>1</td>\n",
  596. " </tr>\n",
  597. " </tbody>\n",
  598. "</table>\n",
  599. "</div>"
  600. ],
  601. "text/plain": [
  602. " count 姓名\n",
  603. "0 A 3\n",
  604. "1 B 2\n",
  605. "2 C 1"
  606. ]
  607. },
  608. "execution_count": 261,
  609. "metadata": {},
  610. "output_type": "execute_result"
  611. }
  612. ],
  613. "source": [
  614. "df1=df1.to_frame()\n",
  615. "df1.reset_index().rename({\"index\":\"产品\", \"产品\":\"count\"},axis=1)"
  616. ]
  617. }
  618. ],
  619. "metadata": {
  620. "kernelspec": {
  621. "display_name": "Python 3",
  622. "language": "python",
  623. "name": "python3"
  624. },
  625. "language_info": {
  626. "codemirror_mode": {
  627. "name": "ipython",
  628. "version": 3
  629. },
  630. "file_extension": ".py",
  631. "mimetype": "text/x-python",
  632. "name": "python",
  633. "nbconvert_exporter": "python",
  634. "pygments_lexer": "ipython3",
  635. "version": "3.7.0"
  636. }
  637. },
  638. "nbformat": 4,
  639. "nbformat_minor": 2
  640. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement