Guest User

Untitled

a guest
Oct 22nd, 2017
81
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 16.57 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "markdown",
  5. "metadata": {},
  6. "source": [
  7. "--------------\n",
  8. "## 演習1:複数CSVデータをExcelファイルに一括変換\n",
  9. "-------------------\n"
  10. ]
  11. },
  12. {
  13. "cell_type": "markdown",
  14. "metadata": {},
  15. "source": [
  16. "### Step1: まずは、複数CSVデータをJupyter notebookに表示させます。"
  17. ]
  18. },
  19. {
  20. "cell_type": "code",
  21. "execution_count": 5,
  22. "metadata": {
  23. "scrolled": true
  24. },
  25. "outputs": [
  26. {
  27. "data": {
  28. "text/plain": [
  29. "['/Users/yanomekeita/dhu/monthly_searches/ms_ar.csv',\n",
  30. " '/Users/yanomekeita/dhu/monthly_searches/ms_css.csv',\n",
  31. " '/Users/yanomekeita/dhu/monthly_searches/ms_excel.csv',\n",
  32. " '/Users/yanomekeita/dhu/monthly_searches/ms_html.csv',\n",
  33. " '/Users/yanomekeita/dhu/monthly_searches/ms_illustrator.csv',\n",
  34. " '/Users/yanomekeita/dhu/monthly_searches/ms_java.csv',\n",
  35. " '/Users/yanomekeita/dhu/monthly_searches/ms_photoshop.csv',\n",
  36. " '/Users/yanomekeita/dhu/monthly_searches/ms_php.csv',\n",
  37. " '/Users/yanomekeita/dhu/monthly_searches/ms_programming.csv',\n",
  38. " '/Users/yanomekeita/dhu/monthly_searches/ms_python.csv',\n",
  39. " '/Users/yanomekeita/dhu/monthly_searches/ms_ruby.csv',\n",
  40. " '/Users/yanomekeita/dhu/monthly_searches/ms_swift.csv',\n",
  41. " '/Users/yanomekeita/dhu/monthly_searches/ms_vr.csv']"
  42. ]
  43. },
  44. "execution_count": 5,
  45. "metadata": {},
  46. "output_type": "execute_result"
  47. }
  48. ],
  49. "source": [
  50. "# ディレクトリ操作のためのOSライブラリと、ファイル検索をしてくれるglobライブラリを読み込みます。\n",
  51. "import os\n",
  52. "import glob\n",
  53. "\n",
  54. "file_dir = '/Users/yanomekeita/dhu/monthly_searches'\n",
  55. "file_path = os.path.join(file_dir, '*.csv')\n",
  56. "files = glob.glob(file_path)\n",
  57. "files"
  58. ]
  59. },
  60. {
  61. "cell_type": "code",
  62. "execution_count": 9,
  63. "metadata": {},
  64. "outputs": [
  65. {
  66. "name": "stdout",
  67. "output_type": "stream",
  68. "text": [
  69. "ar 拡張現実\n",
  70. "4385\n",
  71. "ar 開発\n",
  72. "5326\n",
  73. "ar カード\n",
  74. "6122\n",
  75. "ar カメラ\n",
  76. "4893\n",
  77. "ar 髪型\n",
  78. "8147\n",
  79. "ar 会計\n",
  80. "7343\n",
  81. "ar 活用事例\n",
  82. "10593\n",
  83. "ar 化学\n",
  84. "1066\n",
  85. "ar 観光\n",
  86. "7484\n",
  87. "ar 関連銘柄\n",
  88. "1029\n",
  89. "css\n",
  90. "4554\n",
  91. "css 書き方\n",
  92. "16432\n",
  93. "cssとは\n",
  94. "6201\n",
  95. "css セレクタ\n",
  96. "11057\n",
  97. "css border\n",
  98. "17158\n",
  99. "css position\n",
  100. "17167\n",
  101. "css important\n",
  102. "6121\n",
  103. "css display\n",
  104. "6226\n",
  105. "css margin\n",
  106. "2744\n",
  107. "css float\n",
  108. "18878\n",
  109. "エクセル 関数\n",
  110. "9717\n",
  111. "エクセル プルダウン\n",
  112. "7716\n",
  113. "エクセルシオール\n",
  114. "11493\n",
  115. "エクセル マクロ\n",
  116. "3291\n",
  117. "エクセル 改行\n",
  118. "3812\n",
  119. "エクセル グラフ\n",
  120. "5632\n",
  121. "エクセル チェックボックス\n",
  122. "11050\n",
  123. "エクセル 表\n",
  124. "9726\n",
  125. "エクセル パスワード\n",
  126. "16223\n",
  127. "html\n",
  128. "7010\n",
  129. "html5\n",
  130. "13780\n",
  131. "htmlとは\n",
  132. "9124\n",
  133. "html 色\n",
  134. "4822\n",
  135. "html タグ\n",
  136. "14354\n",
  137. "html 改行\n",
  138. "19721\n",
  139. "html コメントアウト\n",
  140. "2362\n",
  141. "html リンク\n",
  142. "12423\n",
  143. "html table\n",
  144. "7926\n",
  145. "html css\n",
  146. "4262\n",
  147. "illustrator\n",
  148. "18221\n",
  149. "illustrator 無料\n",
  150. "3795\n",
  151. "illustrator cs6\n",
  152. "6345\n",
  153. "illustrator 使い方\n",
  154. "1143\n",
  155. "illustrator 価格\n",
  156. "4316\n",
  157. "illustrator グラデーション\n",
  158. "14278\n",
  159. "illustrator 体験版\n",
  160. "17971\n",
  161. "illustrator 塗りつぶし\n",
  162. "5862\n",
  163. "illustrator 矢印\n",
  164. "4962\n",
  165. "illustrator アウトライン化\n",
  166. "1127\n",
  167. "java\n",
  168. "1072\n",
  169. "javascript\n",
  170. "11564\n",
  171. "java 入門\n",
  172. "15137\n",
  173. "javascript 配列\n",
  174. "12994\n",
  175. "javaとは\n",
  176. "14846\n",
  177. "javascript 入門\n",
  178. "14994\n",
  179. "java9\n",
  180. "8058\n",
  181. "java 配列\n",
  182. "4258\n",
  183. "java アップデート\n",
  184. "5010\n",
  185. "java インストール\n",
  186. "4953\n",
  187. "photoshop\n",
  188. "18215\n",
  189. "photoshop elements\n",
  190. "13083\n",
  191. "photoshop 無料\n",
  192. "19727\n",
  193. "photoshop 切り抜き\n",
  194. "16110\n",
  195. "photoshop vip\n",
  196. "4239\n",
  197. "photoshop 使い方\n",
  198. "9581\n",
  199. "photoshop cs2\n",
  200. "5172\n",
  201. "photoshop ブラシ\n",
  202. "18882\n",
  203. "photoshop 値段\n",
  204. "7855\n",
  205. "photoshop 体験版\n",
  206. "6000\n",
  207. "php\n",
  208. "19934\n",
  209. "phpmyadmin\n",
  210. "3483\n",
  211. "php研究所\n",
  212. "5052\n",
  213. "php date\n",
  214. "16900\n",
  215. "php 入門\n",
  216. "19818\n",
  217. "php foreach\n",
  218. "9893\n",
  219. "phpとは\n",
  220. "4652\n",
  221. "php 配列\n",
  222. "3594\n",
  223. "php isset\n",
  224. "7566\n",
  225. "php explode\n",
  226. "15881\n",
  227. "プログラミング\n",
  228. "4523\n",
  229. "プログラミング言語\n",
  230. "13190\n",
  231. "プログラミングとは\n",
  232. "15038\n",
  233. "プログラミング教室\n",
  234. "10221\n",
  235. "プログラミング教育\n",
  236. "7952\n",
  237. "プログラミング 独学\n",
  238. "11961\n",
  239. "プログラミング ゲーム\n",
  240. "19126\n",
  241. "プログラミング 資格\n",
  242. "1682\n",
  243. "プログラミング アプリ\n",
  244. "1804\n",
  245. "プログラミング 勉強\n",
  246. "18401\n",
  247. "python\n",
  248. "18495\n",
  249. "python 入門\n",
  250. "19292\n",
  251. "python for\n",
  252. "15969\n",
  253. "python if\n",
  254. "3481\n",
  255. "python 配列\n",
  256. "17525\n",
  257. "python インストール\n",
  258. "4284\n",
  259. "python print\n",
  260. "19930\n",
  261. "python3\n",
  262. "3240\n",
  263. "python 機械学習\n",
  264. "16022\n",
  265. "python 正規表現\n",
  266. "10719\n",
  267. "ruby on rails\n",
  268. "10495\n",
  269. "ruby 入門\n",
  270. "15825\n",
  271. "ruby rose\n",
  272. "15763\n",
  273. "ruby 正規表現\n",
  274. "12917\n",
  275. "ruby hash\n",
  276. "10580\n",
  277. "ruby 配列\n",
  278. "14363\n",
  279. "ruby インストール\n",
  280. "3774\n",
  281. "ruby and you\n",
  282. "7543\n",
  283. "ruby if\n",
  284. "11710\n",
  285. "ruby array\n",
  286. "1470\n",
  287. "swift\n",
  288. "5443\n",
  289. "swift code\n",
  290. "15786\n",
  291. "swift 意味\n",
  292. "17142\n",
  293. "swift 入門\n",
  294. "1283\n",
  295. "swift playgrounds\n",
  296. "13779\n",
  297. "swift sports\n",
  298. "13801\n",
  299. "swift4\n",
  300. "10061\n",
  301. "swiftkey\n",
  302. "14115\n",
  303. "swift windows\n",
  304. "17327\n",
  305. "swift3\n",
  306. "2787\n",
  307. "vr 新宿\n",
  308. "4049\n",
  309. "vr ゴーグル\n",
  310. "14639\n",
  311. "vr 体験\n",
  312. "16875\n",
  313. "vr ゲーム\n",
  314. "3591\n",
  315. "vr スマホ\n",
  316. "2395\n",
  317. "vr アプリ\n",
  318. "5007\n",
  319. "vr おすすめ\n",
  320. "19251\n",
  321. "vr 値段\n",
  322. "6642\n",
  323. "vr 渋谷\n",
  324. "6189\n",
  325. "vr ソフト\n",
  326. "18419\n"
  327. ]
  328. }
  329. ],
  330. "source": [
  331. "# CSVファイルを読み込み、全データの抽出をしてみます。\n",
  332. "import csv\n",
  333. "\n",
  334. "# 上記のCSVファイルを1つ1つreaderモードで読み込みを開始します。\n",
  335. "for file in files :\n",
  336. " with open(file, 'r') as csvfile:\n",
  337. " reader = csv.reader(csvfile)\n",
  338. " \n",
  339. " # readerモードで読み込んだものを分解し、CSVファイルの中身をみていきます。\n",
  340. " for row in reader:\n",
  341. " for cell in row:\n",
  342. " print(cell)"
  343. ]
  344. },
  345. {
  346. "cell_type": "markdown",
  347. "metadata": {},
  348. "source": [
  349. "### Step2 : 取得したデータを1つのExcelファイルにまとめていきます。"
  350. ]
  351. },
  352. {
  353. "cell_type": "code",
  354. "execution_count": null,
  355. "metadata": {
  356. "collapsed": true
  357. },
  358. "outputs": [],
  359. "source": [
  360. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  361. "import openpyxl as px\n",
  362. "import csv\n",
  363. "\n",
  364. "# Excelファイルを作成します。\n",
  365. "wb = px.Workbook()\n",
  366. "\n",
  367. "# ExcelファイルにrubyやcssというCSVデータの名前のシートを作成します。\n",
  368. "for file in files :\n",
  369. " ws = wb.create_sheet(file)  # ここで1つ1つ名前のシートを作成しています。\n",
  370. " with open(file, 'r') as csvfile:\n",
  371. " reader = csv.reader(csvfile)\n",
  372. " \n",
  373. " # readerモードで読み込んだものを分解し、CSVファイルの中身をみていきます。\n",
  374. " for row in reader:\n",
  375. " for cell in row:\n",
  376. " print(cell)"
  377. ]
  378. },
  379. {
  380. "cell_type": "markdown",
  381. "metadata": {},
  382. "source": [
  383. "次に値の書き込みを行います。上記で出力したcellの動きとExcelファイルの動きを合わせてあげます。"
  384. ]
  385. },
  386. {
  387. "cell_type": "code",
  388. "execution_count": null,
  389. "metadata": {
  390. "collapsed": true
  391. },
  392. "outputs": [],
  393. "source": [
  394. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  395. "import openpyxl as px\n",
  396. "import csv\n",
  397. "\n",
  398. "# Excelファイルを作成します。\n",
  399. "wb = px.Workbook()\n",
  400. "\n",
  401. "# ExcelファイルにrubyやcssというCSVデータの名前のシートを作成します。\n",
  402. "for file in files :\n",
  403. " ws = wb.create_sheet(file)\n",
  404. " with open(file, 'r') as csvfile:\n",
  405. " reader = csv.reader(csvfile)\n",
  406. " \n",
  407. " # 値の書き込み(enumerateメソッドを使うと、インデックスの数を算出することができます。)\n",
  408. " for i ,row in enumerate(reader):\n",
  409. " for k, cell in enumerate(row):\n",
  410. " ws.cell(row = i + 1, column = k + 1 ).value = cell "
  411. ]
  412. },
  413. {
  414. "cell_type": "markdown",
  415. "metadata": {},
  416. "source": [
  417. "file名の保存でエラーが出ています。\n",
  418. "Excelのシート名では、/を使うことができないためです。\n",
  419. "\n",
  420. "ここからは文字列の置換を行い、/を省いていきましょう。"
  421. ]
  422. },
  423. {
  424. "cell_type": "code",
  425. "execution_count": 5,
  426. "metadata": {},
  427. "outputs": [
  428. {
  429. "name": "stdout",
  430. "output_type": "stream",
  431. "text": [
  432. "ms_ar.csv\n",
  433. "ms_css.csv\n",
  434. "ms_excel.csv\n",
  435. "ms_html.csv\n",
  436. "ms_illustrator.csv\n",
  437. "ms_java.csv\n",
  438. "ms_photoshop.csv\n",
  439. "ms_php.csv\n",
  440. "ms_programming.csv\n",
  441. "ms_python.csv\n",
  442. "ms_ruby.csv\n",
  443. "ms_swift.csv\n",
  444. "ms_vr.csv\n"
  445. ]
  446. }
  447. ],
  448. "source": [
  449. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  450. "import openpyxl as px\n",
  451. "import csv\n",
  452. "\n",
  453. "\n",
  454. "# Excelファイルを作成します。\n",
  455. "wb = px.Workbook()\n",
  456. "\n",
  457. "# 新たにreという正規表現を活用した置換をするライブラリをインポートします。\n",
  458. "import re\n",
  459. "\n",
  460. "for file in files :\n",
  461. " # ここで文字列の置換のテストを行います。 \n",
  462. " file = re.sub(r'(/Users/yanomekeita/dhu/monthly_searches/)', \"\", file)\n",
  463. " print(file)"
  464. ]
  465. },
  466. {
  467. "cell_type": "markdown",
  468. "metadata": {},
  469. "source": [
  470. "しっかりと/を省くことができました。\n",
  471. "\n",
  472. "もし、.csvを削除したい場合は、もう一度.csvを省く処理を入れてあげれば.csvを省くことができます。"
  473. ]
  474. },
  475. {
  476. "cell_type": "code",
  477. "execution_count": 6,
  478. "metadata": {},
  479. "outputs": [
  480. {
  481. "name": "stdout",
  482. "output_type": "stream",
  483. "text": [
  484. "ms_ar\n",
  485. "ms_css\n",
  486. "ms_excel\n",
  487. "ms_html\n",
  488. "ms_illustrator\n",
  489. "ms_java\n",
  490. "ms_photoshop\n",
  491. "ms_php\n",
  492. "ms_programming\n",
  493. "ms_python\n",
  494. "ms_ruby\n",
  495. "ms_swift\n",
  496. "ms_vr\n"
  497. ]
  498. }
  499. ],
  500. "source": [
  501. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  502. "import openpyxl as px\n",
  503. "import csv\n",
  504. "\n",
  505. "# Excelファイルを作成します。\n",
  506. "wb = px.Workbook()\n",
  507. "\n",
  508. "# 新たにreという正規表現を活用した置換をするライブラリをインポートします。\n",
  509. "import re\n",
  510. "for file in files :\n",
  511. " # 1回目の置換(/を省く)\n",
  512. " transform = re.sub(r'(/Users/yanomekeita/dhu/monthly_searches/)', \"\", file)\n",
  513. " # 2回目の置換(.csvを省く)\n",
  514. " transform = re.sub(r'(.csv)', \"\", transform )\n",
  515. " print(transform)"
  516. ]
  517. },
  518. {
  519. "cell_type": "code",
  520. "execution_count": 11,
  521. "metadata": {
  522. "collapsed": true
  523. },
  524. "outputs": [],
  525. "source": [
  526. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  527. "import openpyxl as px\n",
  528. "import csv\n",
  529. "import re\n",
  530. "\n",
  531. "# Excelファイルを作成します。\n",
  532. "wb = px.Workbook()\n",
  533. "\n",
  534. "\n",
  535. "for file in files :\n",
  536. " # 置換を行い、Excelシートを作成します。\n",
  537. " transform = re.sub(r'(/Users/yanomekeita/dhu/monthly_searches/)', \"\", file)\n",
  538. " transform = re.sub(r'(.csv)', \"\", transform )\n",
  539. " ws = wb.create_sheet(transform)\n",
  540. " \n",
  541. " # CSVファイルを読み込みます。\n",
  542. " with open(file, 'r') as csvfile:\n",
  543. " reader = csv.reader(csvfile)\n",
  544. " \n",
  545. " # 値の書き込み(enumerateメソッドを使うと、インデックスの数を算出することができます。)\n",
  546. " for i ,row in enumerate(reader):\n",
  547. " for k, cell in enumerate(row):\n",
  548. " ws.cell(row = i + 1, column = k + 1 ).value = cell\n",
  549. " wb.save('monthly_search.xlsx')"
  550. ]
  551. },
  552. {
  553. "cell_type": "markdown",
  554. "metadata": {},
  555. "source": [
  556. "1つ目のsheet1が不必要なので、削除してしまいましょう。"
  557. ]
  558. },
  559. {
  560. "cell_type": "code",
  561. "execution_count": 12,
  562. "metadata": {
  563. "collapsed": true
  564. },
  565. "outputs": [],
  566. "source": [
  567. "# Excelファイルに上記のデータを書き込んでいきます。\n",
  568. "import openpyxl as px\n",
  569. "import csv\n",
  570. "import re\n",
  571. "\n",
  572. "# Excelファイルを作成します。\n",
  573. "wb = px.Workbook()\n",
  574. "wb.remove_sheet(wb.active)\n",
  575. "\n",
  576. "for file in files :\n",
  577. " # 1回目の置換(/を省く)\n",
  578. " transform = re.sub(r'(/Users/yanomekeita/dhu/monthly_searches/)', \"\", file)\n",
  579. " # 2回目の置換(.csvを省く)\n",
  580. " transform = re.sub(r'(.csv)', \"\", transform )\n",
  581. " ws = wb.create_sheet(transform)\n",
  582. " with open(file, 'r') as csvfile:\n",
  583. " reader = csv.reader(csvfile)\n",
  584. " \n",
  585. " # 値の書き込み(enumerateメソッドを使うと、インデックスの数を算出することができます。)\n",
  586. " for i ,row in enumerate(reader):\n",
  587. " for k, cell in enumerate(row):\n",
  588. " ws.cell(row = i + 1, column = k + 1 ).value = cell\n",
  589. " wb.save('monthly_search.xlsx')"
  590. ]
  591. },
  592. {
  593. "cell_type": "code",
  594. "execution_count": null,
  595. "metadata": {
  596. "collapsed": true
  597. },
  598. "outputs": [],
  599. "source": []
  600. },
  601. {
  602. "cell_type": "code",
  603. "execution_count": null,
  604. "metadata": {
  605. "collapsed": true
  606. },
  607. "outputs": [],
  608. "source": []
  609. },
  610. {
  611. "cell_type": "code",
  612. "execution_count": null,
  613. "metadata": {
  614. "collapsed": true
  615. },
  616. "outputs": [],
  617. "source": []
  618. }
  619. ],
  620. "metadata": {
  621. "kernelspec": {
  622. "display_name": "Python 3",
  623. "language": "python",
  624. "name": "python3"
  625. },
  626. "language_info": {
  627. "codemirror_mode": {
  628. "name": "ipython",
  629. "version": 3
  630. },
  631. "file_extension": ".py",
  632. "mimetype": "text/x-python",
  633. "name": "python",
  634. "nbconvert_exporter": "python",
  635. "pygments_lexer": "ipython3",
  636. "version": "3.6.1"
  637. }
  638. },
  639. "nbformat": 4,
  640. "nbformat_minor": 2
  641. }
Add Comment
Please, Sign In to add comment