Advertisement
Guest User

Untitled

a guest
Jul 23rd, 2019
90
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 25.82 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import pandas as pd\n",
  10. "import xml.etree.ElementTree as ET"
  11. ]
  12. },
  13. {
  14. "cell_type": "code",
  15. "execution_count": 2,
  16. "metadata": {},
  17. "outputs": [],
  18. "source": [
  19. "path_train = r'C:\\Users\\Kyuyeon Rhee\\myworks\\laptop2\\sentiment\\ABSA16_Laptops_Train_SB1_v2.xml'\n",
  20. "\n",
  def get_list(path, sep=""):
      """Parse a review XML file into per-review texts and opinion labels.

      The file is read with ``ET.parse``. Every ``Review`` element that is a
      direct child of the root contributes exactly one entry to each of the
      two returned lists, so the lists are index-aligned.

      Parameters
      ----------
      path : str or file object
          Anything ``xml.etree.ElementTree.parse`` accepts.
      sep : str, optional
          String placed between the texts of consecutive sentences of one
          review. Defaults to ``""``, which reproduces the historical
          behaviour of gluing sentences together with no separator; pass
          ``" "`` to keep sentence boundaries readable.

      Returns
      -------
      tuple[list[str], list[list[dict]]]
          ``(text_list, opinion_list)``. ``text_list[i]`` is the joined text
          of review ``i``. ``opinion_list[i]`` holds one single-entry dict
          per ``Opinion`` element of that review, mapping the ``category``
          attribute (with ``#`` replaced by ``_``) to the ``polarity``
          attribute.

      Raises
      ------
      AttributeError
          If an ``Opinion`` element has no ``category`` attribute.
      """
      root = ET.parse(path).getroot()

      text_list = []
      opinion_list = []
      for review in root.findall('Review'):
          # findtext() yields '' for a missing or empty <text>, where
          # find('text').text would raise or return None; empty pieces are
          # dropped so a non-empty `sep` never produces doubled separators.
          sentence_texts = [
              sent.findtext('text', default='')
              for sent in review.findall('./sentences/sentence')
          ]
          text_list.append(sep.join(piece for piece in sentence_texts if piece))

          # One dict per opinion (not one dict per review): the same category
          # can legitimately appear several times within a review.
          opinion_list.append([
              {opinion.get('category').replace('#', '_'): opinion.get('polarity')}
              for opinion in review.findall('./sentences/sentence/Opinions/Opinion')
          ])
      return text_list, opinion_list
  37. ]
  38. },
  39. {
  40. "cell_type": "code",
  41. "execution_count": 3,
  42. "metadata": {},
  43. "outputs": [],
  44. "source": [
  45. "train_text_list, train_opinion_list = get_list(path_train)"
  46. ]
  47. },
  48. {
  49. "cell_type": "code",
  50. "execution_count": 4,
  51. "metadata": {},
  52. "outputs": [],
  53. "source": [
  54. "df1 = pd.DataFrame(data = train_text_list, columns = ['text'])\n",
  55. "df2 = pd.DataFrame(data = train_opinion_list)"
  56. ]
  57. },
  58. {
  59. "cell_type": "code",
  60. "execution_count": 5,
  61. "metadata": {
  62. "scrolled": true
  63. },
  64. "outputs": [
  65. {
  66. "data": {
  67. "text/html": [
  68. "<div>\n",
  69. "<style scoped>\n",
  70. " .dataframe tbody tr th:only-of-type {\n",
  71. " vertical-align: middle;\n",
  72. " }\n",
  73. "\n",
  74. " .dataframe tbody tr th {\n",
  75. " vertical-align: top;\n",
  76. " }\n",
  77. "\n",
  78. " .dataframe thead th {\n",
  79. " text-align: right;\n",
  80. " }\n",
  81. "</style>\n",
  82. "<table border=\"1\" class=\"dataframe\">\n",
  83. " <thead>\n",
  84. " <tr style=\"text-align: right;\">\n",
  85. " <th></th>\n",
  86. " <th>text</th>\n",
  87. " </tr>\n",
  88. " </thead>\n",
  89. " <tbody>\n",
  90. " <tr>\n",
  91. " <th>0</th>\n",
  92. " <td>Being a PC user my whole life....This computer...</td>\n",
  93. " </tr>\n",
  94. " <tr>\n",
  95. " <th>1</th>\n",
  96. " <td>the laptop was really good and it goes really ...</td>\n",
  97. " </tr>\n",
  98. " <tr>\n",
  99. " <th>2</th>\n",
  100. " <td>As a lifelong Windows user, I was extremely pl...</td>\n",
  101. " </tr>\n",
  102. " <tr>\n",
  103. " <th>3</th>\n",
  104. " <td>Oh my goodness-I am not a happy camper.My HP i...</td>\n",
  105. " </tr>\n",
  106. " <tr>\n",
  107. " <th>4</th>\n",
  108. " <td>Since I purchased my Toshiba netbook, I have b...</td>\n",
  109. " </tr>\n",
  110. " </tbody>\n",
  111. "</table>\n",
  112. "</div>"
  113. ],
  114. "text/plain": [
  115. " text\n",
  116. "0 Being a PC user my whole life....This computer...\n",
  117. "1 the laptop was really good and it goes really ...\n",
  118. "2 As a lifelong Windows user, I was extremely pl...\n",
  119. "3 Oh my goodness-I am not a happy camper.My HP i...\n",
  120. "4 Since I purchased my Toshiba netbook, I have b..."
  121. ]
  122. },
  123. "execution_count": 5,
  124. "metadata": {},
  125. "output_type": "execute_result"
  126. }
  127. ],
  128. "source": [
  129. "df1.head()"
  130. ]
  131. },
  132. {
  133. "cell_type": "code",
  134. "execution_count": 6,
  135. "metadata": {},
  136. "outputs": [
  137. {
  138. "data": {
  139. "text/html": [
  140. "<div>\n",
  141. "<style scoped>\n",
  142. " .dataframe tbody tr th:only-of-type {\n",
  143. " vertical-align: middle;\n",
  144. " }\n",
  145. "\n",
  146. " .dataframe tbody tr th {\n",
  147. " vertical-align: top;\n",
  148. " }\n",
  149. "\n",
  150. " .dataframe thead th {\n",
  151. " text-align: right;\n",
  152. " }\n",
  153. "</style>\n",
  154. "<table border=\"1\" class=\"dataframe\">\n",
  155. " <thead>\n",
  156. " <tr style=\"text-align: right;\">\n",
  157. " <th></th>\n",
  158. " <th>0</th>\n",
  159. " <th>1</th>\n",
  160. " <th>2</th>\n",
  161. " <th>3</th>\n",
  162. " <th>4</th>\n",
  163. " <th>5</th>\n",
  164. " <th>6</th>\n",
  165. " <th>7</th>\n",
  166. " <th>8</th>\n",
  167. " <th>9</th>\n",
  168. " <th>...</th>\n",
  169. " <th>15</th>\n",
  170. " <th>16</th>\n",
  171. " <th>17</th>\n",
  172. " <th>18</th>\n",
  173. " <th>19</th>\n",
  174. " <th>20</th>\n",
  175. " <th>21</th>\n",
  176. " <th>22</th>\n",
  177. " <th>23</th>\n",
  178. " <th>24</th>\n",
  179. " </tr>\n",
  180. " </thead>\n",
  181. " <tbody>\n",
  182. " <tr>\n",
  183. " <th>0</th>\n",
  184. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  185. " <td>{'BATTERY_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  186. " <td>{'CPU_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  187. " <td>{'GRAPHICS_GENERAL': 'positive'}</td>\n",
  188. " <td>{'HARD_DISC_DESIGN_FEATURES': 'positive'}</td>\n",
  189. " <td>{'LAPTOP_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  190. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  191. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  192. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  193. " <td>{'CPU_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  194. " <td>...</td>\n",
  195. " <td>None</td>\n",
  196. " <td>None</td>\n",
  197. " <td>None</td>\n",
  198. " <td>None</td>\n",
  199. " <td>None</td>\n",
  200. " <td>None</td>\n",
  201. " <td>None</td>\n",
  202. " <td>None</td>\n",
  203. " <td>None</td>\n",
  204. " <td>None</td>\n",
  205. " </tr>\n",
  206. " <tr>\n",
  207. " <th>1</th>\n",
  208. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  209. " <td>{'LAPTOP_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  210. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  211. " <td>{'LAPTOP_PRICE': 'positive'}</td>\n",
  212. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  213. " <td>None</td>\n",
  214. " <td>None</td>\n",
  215. " <td>None</td>\n",
  216. " <td>None</td>\n",
  217. " <td>None</td>\n",
  218. " <td>...</td>\n",
  219. " <td>None</td>\n",
  220. " <td>None</td>\n",
  221. " <td>None</td>\n",
  222. " <td>None</td>\n",
  223. " <td>None</td>\n",
  224. " <td>None</td>\n",
  225. " <td>None</td>\n",
  226. " <td>None</td>\n",
  227. " <td>None</td>\n",
  228. " <td>None</td>\n",
  229. " </tr>\n",
  230. " <tr>\n",
  231. " <th>2</th>\n",
  232. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  233. " <td>{'LAPTOP_QUALITY': 'positive'}</td>\n",
  234. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  235. " <td>{'LAPTOP_DESIGN_FEATURES': 'positive'}</td>\n",
  236. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  237. " <td>{'LAPTOP_MISCELLANEOUS': 'positive'}</td>\n",
  238. " <td>{'LAPTOP_MISCELLANEOUS': 'positive'}</td>\n",
  239. " <td>{'LAPTOP_QUALITY': 'positive'}</td>\n",
  240. " <td>{'OS_DESIGN_FEATURES': 'positive'}</td>\n",
  241. " <td>None</td>\n",
  242. " <td>...</td>\n",
  243. " <td>None</td>\n",
  244. " <td>None</td>\n",
  245. " <td>None</td>\n",
  246. " <td>None</td>\n",
  247. " <td>None</td>\n",
  248. " <td>None</td>\n",
  249. " <td>None</td>\n",
  250. " <td>None</td>\n",
  251. " <td>None</td>\n",
  252. " <td>None</td>\n",
  253. " </tr>\n",
  254. " <tr>\n",
  255. " <th>3</th>\n",
  256. " <td>{'LAPTOP_GENERAL': 'negative'}</td>\n",
  257. " <td>{'LAPTOP_DESIGN_FEATURES': 'negative'}</td>\n",
  258. " <td>{'LAPTOP_PORTABILITY': 'negative'}</td>\n",
  259. " <td>{'BATTERY_QUALITY': 'negative'}</td>\n",
  260. " <td>{'BATTERY_OPERATION_PERFORMANCE': 'negative'}</td>\n",
  261. " <td>{'BATTERY_QUALITY': 'negative'}</td>\n",
  262. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  263. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  264. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  265. " <td>{'LAPTOP_GENERAL': 'negative'}</td>\n",
  266. " <td>...</td>\n",
  267. " <td>{'COMPANY_GENERAL': 'negative'}</td>\n",
  268. " <td>None</td>\n",
  269. " <td>None</td>\n",
  270. " <td>None</td>\n",
  271. " <td>None</td>\n",
  272. " <td>None</td>\n",
  273. " <td>None</td>\n",
  274. " <td>None</td>\n",
  275. " <td>None</td>\n",
  276. " <td>None</td>\n",
  277. " </tr>\n",
  278. " <tr>\n",
  279. " <th>4</th>\n",
  280. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  281. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  282. " <td>{'DISPLAY_USABILITY': 'neutral'}</td>\n",
  283. " <td>{'DISPLAY_DESIGN_FEATURES': 'neutral'}</td>\n",
  284. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  285. " <td>None</td>\n",
  286. " <td>None</td>\n",
  287. " <td>None</td>\n",
  288. " <td>None</td>\n",
  289. " <td>None</td>\n",
  290. " <td>...</td>\n",
  291. " <td>None</td>\n",
  292. " <td>None</td>\n",
  293. " <td>None</td>\n",
  294. " <td>None</td>\n",
  295. " <td>None</td>\n",
  296. " <td>None</td>\n",
  297. " <td>None</td>\n",
  298. " <td>None</td>\n",
  299. " <td>None</td>\n",
  300. " <td>None</td>\n",
  301. " </tr>\n",
  302. " </tbody>\n",
  303. "</table>\n",
  304. "<p>5 rows × 25 columns</p>\n",
  305. "</div>"
  306. ],
  307. "text/plain": [
  308. " 0 \\\n",
  309. "0 {'LAPTOP_GENERAL': 'positive'} \n",
  310. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  311. "2 {'LAPTOP_PORTABILITY': 'positive'} \n",
  312. "3 {'LAPTOP_GENERAL': 'negative'} \n",
  313. "4 {'LAPTOP_GENERAL': 'positive'} \n",
  314. "\n",
  315. " 1 \\\n",
  316. "0 {'BATTERY_OPERATION_PERFORMANCE': 'positive'} \n",
  317. "1 {'LAPTOP_OPERATION_PERFORMANCE': 'positive'} \n",
  318. "2 {'LAPTOP_QUALITY': 'positive'} \n",
  319. "3 {'LAPTOP_DESIGN_FEATURES': 'negative'} \n",
  320. "4 {'LAPTOP_PORTABILITY': 'positive'} \n",
  321. "\n",
  322. " 2 \\\n",
  323. "0 {'CPU_OPERATION_PERFORMANCE': 'positive'} \n",
  324. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  325. "2 {'LAPTOP_USABILITY': 'positive'} \n",
  326. "3 {'LAPTOP_PORTABILITY': 'negative'} \n",
  327. "4 {'DISPLAY_USABILITY': 'neutral'} \n",
  328. "\n",
  329. " 3 \\\n",
  330. "0 {'GRAPHICS_GENERAL': 'positive'} \n",
  331. "1 {'LAPTOP_PRICE': 'positive'} \n",
  332. "2 {'LAPTOP_DESIGN_FEATURES': 'positive'} \n",
  333. "3 {'BATTERY_QUALITY': 'negative'} \n",
  334. "4 {'DISPLAY_DESIGN_FEATURES': 'neutral'} \n",
  335. "\n",
  336. " 4 \\\n",
  337. "0 {'HARD_DISC_DESIGN_FEATURES': 'positive'} \n",
  338. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  339. "2 {'LAPTOP_PORTABILITY': 'positive'} \n",
  340. "3 {'BATTERY_OPERATION_PERFORMANCE': 'negative'} \n",
  341. "4 {'LAPTOP_GENERAL': 'positive'} \n",
  342. "\n",
  343. " 5 \\\n",
  344. "0 {'LAPTOP_OPERATION_PERFORMANCE': 'positive'} \n",
  345. "1 None \n",
  346. "2 {'LAPTOP_MISCELLANEOUS': 'positive'} \n",
  347. "3 {'BATTERY_QUALITY': 'negative'} \n",
  348. "4 None \n",
  349. "\n",
  350. " 6 7 \\\n",
  351. "0 {'LAPTOP_USABILITY': 'positive'} {'LAPTOP_USABILITY': 'positive'} \n",
  352. "1 None None \n",
  353. "2 {'LAPTOP_MISCELLANEOUS': 'positive'} {'LAPTOP_QUALITY': 'positive'} \n",
  354. "3 {'SUPPORT_QUALITY': 'negative'} {'SUPPORT_QUALITY': 'negative'} \n",
  355. "4 None None \n",
  356. "\n",
  357. " 8 \\\n",
  358. "0 {'LAPTOP_PORTABILITY': 'positive'} \n",
  359. "1 None \n",
  360. "2 {'OS_DESIGN_FEATURES': 'positive'} \n",
  361. "3 {'SUPPORT_QUALITY': 'negative'} \n",
  362. "4 None \n",
  363. "\n",
  364. " 9 ... \\\n",
  365. "0 {'CPU_OPERATION_PERFORMANCE': 'positive'} ... \n",
  366. "1 None ... \n",
  367. "2 None ... \n",
  368. "3 {'LAPTOP_GENERAL': 'negative'} ... \n",
  369. "4 None ... \n",
  370. "\n",
  371. " 15 16 17 18 19 20 21 22 \\\n",
  372. "0 None None None None None None None None \n",
  373. "1 None None None None None None None None \n",
  374. "2 None None None None None None None None \n",
  375. "3 {'COMPANY_GENERAL': 'negative'} None None None None None None None \n",
  376. "4 None None None None None None None None \n",
  377. "\n",
  378. " 23 24 \n",
  379. "0 None None \n",
  380. "1 None None \n",
  381. "2 None None \n",
  382. "3 None None \n",
  383. "4 None None \n",
  384. "\n",
  385. "[5 rows x 25 columns]"
  386. ]
  387. },
  388. "execution_count": 6,
  389. "metadata": {},
  390. "output_type": "execute_result"
  391. }
  392. ],
  393. "source": [
  394. "df2.head()"
  395. ]
  396. },
  397. {
  398. "cell_type": "code",
  399. "execution_count": 8,
  400. "metadata": {},
  401. "outputs": [
  402. {
  403. "data": {
  404. "text/html": [
  405. "<div>\n",
  406. "<style scoped>\n",
  407. " .dataframe tbody tr th:only-of-type {\n",
  408. " vertical-align: middle;\n",
  409. " }\n",
  410. "\n",
  411. " .dataframe tbody tr th {\n",
  412. " vertical-align: top;\n",
  413. " }\n",
  414. "\n",
  415. " .dataframe thead th {\n",
  416. " text-align: right;\n",
  417. " }\n",
  418. "</style>\n",
  419. "<table border=\"1\" class=\"dataframe\">\n",
  420. " <thead>\n",
  421. " <tr style=\"text-align: right;\">\n",
  422. " <th></th>\n",
  423. " <th>text</th>\n",
  424. " <th>0</th>\n",
  425. " <th>1</th>\n",
  426. " <th>2</th>\n",
  427. " <th>3</th>\n",
  428. " <th>4</th>\n",
  429. " <th>5</th>\n",
  430. " <th>6</th>\n",
  431. " <th>7</th>\n",
  432. " <th>8</th>\n",
  433. " <th>...</th>\n",
  434. " <th>15</th>\n",
  435. " <th>16</th>\n",
  436. " <th>17</th>\n",
  437. " <th>18</th>\n",
  438. " <th>19</th>\n",
  439. " <th>20</th>\n",
  440. " <th>21</th>\n",
  441. " <th>22</th>\n",
  442. " <th>23</th>\n",
  443. " <th>24</th>\n",
  444. " </tr>\n",
  445. " </thead>\n",
  446. " <tbody>\n",
  447. " <tr>\n",
  448. " <th>0</th>\n",
  449. " <td>Being a PC user my whole life....This computer...</td>\n",
  450. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  451. " <td>{'BATTERY_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  452. " <td>{'CPU_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  453. " <td>{'GRAPHICS_GENERAL': 'positive'}</td>\n",
  454. " <td>{'HARD_DISC_DESIGN_FEATURES': 'positive'}</td>\n",
  455. " <td>{'LAPTOP_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  456. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  457. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  458. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  459. " <td>...</td>\n",
  460. " <td>None</td>\n",
  461. " <td>None</td>\n",
  462. " <td>None</td>\n",
  463. " <td>None</td>\n",
  464. " <td>None</td>\n",
  465. " <td>None</td>\n",
  466. " <td>None</td>\n",
  467. " <td>None</td>\n",
  468. " <td>None</td>\n",
  469. " <td>None</td>\n",
  470. " </tr>\n",
  471. " <tr>\n",
  472. " <th>1</th>\n",
  473. " <td>the laptop was really good and it goes really ...</td>\n",
  474. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  475. " <td>{'LAPTOP_OPERATION_PERFORMANCE': 'positive'}</td>\n",
  476. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  477. " <td>{'LAPTOP_PRICE': 'positive'}</td>\n",
  478. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  479. " <td>None</td>\n",
  480. " <td>None</td>\n",
  481. " <td>None</td>\n",
  482. " <td>None</td>\n",
  483. " <td>...</td>\n",
  484. " <td>None</td>\n",
  485. " <td>None</td>\n",
  486. " <td>None</td>\n",
  487. " <td>None</td>\n",
  488. " <td>None</td>\n",
  489. " <td>None</td>\n",
  490. " <td>None</td>\n",
  491. " <td>None</td>\n",
  492. " <td>None</td>\n",
  493. " <td>None</td>\n",
  494. " </tr>\n",
  495. " <tr>\n",
  496. " <th>2</th>\n",
  497. " <td>As a lifelong Windows user, I was extremely pl...</td>\n",
  498. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  499. " <td>{'LAPTOP_QUALITY': 'positive'}</td>\n",
  500. " <td>{'LAPTOP_USABILITY': 'positive'}</td>\n",
  501. " <td>{'LAPTOP_DESIGN_FEATURES': 'positive'}</td>\n",
  502. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  503. " <td>{'LAPTOP_MISCELLANEOUS': 'positive'}</td>\n",
  504. " <td>{'LAPTOP_MISCELLANEOUS': 'positive'}</td>\n",
  505. " <td>{'LAPTOP_QUALITY': 'positive'}</td>\n",
  506. " <td>{'OS_DESIGN_FEATURES': 'positive'}</td>\n",
  507. " <td>...</td>\n",
  508. " <td>None</td>\n",
  509. " <td>None</td>\n",
  510. " <td>None</td>\n",
  511. " <td>None</td>\n",
  512. " <td>None</td>\n",
  513. " <td>None</td>\n",
  514. " <td>None</td>\n",
  515. " <td>None</td>\n",
  516. " <td>None</td>\n",
  517. " <td>None</td>\n",
  518. " </tr>\n",
  519. " <tr>\n",
  520. " <th>3</th>\n",
  521. " <td>Oh my goodness-I am not a happy camper.My HP i...</td>\n",
  522. " <td>{'LAPTOP_GENERAL': 'negative'}</td>\n",
  523. " <td>{'LAPTOP_DESIGN_FEATURES': 'negative'}</td>\n",
  524. " <td>{'LAPTOP_PORTABILITY': 'negative'}</td>\n",
  525. " <td>{'BATTERY_QUALITY': 'negative'}</td>\n",
  526. " <td>{'BATTERY_OPERATION_PERFORMANCE': 'negative'}</td>\n",
  527. " <td>{'BATTERY_QUALITY': 'negative'}</td>\n",
  528. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  529. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  530. " <td>{'SUPPORT_QUALITY': 'negative'}</td>\n",
  531. " <td>...</td>\n",
  532. " <td>{'COMPANY_GENERAL': 'negative'}</td>\n",
  533. " <td>None</td>\n",
  534. " <td>None</td>\n",
  535. " <td>None</td>\n",
  536. " <td>None</td>\n",
  537. " <td>None</td>\n",
  538. " <td>None</td>\n",
  539. " <td>None</td>\n",
  540. " <td>None</td>\n",
  541. " <td>None</td>\n",
  542. " </tr>\n",
  543. " <tr>\n",
  544. " <th>4</th>\n",
  545. " <td>Since I purchased my Toshiba netbook, I have b...</td>\n",
  546. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  547. " <td>{'LAPTOP_PORTABILITY': 'positive'}</td>\n",
  548. " <td>{'DISPLAY_USABILITY': 'neutral'}</td>\n",
  549. " <td>{'DISPLAY_DESIGN_FEATURES': 'neutral'}</td>\n",
  550. " <td>{'LAPTOP_GENERAL': 'positive'}</td>\n",
  551. " <td>None</td>\n",
  552. " <td>None</td>\n",
  553. " <td>None</td>\n",
  554. " <td>None</td>\n",
  555. " <td>...</td>\n",
  556. " <td>None</td>\n",
  557. " <td>None</td>\n",
  558. " <td>None</td>\n",
  559. " <td>None</td>\n",
  560. " <td>None</td>\n",
  561. " <td>None</td>\n",
  562. " <td>None</td>\n",
  563. " <td>None</td>\n",
  564. " <td>None</td>\n",
  565. " <td>None</td>\n",
  566. " </tr>\n",
  567. " </tbody>\n",
  568. "</table>\n",
  569. "<p>5 rows × 26 columns</p>\n",
  570. "</div>"
  571. ],
  572. "text/plain": [
  573. " text \\\n",
  574. "0 Being a PC user my whole life....This computer... \n",
  575. "1 the laptop was really good and it goes really ... \n",
  576. "2 As a lifelong Windows user, I was extremely pl... \n",
  577. "3 Oh my goodness-I am not a happy camper.My HP i... \n",
  578. "4 Since I purchased my Toshiba netbook, I have b... \n",
  579. "\n",
  580. " 0 \\\n",
  581. "0 {'LAPTOP_GENERAL': 'positive'} \n",
  582. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  583. "2 {'LAPTOP_PORTABILITY': 'positive'} \n",
  584. "3 {'LAPTOP_GENERAL': 'negative'} \n",
  585. "4 {'LAPTOP_GENERAL': 'positive'} \n",
  586. "\n",
  587. " 1 \\\n",
  588. "0 {'BATTERY_OPERATION_PERFORMANCE': 'positive'} \n",
  589. "1 {'LAPTOP_OPERATION_PERFORMANCE': 'positive'} \n",
  590. "2 {'LAPTOP_QUALITY': 'positive'} \n",
  591. "3 {'LAPTOP_DESIGN_FEATURES': 'negative'} \n",
  592. "4 {'LAPTOP_PORTABILITY': 'positive'} \n",
  593. "\n",
  594. " 2 \\\n",
  595. "0 {'CPU_OPERATION_PERFORMANCE': 'positive'} \n",
  596. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  597. "2 {'LAPTOP_USABILITY': 'positive'} \n",
  598. "3 {'LAPTOP_PORTABILITY': 'negative'} \n",
  599. "4 {'DISPLAY_USABILITY': 'neutral'} \n",
  600. "\n",
  601. " 3 \\\n",
  602. "0 {'GRAPHICS_GENERAL': 'positive'} \n",
  603. "1 {'LAPTOP_PRICE': 'positive'} \n",
  604. "2 {'LAPTOP_DESIGN_FEATURES': 'positive'} \n",
  605. "3 {'BATTERY_QUALITY': 'negative'} \n",
  606. "4 {'DISPLAY_DESIGN_FEATURES': 'neutral'} \n",
  607. "\n",
  608. " 4 \\\n",
  609. "0 {'HARD_DISC_DESIGN_FEATURES': 'positive'} \n",
  610. "1 {'LAPTOP_GENERAL': 'positive'} \n",
  611. "2 {'LAPTOP_PORTABILITY': 'positive'} \n",
  612. "3 {'BATTERY_OPERATION_PERFORMANCE': 'negative'} \n",
  613. "4 {'LAPTOP_GENERAL': 'positive'} \n",
  614. "\n",
  615. " 5 \\\n",
  616. "0 {'LAPTOP_OPERATION_PERFORMANCE': 'positive'} \n",
  617. "1 None \n",
  618. "2 {'LAPTOP_MISCELLANEOUS': 'positive'} \n",
  619. "3 {'BATTERY_QUALITY': 'negative'} \n",
  620. "4 None \n",
  621. "\n",
  622. " 6 7 \\\n",
  623. "0 {'LAPTOP_USABILITY': 'positive'} {'LAPTOP_USABILITY': 'positive'} \n",
  624. "1 None None \n",
  625. "2 {'LAPTOP_MISCELLANEOUS': 'positive'} {'LAPTOP_QUALITY': 'positive'} \n",
  626. "3 {'SUPPORT_QUALITY': 'negative'} {'SUPPORT_QUALITY': 'negative'} \n",
  627. "4 None None \n",
  628. "\n",
  629. " 8 ... 15 \\\n",
  630. "0 {'LAPTOP_PORTABILITY': 'positive'} ... None \n",
  631. "1 None ... None \n",
  632. "2 {'OS_DESIGN_FEATURES': 'positive'} ... None \n",
  633. "3 {'SUPPORT_QUALITY': 'negative'} ... {'COMPANY_GENERAL': 'negative'} \n",
  634. "4 None ... None \n",
  635. "\n",
  636. " 16 17 18 19 20 21 22 23 24 \n",
  637. "0 None None None None None None None None None \n",
  638. "1 None None None None None None None None None \n",
  639. "2 None None None None None None None None None \n",
  640. "3 None None None None None None None None None \n",
  641. "4 None None None None None None None None None \n",
  642. "\n",
  643. "[5 rows x 26 columns]"
  644. ]
  645. },
  646. "execution_count": 8,
  647. "metadata": {},
  648. "output_type": "execute_result"
  649. }
  650. ],
  651. "source": [
  652. "result = pd.concat([df1, df2], axis=1, sort=False)\n",
  653. "result.head()"
  654. ]
  655. },
  656. {
  657. "cell_type": "code",
  658. "execution_count": 28,
  659. "metadata": {},
  660. "outputs": [],
  661. "source": [
  662. "result.to_csv(r'C:\\Users\\Kyuyeon Rhee\\myworks\\laptop2\\sentiment\\laptop_reviews.csv')"
  663. ]
  664. }
  665. ],
  666. "metadata": {
  667. "kernelspec": {
  668. "display_name": "Python 3",
  669. "language": "python",
  670. "name": "python3"
  671. },
  672. "language_info": {
  673. "codemirror_mode": {
  674. "name": "ipython",
  675. "version": 3
  676. },
  677. "file_extension": ".py",
  678. "mimetype": "text/x-python",
  679. "name": "python",
  680. "nbconvert_exporter": "python",
  681. "pygments_lexer": "ipython3",
  682. "version": "3.6.8"
  683. }
  684. },
  685. "nbformat": 4,
  686. "nbformat_minor": 2
  687. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement