Guest User

Untitled

a guest
Jun 20th, 2018
67
0
Never
Not a member of Pastebin yet? Sign up — it unlocks many cool features!
text 3.57 KB | None | 0 0
  1. {
  2. "cells": [
  3. {
  4. "cell_type": "code",
  5. "execution_count": 1,
  6. "metadata": {},
  7. "outputs": [],
  8. "source": [
  9. "import pandas as pd\n",
  10. "import numpy as np\n",
  11. "import matplotlib.pyplot as plt"
  12. ]
  13. },
  14. {
  15. "cell_type": "code",
  16. "execution_count": 2,
  17. "metadata": {},
  18. "outputs": [],
  19. "source": [
  20. "from sklearn.feature_extraction.text import TfidfVectorizer\n",
  21. "from sklearn.metrics.pairwise import linear_kernel"
  22. ]
  23. },
  24. {
  25. "cell_type": "code",
  26. "execution_count": 3,
  27. "metadata": {},
  28. "outputs": [],
  29. "source": [
  30. "from Rec_Engine.text_rec import RecommendationSystem"
  31. ]
  32. },
  33. {
  34. "cell_type": "code",
  35. "execution_count": 4,
  36. "metadata": {},
  37. "outputs": [],
  38. "source": [
  39. "test_class = RecommendationSystem('wc_teams.csv')"
  40. ]
  41. },
  42. {
  43. "cell_type": "code",
  44. "execution_count": 5,
  45. "metadata": {},
  46. "outputs": [
  47. {
  48. "data": {
  49. "text/plain": [
  50. "((0.999999999999962,\n",
  51. " 'United_States',\n",
  52. " 0.04496156283698449,\n",
  53. " 'Switzerland',\n",
  54. " 0.04366393350633942,\n",
  55. " 'Peru',\n",
  56. " 0.043520486662234195,\n",
  57. " 'Sweden'),\n",
  58. " 'United_States,Switzerland,Peru,Sweden')"
  59. ]
  60. },
  61. "execution_count": 5,
  62. "metadata": {},
  63. "output_type": "execute_result"
  64. }
  65. ],
  66. "source": [
  67. "matrix = test_class.populate()\n",
  68. "test_class.generate_similarity(matrix)"
  69. ]
  70. },
  71. {
  72. "cell_type": "code",
  73. "execution_count": 6,
  74. "metadata": {},
  75. "outputs": [],
  76. "source": [
  77. "df = pd.read_csv('similar_text_matrix.csv')"
  78. ]
  79. },
  80. {
  81. "cell_type": "code",
  82. "execution_count": 15,
  83. "metadata": {},
  84. "outputs": [
  85. {
  86. "data": {
  87. "text/html": [
  88. "<div>\n",
  89. "<style scoped>\n",
  90. " .dataframe tbody tr th:only-of-type {\n",
  91. " vertical-align: middle;\n",
  92. " }\n",
  93. "\n",
  94. " .dataframe tbody tr th {\n",
  95. " vertical-align: top;\n",
  96. " }\n",
  97. "\n",
  98. " .dataframe thead th {\n",
  99. " text-align: right;\n",
  100. " }\n",
  101. "</style>\n",
  102. "<table border=\"1\" class=\"dataframe\">\n",
  103. " <thead>\n",
  104. " <tr style=\"text-align: right;\">\n",
  105. " <th></th>\n",
  106. " <th>self_id</th>\n",
  107. " <th>id1</th>\n",
  108. " <th>id2</th>\n",
  109. " <th>id3</th>\n",
  110. " </tr>\n",
  111. " </thead>\n",
  112. " <tbody>\n",
  113. " <tr>\n",
  114. " <th>32</th>\n",
  115. " <td>United_States</td>\n",
  116. " <td>Switzerland</td>\n",
  117. " <td>Peru</td>\n",
  118. " <td>Sweden</td>\n",
  119. " </tr>\n",
  120. " </tbody>\n",
  121. "</table>\n",
  122. "</div>"
  123. ],
  124. "text/plain": [
  125. " self_id id1 id2 id3\n",
  126. "32 United_States Switzerland Peru Sweden"
  127. ]
  128. },
  129. "execution_count": 15,
  130. "metadata": {},
  131. "output_type": "execute_result"
  132. }
  133. ],
  134. "source": [
  135. "df.loc[df['self_id'] == 'United_States']"
  136. ]
  137. },
  138. {
  139. "cell_type": "code",
  140. "execution_count": null,
  141. "metadata": {},
  142. "outputs": [],
  143. "source": []
  144. }
  145. ],
  146. "metadata": {
  147. "kernelspec": {
  148. "display_name": "Python 3",
  149. "language": "python",
  150. "name": "python3"
  151. },
  152. "language_info": {
  153. "codemirror_mode": {
  154. "name": "ipython",
  155. "version": 3
  156. },
  157. "file_extension": ".py",
  158. "mimetype": "text/x-python",
  159. "name": "python",
  160. "nbconvert_exporter": "python",
  161. "pygments_lexer": "ipython3",
  162. "version": "3.6.4"
  163. }
  164. },
  165. "nbformat": 4,
  166. "nbformat_minor": 2
  167. }
Add Comment
Please sign in to add a comment.