Guest User

Untitled

a guest
Jan 18th, 2019
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.22 KB | None | 0 0
  1. import pandas as pd
  2.  
  3. f1 = pd.read_csv('/home/kopal/Desktop/jobs.csv')
  4. f1.head()
  5.  
  6.  
  7.  
  8.  
  9. df = pd.DataFrame(f1)
  10. df.head()
  11.  
  12.  
  13.  
  14.  
  15.  
  16. df.loc[df['id'] == 13]
  17.  
  18.  
  19.  
  20.  
  21.  
  22. df.loc[df['location'] == 'Mumbai']
  23.  
  24.  
  25.  
  26.  
  27. f2 = pd.read_csv('/home/kopal/Desktop/candidate_details.csv')
  28. f2.head()
  29.  
  30.  
  31.  
  32.  
  33.  
  34. df1 = pd.DataFrame(f2)
  35. df1.head()
  36.  
  37.  
  38.  
  39.  
  40. df2=df1[['candidateName','skillSet','relevantExperiance','qualification','location','industryType']]
  41. df2
  42.  
  43.  
  44.  
  45.  
  46. a=df2.head(1000)
  47. a
  48.  
  49.  
  50.  
  51.  
  52. df3=df[['job_title','keyskills','location']]
  53. df3
  54. df3.loc[df['id'] == 13 ]
  55.  
  56.  
  57.  
  58.  
  59.  
  60. import requests
  61. import matplotlib.pyplot as plt ##Visualization
  62. import nltk ##NLP
  63. from textblob import TextBlob
  64. from textblob.sentiments import NaiveBayesAnalyzer ##MACHINE LEARNING
  65. import re, math
  66. from collections import Counter
  67. import numpy as np
  68. blob = TextBlob(df3.keyskills[4], analyzer=NaiveBayesAnalyzer())
  69. blob1= TextBlob(df3.location[4], analyzer=NaiveBayesAnalyzer())
  70.  
  71.  
  72.  
  73. df4=pd.DataFrame()
  74. df4['skills']=a.skillSet
  75. df4['location_prefer']=a.location
  76. df4
  77.  
  78.  
  79.  
  80.  
  81.  
  82. text1 = df3.keySkills[4]
  83. text4 = df3.location[4]
  84.  
  85.  
  86.  
  87.  
  88.  
  89. class Similarity():
  90. def compute_cosine_similarity(self, string1, string2):
  91. # intersects the words that are common
  92. # in the set of the two words
  93. intersection = set(string1.keys()) & set(string2.keys())
  94. # dot matrix of vec1 and vec2
  95. numerator = sum([string1[x] * string2[x] for x in intersection])
  96.  
  97. # sum of the squares of each vector
  98. # sum1 is the sum of text1 and same for sum2 for text2
  99. sum1 = sum([string1[x]**2 for x in string1.keys()])
  100. sum2 = sum([string2[x]**2 for x in string2.keys()])
  101.  
  102. # product of the square root of both sum(s)
  103. denominator = math.sqrt(sum1) * math.sqrt(sum2)
  104. if not denominator:
  105. return 0.0
  106. else:
  107. return round(numerator/float(denominator),4)
  108.  
  109. def text_to_vector(self,text):
  110. WORD = re.compile(r'w+')
  111. words = WORD.findall(text)
  112. return Counter(words)
  113.  
  114. # Jaccard Similarity
  115. def tokenize(self,string):
  116. return string.lower().split(" ")
  117.  
  118. def jaccard_similarity(self, string1, string2):
  119. intersection = set(string1).intersection(set(string2))
  120. union = set(string1).union(set(string2))
  121. return len(intersection)/float(len(union))
  122. cosine=[]
  123.  
  124. for text2 in a.skillSet:
  125.  
  126. #print(i)
  127. similarity = Similarity()
  128.  
  129. vector1 = similarity.text_to_vector(text1)
  130. vector2 = similarity.text_to_vector(text2)
  131.  
  132.  
  133. token1 = similarity.tokenize(text1)
  134. token2 = similarity.tokenize(text2)
  135.  
  136. cosine.append(similarity.compute_cosine_similarity(vector1, vector2))
  137.  
  138.  
  139.  
  140.  
  141.  
  142.  
  143.  
  144.  
  145. class Similarity():
  146. def compute_cosine_similarity(self, string1, string2):
  147. # intersects the words that are common
  148. # in the set of the two words
  149. intersection = set(string1.keys()) & set(string2.keys())
  150. # dot matrix of vec1 and vec2
  151. numerator = sum([string1[x] * string2[x] for x in intersection])
  152.  
  153. # sum of the squares of each vector
  154. # sum1 is the sum of text1 and same for sum2 for text2
  155. sum1 = sum([string1[x]**2 for x in string1.keys()])
  156. sum2 = sum([string2[x]**2 for x in string2.keys()])
  157.  
  158. # product of the square root of both sum(s)
  159. denominator = math.sqrt(sum1) * math.sqrt(sum2)
  160. if not denominator:
  161. return 0.0
  162. else:
  163. return round(numerator/float(denominator),4)
  164.  
  165. def text_to_vector(self,text):
  166. WORD = re.compile(r'w+')
  167. words = WORD.findall(text)
  168. return Counter(words)
  169.  
  170.  
  171.  
  172. cosine1=[]
  173. for text3 in str(a.location):
  174. similarity1 = Similarity()
  175.  
  176. vector4 = similarity1.text_to_vector(text4)
  177. vector3 = similarity1.text_to_vector(text3)
  178.  
  179. token4 = similarity1.tokenize(text4)
  180. token3 = similarity1.tokenize(text3)
  181.  
  182. cosine1.append(similarity1.compute_cosine_similarity(vector4, vector3))
  183.  
  184.  
  185.  
  186.  
  187.  
  188. print(str(a.location))
  189.  
  190.  
  191.  
  192.  
  193.  
  194. df4['similarity_for_skills']=cosine
  195. se = pd.Series(cosine1)
  196. df4['similarity_for_location'] = se
  197. #df4.insert(loc=0, column='simlarity_for_location', value=se)
  198. #df4['similarity_for_location']=str(cosine1)
  199. df4
  200.  
  201.  
  202.  
  203.  
  204.  
  205. df5 = pd.concat([a.candidateName,df4], axis=1)
  206.  
  207.  
  208.  
  209.  
  210.  
  211.  
  212. result=df5.sort_values('similarity_for_skills',ascending=False)
  213.  
  214.  
  215.  
  216.  
  217.  
  218. df6=pd.DataFrame(result)
  219. df6
Add Comment
Please, Sign In to add comment