daily pastebin goal
41%
SHARE
TWEET

Untitled

a guest Jan 18th, 2019 64 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. import pandas as pd
  2.  
  3. f1 = pd.read_csv('/home/kopal/Desktop/jobs.csv')
  4. f1.head()
  5.  
  6.  
  7.  
  8.  
  9. df = pd.DataFrame(f1)
  10. df.head()
  11.  
  12.  
  13.  
  14.  
  15.  
  16. df.loc[df['id'] == 13]
  17.  
  18.  
  19.  
  20.  
  21.  
  22. df.loc[df['location'] == 'Mumbai']
  23.  
  24.  
  25.  
  26.  
  27. f2 = pd.read_csv('/home/kopal/Desktop/candidate_details.csv')
  28. f2.head()
  29.  
  30.  
  31.  
  32.  
  33.  
  34. df1 = pd.DataFrame(f2)
  35. df1.head()
  36.  
  37.  
  38.  
  39.  
  40. df2=df1[['candidateName','skillSet','relevantExperiance','qualification','location','industryType']]
  41. df2
  42.  
  43.  
  44.  
  45.  
  46. a=df2.head(1000)
  47. a
  48.  
  49.  
  50.  
  51.  
  52. df3=df[['job_title','keyskills','location']]
  53. df3
  54. df3.loc[df['id'] == 13 ]
  55.  
  56.  
  57.  
  58.  
  59.  
import requests
import matplotlib.pyplot as plt ##Visualization
import nltk ##NLP
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer ##MACHINE LEARNING
import re, math
from collections import Counter
import numpy as np
# Sentiment blobs built from the skills / location of the job at index 4.
# NOTE(review): `blob` and `blob1` are never used again in this file, and
# NaiveBayesAnalyzer needs the NLTK movie-review corpus downloaded --
# consider removing these two lines. `requests`, `plt`, `nltk` and `np`
# also appear unused in the visible code.
blob = TextBlob(df3.keyskills[4], analyzer=NaiveBayesAnalyzer())
blob1= TextBlob(df3.location[4], analyzer=NaiveBayesAnalyzer())
  73. df4=pd.DataFrame()
  74. df4['skills']=a.skillSet
  75. df4['location_prefer']=a.location
  76. df4
  77.  
  78.  
  79.  
  80.  
  81.  
  82. text1 = df3.keySkills[4]
  83. text4 = df3.location[4]
  84.  
  85.  
  86.  
  87.  
  88.  
  89. class Similarity():
  90.     def compute_cosine_similarity(self, string1, string2):
  91.          # intersects the words that are common
  92.          # in the set of the two words
  93.          intersection = set(string1.keys()) & set(string2.keys())
  94.          # dot matrix of vec1 and vec2
  95.          numerator = sum([string1[x] * string2[x] for x in intersection])
  96.  
  97.          # sum of the squares of each vector
  98.          # sum1 is the sum of text1 and same for sum2 for text2
  99.          sum1 = sum([string1[x]**2 for x in string1.keys()])
  100.          sum2 = sum([string2[x]**2 for x in string2.keys()])
  101.  
  102.          # product of the square root of both sum(s)
  103.          denominator = math.sqrt(sum1) * math.sqrt(sum2)
  104.          if not denominator:
  105.             return 0.0
  106.          else:
  107.             return round(numerator/float(denominator),4)
  108.  
  109.     def text_to_vector(self,text):
  110.         WORD = re.compile(r'w+')
  111.         words = WORD.findall(text)
  112.         return Counter(words)
  113.  
  114.     # Jaccard Similarity
  115.     def tokenize(self,string):
  116.         return string.lower().split(" ")
  117.  
  118.     def jaccard_similarity(self, string1, string2):
  119.         intersection = set(string1).intersection(set(string2))
  120.         union = set(string1).union(set(string2))
  121.         return len(intersection)/float(len(union))
  122. cosine=[]
  123.  
  124. for text2 in a.skillSet:
  125.  
  126.     #print(i)
  127.     similarity = Similarity()
  128.  
  129.     vector1 = similarity.text_to_vector(text1)
  130.     vector2 = similarity.text_to_vector(text2)
  131.  
  132.  
  133.     token1 = similarity.tokenize(text1)
  134.     token2 = similarity.tokenize(text2)
  135.  
  136.     cosine.append(similarity.compute_cosine_similarity(vector1, vector2))
  137.  
  138.  
  139.  
  140.  
  141.  
  142.  
  143.  
  144.  
  145. class Similarity():
  146.     def compute_cosine_similarity(self, string1, string2):
  147.          # intersects the words that are common
  148.          # in the set of the two words
  149.          intersection = set(string1.keys()) & set(string2.keys())
  150.          # dot matrix of vec1 and vec2
  151.          numerator = sum([string1[x] * string2[x] for x in intersection])
  152.  
  153.          # sum of the squares of each vector
  154.          # sum1 is the sum of text1 and same for sum2 for text2
  155.          sum1 = sum([string1[x]**2 for x in string1.keys()])
  156.          sum2 = sum([string2[x]**2 for x in string2.keys()])
  157.  
  158.          # product of the square root of both sum(s)
  159.          denominator = math.sqrt(sum1) * math.sqrt(sum2)
  160.          if not denominator:
  161.             return 0.0
  162.          else:
  163.             return round(numerator/float(denominator),4)
  164.  
  165.     def text_to_vector(self,text):
  166.         WORD = re.compile(r'w+')
  167.         words = WORD.findall(text)
  168.         return Counter(words)
  169.  
  170.  
  171.  
  172. cosine1=[]    
  173. for text3 in str(a.location):
  174.     similarity1 = Similarity()
  175.  
  176.     vector4 = similarity1.text_to_vector(text4)
  177.     vector3 = similarity1.text_to_vector(text3)
  178.  
  179.     token4 = similarity1.tokenize(text4)
  180.     token3 = similarity1.tokenize(text3)
  181.  
  182.     cosine1.append(similarity1.compute_cosine_similarity(vector4, vector3))
  183.  
  184.  
  185.  
  186.  
  187.  
  188. print(str(a.location))
  189.  
  190.  
  191.  
  192.  
  193.  
  194. df4['similarity_for_skills']=cosine
  195. se = pd.Series(cosine1)
  196. df4['similarity_for_location'] = se
  197. #df4.insert(loc=0, column='simlarity_for_location', value=se)
  198. #df4['similarity_for_location']=str(cosine1)
  199. df4
  200.  
  201.  
  202.  
  203.  
  204.  
  205. df5 = pd.concat([a.candidateName,df4], axis=1)
  206.  
  207.  
  208.  
  209.  
  210.  
  211.  
  212. result=df5.sort_values('similarity_for_skills',ascending=False)
  213.  
  214.  
  215.  
  216.  
  217.  
  218. df6=pd.DataFrame(result)
  219. df6
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top