Not a member of Pastebin yet? Sign up — it unlocks many cool features!
import pandas as pd

# Load the job postings. pd.read_csv already returns a DataFrame, so the
# original pd.DataFrame(f1) re-wrapping was redundant and has been removed.
df = pd.read_csv('/home/kopal/Desktop/jobs.csv')
df.head()

# Spot-check specific rows: the job with id 13, and all Mumbai-based jobs.
df.loc[df['id'] == 13]
df.loc[df['location'] == 'Mumbai']

# Load the candidate details.
df1 = pd.read_csv('/home/kopal/Desktop/candidate_details.csv')
df1.head()

# Keep only the candidate columns relevant for matching.
# NOTE(review): 'relevantExperiance' is presumably the CSV's own (misspelled)
# column name — confirm against the file before "fixing" it.
df2 = df1[['candidateName', 'skillSet', 'relevantExperiance',
           'qualification', 'location', 'industryType']]
df2

# Work with the first 1000 candidates only.
a = df2.head(1000)
a

# Keep only the job columns relevant for matching.
df3 = df[['job_title', 'keyskills', 'location']]
df3
df3.loc[df['id'] == 13]
import requests
import matplotlib.pyplot as plt  # visualization
import nltk  # NLP
from textblob import TextBlob
from textblob.sentiments import NaiveBayesAnalyzer  # machine learning
import re, math
from collections import Counter
import numpy as np

# Sentiment blobs for one sample job's skills and location (row index 4).
blob = TextBlob(df3.keyskills[4], analyzer=NaiveBayesAnalyzer())
blob1 = TextBlob(df3.location[4], analyzer=NaiveBayesAnalyzer())

# Candidate-side frame: each candidate's skills and preferred location.
df4 = pd.DataFrame()
df4['skills'] = a.skillSet
df4['location_prefer'] = a.location
df4

# Reference texts from job row 4. df3's column is lowercase 'keyskills'
# (see its construction from df), so the original 'df3.keySkills' raised
# AttributeError — fixed here.
text1 = df3.keyskills[4]
text4 = df3.location[4]
class Similarity():
    """Text-similarity helpers: cosine similarity over word-count vectors,
    and Jaccard similarity over token collections."""

    def compute_cosine_similarity(self, string1, string2):
        """Cosine similarity between two word-frequency mappings
        (e.g. Counters), rounded to 4 decimal places.

        Returns 0.0 when either vector is empty (zero denominator).
        """
        # Words common to both vectors contribute to the dot product.
        intersection = set(string1.keys()) & set(string2.keys())
        numerator = sum(string1[x] * string2[x] for x in intersection)
        # Euclidean norms of each vector.
        sum1 = sum(v ** 2 for v in string1.values())
        sum2 = sum(v ** 2 for v in string2.values())
        denominator = math.sqrt(sum1) * math.sqrt(sum2)
        if not denominator:
            return 0.0
        return round(numerator / float(denominator), 4)

    def text_to_vector(self, text):
        """Convert text to a word-frequency Counter.

        The original pattern r'w+' (backslash lost in the paste) matched
        runs of the letter 'w' only; r'\w+' matches whole words as intended.
        """
        WORD = re.compile(r'\w+')
        words = WORD.findall(text)
        return Counter(words)

    def tokenize(self, string):
        """Lowercase, whitespace-split token list."""
        return string.lower().split(" ")

    def jaccard_similarity(self, string1, string2):
        """Jaccard similarity: |intersection| / |union| of the token sets."""
        intersection = set(string1).intersection(set(string2))
        union = set(string1).union(set(string2))
        return len(intersection) / float(len(union))
# Cosine similarity between the reference job's skills (text1) and each
# candidate's skill set, in the same row order as `a`.
# The original re-created a Similarity instance and re-vectorized text1 on
# every iteration, and computed tokenizations it never used — hoisted/removed.
similarity = Similarity()
vector1 = similarity.text_to_vector(text1)
cosine = [
    similarity.compute_cosine_similarity(vector1, similarity.text_to_vector(text2))
    for text2 in a.skillSet
]
class Similarity():
    """Re-declaration of Similarity used by the location loop below.

    The pasted re-definition dropped tokenize(), yet the location loop still
    calls similarity1.tokenize(...) — that raised AttributeError at runtime.
    tokenize() is restored here so the following code works.
    """

    def compute_cosine_similarity(self, string1, string2):
        """Cosine similarity between two word-frequency mappings,
        rounded to 4 decimal places; 0.0 if either vector is empty."""
        # Dot product over the shared words.
        intersection = set(string1.keys()) & set(string2.keys())
        numerator = sum(string1[x] * string2[x] for x in intersection)
        # Euclidean norms.
        sum1 = sum(v ** 2 for v in string1.values())
        sum2 = sum(v ** 2 for v in string2.values())
        denominator = math.sqrt(sum1) * math.sqrt(sum2)
        if not denominator:
            return 0.0
        return round(numerator / float(denominator), 4)

    def text_to_vector(self, text):
        """Word-frequency Counter for *text*. r'\w+' matches words; the
        pasted r'w+' (lost backslash) only matched runs of 'w'."""
        WORD = re.compile(r'\w+')
        words = WORD.findall(text)
        return Counter(words)

    def tokenize(self, string):
        """Lowercase, whitespace-split token list (called by the location loop)."""
        return string.lower().split(" ")
# Cosine similarity between the reference job's location (text4) and each
# candidate's preferred location.
# BUG FIX: the original iterated str(a.location), i.e. the individual
# *characters* of the Series' printed representation, producing per-character
# scores misaligned with the rows of `a`. Iterate the actual values instead.
similarity1 = Similarity()
vector4 = similarity1.text_to_vector(text4)
cosine1 = []
for text3 in a.location:
    # str() guards against non-string cells (e.g. NaN) in the column.
    vector3 = similarity1.text_to_vector(str(text3))
    cosine1.append(similarity1.compute_cosine_similarity(vector4, vector3))
print(str(a.location))
# Attach the two similarity scores to the candidate frame.
df4['similarity_for_skills'] = cosine
se = pd.Series(cosine1)
df4['similarity_for_location'] = se
df4

# Bring the candidate names back alongside their scores, then rank the
# candidates by skill similarity with the best matches first.
df5 = pd.concat([a.candidateName, df4], axis=1)
result = df5.sort_values('similarity_for_skills', ascending=False)
df6 = pd.DataFrame(result)
df6
Add Comment
Please sign in to add a comment.