Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
import collections
import re

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.spatial.distance import cosine as cos
def count_words(line):
    """Count the words of one line of text.

    The line is lower-cased first. A word is a maximal run of the letters
    ``a``-``z`` that is at least two characters long; every other character
    acts as a separator.

    Returns a ``collections.Counter`` mapping each word to its number of
    occurrences in the line.
    """
    return collections.Counter(re.findall(r'[a-z]{2,}', line.lower()))


def build_word_matrix(lines):
    """Build the line-by-word occurrence matrix for *lines*.

    Returns a ``pandas.DataFrame`` with one row per input line (row ``i``
    corresponds to ``lines[i]``) and one column per distinct word. Cells hold
    the number of times the word occurs in the line, stored as ``np.intc``.
    """
    # Build the frame in a single call: DataFrame.append was removed in
    # pandas 2.0 and re-copied the whole frame on every appended row.
    data = pd.DataFrame([count_words(line) for line in lines])
    # Words missing from a line come out as NaN; they mean "zero occurrences".
    return data.fillna(0).astype(np.intc)


def two_closest(data):
    """Find the two rows of *data* closest to row 0 by cosine distance.

    Returns a pair ``(res, x)``:

    * ``res`` -- row indices of the closest and the second-closest row;
    * ``x`` -- the matching cosine distances.

    Slots that could not be filled (fewer than three rows) keep their initial
    values, ``-1`` in ``res`` and ``2`` in ``x``.
    """
    res = [-1, -1]
    # 2 is the largest value a cosine distance can take, so it serves as the
    # "nothing found yet" sentinel.
    x = [2, 2]
    if data.shape[0] < 2:
        return res, x

    reference = data.iloc[0, :]
    for i in range(1, data.shape[0]):
        q = cos(reference, data.iloc[i, :])
        if q <= x[0]:
            # New best: the previous best is demoted to second place.
            x[1], res[1] = x[0], res[0]
            x[0], res[0] = q, i
        elif q < x[1]:
            x[1], res[1] = q, i
    return res, x


if __name__ == "__main__":
    with open(r'Myfile.txt') as fl:
        l = fl.readlines()

    data = build_word_matrix(l)
    res, x = two_closest(data)

    # Print the reference line followed by the lines that were actually found
    # (the indices in res), skipping the -1 "not found" placeholders.
    print(res, *l[:1], *(l[i] for i in res if i >= 0))
Add Comment
Please, Sign In to add comment