Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def sumaeuc(dicc1, dicc2):
    """Return the Euclidean distance between two sparse vectors.

    Each argument is a dict mapping a key to a numeric weight; a key that
    is absent from one dict counts as weight 0 in that vector.

    Returns the square root of the summed squared differences (0.0 when
    both dicts are empty).
    """
    acumulado = 0
    # Keys of the first vector: shared keys contribute the squared
    # difference, exclusive keys contribute their own square.
    for clave, valor in dicc1.items():
        acumulado += (valor - dicc2[clave]) ** 2 if clave in dicc2 else valor ** 2
    # Keys that only exist in the second vector.
    for clave, valor in dicc2.items():
        if clave in dicc1:
            continue
        acumulado += valor ** 2
    return acumulado ** 0.5
def sumacos(dicc, dicc2):
    """Return the cosine distance (1 - cosine similarity) of two sparse vectors.

    Each argument is a dict mapping a key to a numeric weight; keys missing
    from a dict count as weight 0.

    The angle is undefined when a vector has zero norm (for example an
    empty dict). Instead of raising ZeroDivisionError this returns 0.0 when
    both vectors have zero norm (they are identical) and 1.0 when only one
    does (nothing in common).
    """
    producto = 0.0
    norma1 = 0.0
    for palabra, peso in dicc.items():
        norma1 += peso ** 2
        if palabra in dicc2:
            producto += peso * dicc2[palabra]

    norma2 = 0.0
    for peso in dicc2.values():
        norma2 += peso ** 2

    denominador = (norma1 * norma2) ** 0.5
    if denominador == 0:
        # At least one zero vector: avoid dividing by zero.
        return 0.0 if (norma1 == 0 and norma2 == 0) else 1.0
    return 1 - (producto / denominador)
def parrafosadicc(arc):
    """Read a text file and return per-line relative word frequencies.

    arc is the path of the file to read. Every line is treated as one
    paragraph: it is stripped, upper-cased, the punctuation characters
    , . ? * ! ( ) are turned into spaces, and the result is split on
    whitespace.

    Returns a dict whose keys are "p0", "p1", ... (one per line, in file
    order) and whose values are dicts mapping each word to
    count / number_of_words_in_the_line. A line without words maps to an
    empty dict.
    """
    # The original chain replaced "?" three times; the repeats had no effect.
    # NOTE(review): they may have been mis-encoded Spanish marks - confirm.
    signos = ",.?*!()"
    dicc = {}
    # "with" closes the file even if reading or processing raises.
    with open(arc) as arch:
        for cont, linea in enumerate(arch):
            texto = linea.strip().upper()
            for signo in signos:
                texto = texto.replace(signo, " ")
            palabras = texto.split()

            conteo = {}
            for palabra in palabras:
                conteo[palabra] = conteo.get(palabra, 0) + 1

            # float() keeps true division under Python 2 as well.
            total = float(len(palabras))
            dicc["p" + str(cont)] = {
                palabra: veces / total for palabra, veces in conteo.items()
            }
    return dicc
- def comparar(dicc):
- histogramaeuc=[]
- histogramacos=[]
- for i in range(len(dicc)):
- histogramaeuc.append([])
- histogramacos.append([])
- for a in range(len(dicc)):
- if i==a:
- euc="00.00"
- cos="00.00"
- else:
- euc=round(sumaeuc(dicc["p"+str(i)],dicc["p"+str(a)])*100,2)
- cos=round(sumacos (dicc["p"+str(i)],dicc["p"+str(a)])*100,2)
- histogramaeuc[i].append(euc)
- histogramacos[i].append(cos)
- for i in range(len(histogramacos)):
- print "."
- for x in range(len(histogramacos)):
- print histogramacos[x][i],
- print " "
- for i in range(len(histogramaeuc)):
- print "."
- for x in range(len(histogramaeuc)):
- print histogramaeuc[x][i],
- return histogramacos,histogramaeuc
if __name__ == "__main__":
    # Run only when executed as a script, so importing this module does not
    # read a file or print anything.
    # Each line of 'eso.txt' is one paragraph; comparar prints both distance
    # tables and returns them as (cosine_table, euclidean_table).
    dicc = parrafosadicc('eso.txt')
    a = comparar(dicc)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement