Advertisement
Guest User

Untitled

a guest
Oct 4th, 2015
83
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.16 KB | None | 0 0
  1. def sumaeuc(dicc1,dicc2):
  2. suma=0
  3. for llave in dicc1:
  4. if llave in dicc2:
  5. suma+=(dicc1[llave]-dicc2[llave])**2
  6. else:
  7. suma+=(dicc1[llave])**2
  8. for llave2 in dicc2:
  9. if llave2 not in dicc1:
  10. suma+=(dicc2[llave2])**2
  11. return suma**0.5
  12.  
  13. def sumacos(dicc,dicc2):
  14. suma=0.0
  15. divisor=0.0
  16. divisor2=0.0
  17. for palabra in dicc:
  18. divisor+=dicc[palabra]**2
  19. if palabra in dicc2:
  20. suma+=dicc[palabra]*dicc2[palabra]
  21. for palabra in dicc2:
  22. divisor2+=dicc2[palabra]**2
  23. return 1-(suma/((divisor*divisor2)**0.5))
  24. def parrafosadicc(arc):
  25. arch=open(arc)
  26. dicc={}
  27. cont=-1
  28. for x in arch:
  29. dparrafo={}
  30. cont+=1
  31. parrafo=x.strip().upper().replace(","," ").replace("."," ").replace("?"," ").replace("*"," ").replace("?"," ").replace("!"," ").replace("?"," ").replace("("," ").replace(")"," ").split()
  32. for x in parrafo:
  33. if x in dparrafo:
  34. dparrafo[x]+=1
  35. else:
  36. dparrafo[x]=1
  37. for x in dparrafo:
  38. dparrafo[x]=dparrafo[x]/float(len(parrafo))
  39. dicc["p"+str(cont)]=dparrafo
  40. arch.close()
  41. return dicc
  42.  
  43. def comparar(dicc):
  44. histogramaeuc=[]
  45. histogramacos=[]
  46. for i in range(len(dicc)):
  47. histogramaeuc.append([])
  48. histogramacos.append([])
  49. for a in range(len(dicc)):
  50. if i==a:
  51. euc="00.00"
  52. cos="00.00"
  53. else:
  54. euc=round(sumaeuc(dicc["p"+str(i)],dicc["p"+str(a)])*100,2)
  55. cos=round(sumacos (dicc["p"+str(i)],dicc["p"+str(a)])*100,2)
  56. histogramaeuc[i].append(euc)
  57. histogramacos[i].append(cos)
  58. for i in range(len(histogramacos)):
  59. print "."
  60. for x in range(len(histogramacos)):
  61. print histogramacos[x][i],
  62. print " "
  63. for i in range(len(histogramaeuc)):
  64. print "."
  65. for x in range(len(histogramaeuc)):
  66. print histogramaeuc[x][i],
  67. return histogramacos,histogramaeuc
  68. dicc=parrafosadicc('eso.txt')
  69. a=comparar(dicc)
  70. cont=-1
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement