Guest User

Untitled

a guest
Mar 21st, 2018
75
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.56 KB | None | 0 0
from collections import Counter
from itertools import product

from numpy.random import randint
from scipy.stats import chisquare
  4.  
  5.  
  6. multiple = 10
  7. number_of_random_numbers = 17576*multiple #There are 17576 possible two letter words. Therefore it's easy use a sequence length of
  8. # 17576 times some natural number.
  9.  
  10. expectation = multiple #Given that the null hypothesis is true
  11. # (equivalent to saying: given that the random sequence is iid and uniform) we expect
  12. # that each word occurs once every 17576 times. Therefore the expected occurrence of each
  13. # combinations is equal to the multiple. If we have a random sequence of length 2*17576 we
  14. # expect each word to occur twice.
  15. def seedlcg(init_Val):
  16. global rand
  17. rand = init_Val
  18.  
  19. def lcg_randu():
  20. a = 65539
  21. c = 0
  22. m = 2**31
  23. global rand
  24. rand = (a*rand + c) % m
  25. return rand/m
  26.  
  27. seedlcg(300000)
  28. random_sequence = []
  29. for i in range(1,number_of_random_numbers):
  30. sequence_2.append(lcg_randu())
  31.  
  32. # def collect_words():
  33. # This function collects the observed words from the sequence of random numbers. It
  34. # returns a list that contains the observed words.
  35.  
  36. def collect_words(random_sequence):
  37. list_of_words = []
  38. for i in range(0,len(random_sequence)):
  39. list_of_words.append((random_sequence[i-2],random_sequence[i-1], random_sequence[i]))
  40. return list_of_words
  41.  
  42.  
  43. # def initiate_list_of_possible_words():
  44. # This function initiates a list of all possible words and returns that list.
  45.  
  46. def initiate_list_of_possible_words():
  47. alphabet = [x for x in range(1,27)]
  48. list_of_possible_words = [letter for letter in product(alphabet, repeat=3)]
  49. return list_of_possible_words
  50.  
  51. # def count_words(observed_words):
  52. # This function checks how many times each possible word combination occurred. It returns
  53. # a list with the number of occurrences for each word.
  54.  
  55. def count_words(observed_words):
  56. list_of_possible_words = initiate_list_of_possible_words()
  57. word_count = []
  58. for word in list_of_possible_words:
  59. word_count.append(observed_words.count(word))
  60. return word_count
  61.  
  62. word_combinations = collect_words(random_sequence) #Collect the observed words from the sequence
  63. word_count = count_words(word_combinations) #Collect how many times each possible word has occurred.
  64.  
  65. print(chisquare(word_count, multiple)) #Compute and print the Chisquare statistic and the corresponding p-value.
  66.  
  67. Power_divergenceResult(statistic=117915.47220000002, pvalue=0.0)
Add Comment
Please, Sign In to add comment