Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
from collections import Counter
from itertools import product

from numpy.random import randint
from scipy.stats import chisquare
# Number of times each word is expected to occur under the null hypothesis.
multiple = 10

# There are 26**3 = 17576 possible three-letter words, so it is convenient to
# use a sequence length of 17576 times some natural number.
number_of_random_numbers = 17576 * multiple

# Given that the null hypothesis is true (equivalent to saying: the random
# sequence is iid and uniform) we expect each word to occur once every 17576
# words. The expected number of occurrences of each combination is therefore
# equal to `multiple`; e.g. with a random sequence of length 2 * 17576 we
# expect each word to occur twice.
expectation = multiple
def seedlcg(init_Val):
    """Seed the linear congruential generator.

    Stores ``init_Val`` in the module-level ``rand`` variable, which is the
    state that ``lcg_randu`` reads and advances on every call.
    """
    global rand
    rand = init_Val
def lcg_randu():
    """Advance the generator by one step and return the new value scaled by m.

    This is a linear congruential generator with multiplier 65539,
    increment 0 and modulus 2**31. The module-level state ``rand`` must have
    been set (see ``seedlcg``) before the first call, otherwise a
    ``NameError`` is raised.

    Returns:
        ``rand / m`` for the updated state; with a non-negative integer state
        this is a float in [0, 1) (true division, i.e. Python 3 semantics).
    """
    global rand
    a = 65539
    c = 0
    m = 2**31
    rand = (a * rand + c) % m
    return rand / m
seedlcg(300000)

# Draw exactly `number_of_random_numbers` values so that the number of
# observed words matches the total expected count used by the chi-square test.
# Each uniform draw in [0, 1) is mapped onto a letter 1..26, because the
# possible words are tuples of integers from that alphabet; raw floats would
# never match any of them.
random_sequence = [
    int(lcg_randu() * 26) + 1 for _ in range(number_of_random_numbers)
]
def collect_words(random_sequence):
    """Collect the overlapping three-letter words observed in a sequence.

    For every index ``i`` the word is
    ``(random_sequence[i - 2], random_sequence[i - 1], random_sequence[i])``.
    For ``i`` equal to 0 and 1 the negative indices wrap around to the end of
    the sequence, so the sequence is treated as circular and the number of
    words equals the number of elements.

    Args:
        random_sequence: An indexable sequence of letters.

    Returns:
        A list of 3-tuples, one per element of ``random_sequence``. An empty
        sequence yields an empty list; a one-element sequence raises
        ``IndexError`` because index -2 does not exist.
    """
    return [
        (random_sequence[i - 2], random_sequence[i - 1], random_sequence[i])
        for i in range(len(random_sequence))
    ]
def initiate_list_of_possible_words():
    """Build the list of all possible three-letter words.

    The alphabet is the integers 1..26; a word is a 3-tuple of letters.

    Returns:
        A list of all 26**3 = 17576 tuples, in the order produced by
        ``itertools.product`` (from ``(1, 1, 1)`` to ``(26, 26, 26)``).
    """
    alphabet = range(1, 27)
    return list(product(alphabet, repeat=3))
def count_words(observed_words, possible_words=None):
    """Count how many times each possible word occurred.

    Args:
        observed_words: Iterable of observed words. The words must be
            hashable (e.g. tuples), since they are tallied with a ``Counter``.
        possible_words: Optional iterable of the words to report counts for.
            Defaults to the result of ``initiate_list_of_possible_words()``.

    Returns:
        A list with the number of occurrences of each possible word, in the
        same order as the possible words. Words that never occurred get 0.
    """
    if possible_words is None:
        possible_words = initiate_list_of_possible_words()
    # Tally all observations in a single pass instead of rescanning the whole
    # observation list once per possible word.
    occurrences = Counter(observed_words)
    return [occurrences[word] for word in possible_words]
# Collect the observed words from the sequence.
word_combinations = collect_words(random_sequence)

# Count how many times each possible word has occurred.
word_count = count_words(word_combinations)

# Compute and print the chi-square statistic and the corresponding p-value,
# comparing the observed counts with the expected count per word.
print(chisquare(word_count, expectation))
# Output posted with the original paste:
# Power_divergenceResult(statistic=117915.47220000002, pvalue=0.0)
Add Comment
Please, Sign In to add comment