Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
class Node:
    """One entry in a hash bucket's singly linked chain.

    Attributes:
        key: The key this entry is stored under.
        value: The value associated with ``key``.
        next: The following entry in the same chain, or ``None`` at the end.
    """

    def __init__(self, key, value, next):  # e.g. Node("word", payload, None)
        self.key, self.value, self.next = key, value, next

    def __repr__(self):
        # Rendered as "<key> <value>", handy when dumping a bucket array.
        return f"{self.key!s} {self.value!s}"
class HashMap:
    """Separate-chaining hash table with multimap-style inserts.

    Buckets are singly linked chains of ``Node`` objects. Keys are expected
    to be strings: ``hash`` iterates the key and calls ``ord`` on every
    element. Inserting under a key that is already present does not replace
    the stored value; it calls ``append(value)`` on it, so stored values
    must provide ``append`` if a key can be inserted more than once.
    Looking up a missing key returns ``-1`` rather than raising.
    """

    def __init__(self, init_capacity=4, loaf_factor=0.75):  # hm = HashMap()
        """Create an empty table.

        Args:
            init_capacity: Initial number of buckets. Values below 1 are
                raised to 1; an empty bucket array would make every lookup
                fail when computing the bucket index.
            loaf_factor: Load factor. The misspelt parameter name is kept so
                existing keyword callers keep working. The table grows once
                ``size >= int(load_factor * capacity)``.
        """
        self.capacity = max(1, init_capacity)
        self.arr = [None] * self.capacity
        self.load_factor = loaf_factor
        self.size = 0

    def hash(self, string):
        """Return the base-31 polynomial hash of the characters in ``string``."""
        shash = 0
        for char in string:
            shash = shash * 31 + ord(char)
        return shash

    def index(self, key):
        """Return the bucket index for ``key`` at the current capacity."""
        return self.hash(key) % self.capacity

    def _find(self, index, key):
        """Return the node holding ``key`` in bucket ``index``, or ``None``."""
        node = self.arr[index]
        while node is not None:
            if node.key == key:
                return node
            node = node.next
        return None

    def get_all_nodes(self):
        """Return every node, bucket by bucket and in chain order."""
        nodes = []
        for head in self.arr:
            node = head
            while node is not None:
                nodes.append(node)
                node = node.next
        return nodes

    def resize(self):
        """Triple the bucket count and rehash once the load threshold is hit.

        Does nothing while ``size`` is below ``int(load_factor * capacity)``.
        """
        if self.size < int(self.load_factor * self.capacity):
            return
        # Snapshot the nodes before the bucket array is replaced.
        nodes = self.get_all_nodes()
        self.capacity *= 3
        self.arr = [None] * self.capacity
        for node in nodes:
            # Relink the existing node at the head of its new bucket instead
            # of allocating a copy; the resulting chain order is the same.
            index = self.index(node.key)
            node.next = self.arr[index]
            self.arr[index] = node
        self.size = len(nodes)

    def insert(self, key, value):
        """Store ``value`` under ``key``.

        If ``key`` is already present, ``value`` is appended to the stored
        value via its ``append`` method (an ``AttributeError`` propagates if
        the stored value has none). Otherwise a new node is pushed onto the
        head of the key's bucket and the table is resized if needed.
        """
        index = self.index(key)
        existing = self._find(index, key)
        if existing is not None:
            existing.value.append(value)
            return
        self.arr[index] = Node(key, value, self.arr[index])
        self.size += 1
        self.resize()

    def __getitem__(self, item):  # hashmap["anarchy"]
        """Return the value stored under ``item``, or ``-1`` if it is absent."""
        node = self._find(self.index(item), item)
        return -1 if node is None else node.value
class HashSet:
    """Set of keys backed by a ``HashMap``.

    Every member is stored as a key of the underlying map with the
    placeholder value ``"Present"``.
    """

    def __init__(self, key):
        """Create a set whose only member is ``key``."""
        self.map = HashMap(10)
        self.map.insert(key, "Present")

    def append(self, key):
        """Add ``key`` to the set; adding an existing member does nothing."""
        # Test membership explicitly. HashMap.insert on an existing key calls
        # .append on the stored value, which fails for the "Present"
        # placeholder; the earlier code got set semantics only by swallowing
        # every exception, which also hid unrelated errors.
        if self.map[key] == -1:
            self.map.insert(key, "Present")

    def get(self):
        """Return all members as one string, each followed by a space.

        Members appear in the order the underlying map yields its nodes.
        """
        return "".join(f"{node.key!s} " for node in self.map.get_all_nodes())
class InvertedIndex:
    """Maps each word to the set of ids of the documents that contain it."""

    def __init__(self):  # ii = InvertedIndex()
        """Create an empty index."""
        self.map = HashMap()

    def index_document(self, document, document_id):
        """Record every word of ``document`` under ``document_id``.

        Words are the pieces produced by splitting on single spaces, so
        consecutive spaces yield an empty-string word that is indexed like
        any other. Document ids are stored in their ``str`` form.
        """
        doc_key = str(document_id)
        for word in document.split(" "):
            postings = self.map[word]
            if postings != -1:
                # Word already known: add this document to its posting set.
                postings.append(doc_key)
            else:
                self.map.insert(word, HashSet(doc_key))

    def search(self, word):
        """Return the ids of the documents containing ``word``.

        Returns:
            The string produced by the posting set's ``get`` (each id
            followed by a space), or ``-1`` if ``word`` was never indexed.
        """
        postings = self.map[word]
        if postings != -1:
            return postings.get()
        return postings

    def get_average(self):
        """Return the average number of documents per indexed word.

        The mean is rounded with the built-in ``round`` and returned as an
        ``int``. An index with no words yields ``0`` instead of dividing by
        zero.
        """
        nodes = self.map.get_all_nodes()
        if not nodes:
            return 0
        total = sum(node.value.map.size for node in nodes)
        return int(round(total / len(nodes), 0))  # e.g. 1.0 -> 1
- # !/bin/python3
- import math
- import os
- import random
- import re
- import sys
if __name__ == '__main__':
    # Input: a document count, that many document lines, then a query count
    # followed by that many single-word query lines.
    num_documents = int(input().strip())
    documents = [input() for _ in range(num_documents)]

    num_queries = int(input().strip())
    queries = [input() for _ in range(num_queries)]

    # Index each document under its zero-based position in the input.
    index = InvertedIndex()
    for doc_id, text in enumerate(documents):
        index.index_document(text, doc_id)

    # One result line per query, then the average documents-per-word figure.
    for word in queries:
        print(index.search(word))
    print(index.get_average())
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement