Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import numpy as np
- from random import randint
class _Node:
    """Candidate itemset node used while mining rules.

    Attributes:
        att: Identifier of the attributes covered by the itemset. CarMiner
            stores a bitmask here (``2**i`` for attribute column ``i``).
        list_value: Values of the itemset; a new empty list when omitted.
        list_Obidset: Row indices (object identifiers) containing the
            itemset; a new empty list when omitted.
        count: Per-class occurrence counts; ``[0] * num_class`` when omitted.
        pos: Index into ``count`` of the class the node points at.

    ``num_class`` is only used to size the default ``count`` list and is not
    stored on the instance.
    """

    def __init__(self, att=0, list_value=None, list_Obidset=None, count=None, pos=0, num_class=0):
        self.att = att
        # None sentinels give every node its own fresh containers.
        self.list_value = [] if list_value is None else list_value
        self.list_Obidset = [] if list_Obidset is None else list_Obidset
        self.count = [0] * num_class if count is None else count
        self.pos = pos
class _NodeRule:
    """A mined class association rule.

    Attributes:
        att: Identifier of the attributes the rule tests (a bitmask when
            produced by CarMiner).
        list_value: Values the tested attributes must take.
        label_pos: Index of the class the rule predicts.
        rule_sup: Support count of the rule.
        conf: Confidence of the rule.
    """

    def __init__(self, att, list_value, label_pos, rule_sup, conf):
        # Antecedent of the rule.
        self.att, self.list_value = att, list_value
        # Consequent of the rule.
        self.label_pos = label_pos
        # Quality measures used for ranking.
        self.rule_sup, self.conf = rule_sup, conf
class CarMiner:
    """Car-Miner estimator.

    Mines class association rules (CARs) from a categorical data set and
    classifies samples with the best matching rule.

    Args:
        minSup: Minimum support as a fraction of the number of training
            samples; an itemset is kept when its dominant class occurs in at
            least ``minSup * num_sample`` rows.
        minConf: Minimum confidence a node needs to be emitted as a rule.
        verbose: When true, ``fit`` prints the number of mined rules.

    After ``fit`` the instance exposes ``num_class``, ``num_attr``,
    ``num_sample``, ``list_Lr`` (frequent 1-itemset nodes) and ``CARs``
    (rules sorted by confidence, then support, both descending).

    Class labels are used directly as list indices, so ``y`` must contain
    the integers ``0 .. num_class - 1``. ``X`` passed to ``fit`` must support
    ``X.shape`` and ``X[:, i]`` (e.g. a 2-D NumPy array).
    """

    def __init__(self, minSup, minConf, verbose=False):
        self.minSup = minSup
        self.minConf = minConf
        self.verbose = verbose

    def __Intersect(self, x, y):
        """Return the items of ``x`` that also occur in ``y``, in ``x``'s order."""
        # A set makes each membership test O(1) instead of scanning ``y``.
        members = set(y)
        return [item for item in x if item in members]

    def __Union(self, x, y):
        """Return the distinct items of ``x`` and ``y`` in unspecified order.

        No longer used for itemset values: a set drops equal values that
        belong to different attributes and loses the attribute order that
        ``predict`` relies on. See ``__MergeValues``.
        """
        return list(set(x + y))

    @staticmethod
    def __AttrIndices(att):
        """Return, in ascending order, the bit positions set in ``att``."""
        indices = []
        index = 0
        while att:
            if att & 1:
                indices.append(index)
            att >>= 1
            index += 1
        return indices

    def __MergeValues(self, att_a, values_a, att_b, values_b):
        """Merge two itemsets' values, ordered by ascending attribute index.

        ``values_a`` / ``values_b`` must be ordered by the set bits of
        ``att_a`` / ``att_b``. Attributes present in both itemsets contribute
        a single value (the one from the second itemset).
        """
        by_attr = dict(zip(self.__AttrIndices(att_a), values_a))
        by_attr.update(zip(self.__AttrIndices(att_b), values_b))
        return [by_attr[index] for index in sorted(by_attr)]

    def __BuildOneItemset(self, list_Lr, X, y):
        """Append one node per frequent (attribute, value) pair to ``list_Lr``.

        Nodes are appended attribute by attribute, values in ``np.unique``
        order. Attribute ``i`` is encoded as the bitmask ``2**i``.
        """
        min_count = self.minSup * self.num_sample
        for i in range(self.num_attr):
            column = X[:, i]
            for value in np.unique(column):
                # Vertical layout: row indices (Obidset) where the item occurs.
                obidset = np.flatnonzero(column == value).tolist()
                # Number of those rows belonging to each class.
                count = [0] * self.num_class
                for k in obidset:
                    count[y[k]] += 1
                # Keep the item only if its dominant class reaches minSup.
                if max(count) < min_count:
                    continue
                list_Lr.append(
                    _Node(
                        2 ** i,
                        [value],
                        obidset,
                        count,
                        int(np.argmax(count)),
                        self.num_class,
                    )
                )

    def __CarMiner(self, list_Lr, y):
        """Emit rules for the nodes of ``list_Lr`` and recurse on their joins.

        Each node whose confidence reaches ``minConf`` is appended to
        ``self.CARs``. Each node is then joined with every later node of the
        list; joins that stay frequent form the next level.
        """
        min_count = self.minSup * self.num_sample
        for i, node_i in enumerate(list_Lr):
            support = node_i.count[node_i.pos]
            conf = support / len(node_i.list_Obidset)
            if conf >= self.minConf:
                self.CARs.append(
                    _NodeRule(node_i.att, node_i.list_value, node_i.pos, support, conf)
                )
            Pi = []
            for node_j in list_Lr[i + 1:]:
                # Proposition 1: itemsets over the same attributes cannot be joined.
                if node_i.att == node_j.att:
                    continue
                obidset = self.__Intersect(node_i.list_Obidset, node_j.list_Obidset)
                if not obidset:
                    continue
                node = _Node(
                    node_i.att | node_j.att,
                    self.__MergeValues(
                        node_i.att, node_i.list_value, node_j.att, node_j.list_value
                    ),
                    obidset,
                    num_class=self.num_class,
                )
                # Proposition 2: if the join covers exactly the rows of one
                # parent, it inherits that parent's class counts. The list is
                # copied so nodes never share a mutable ``count``.
                if len(obidset) == len(node_i.list_Obidset):
                    node.count = list(node_i.count)
                    node.pos = node_i.pos
                elif len(obidset) == len(node_j.list_Obidset):
                    node.count = list(node_j.count)
                    node.pos = node_j.pos
                else:
                    for k in obidset:
                        node.count[y[k]] += 1
                    node.pos = int(np.argmax(node.count))
                if node.count[node.pos] >= min_count:
                    Pi.append(node)
            self.__CarMiner(Pi, y)

    def fit(self, X, y):
        """Mine the class association rules of the training set.

        Args:
            X: 2-D array of categorical attribute values, one row per sample.
            y: Class label of each row, as integers ``0 .. num_class - 1``.
        """
        self.num_class = len(np.unique(y))
        self.num_attr = X.shape[1]
        self.num_sample = X.shape[0]
        self.list_Lr = []
        self.CARs = []
        self.__BuildOneItemset(self.list_Lr, X, y)
        self.__CarMiner(self.list_Lr, y)
        # Best rules first: highest confidence, ties broken by support.
        self.CARs = sorted(self.CARs, key=lambda k: (k.conf, k.rule_sup), reverse=True)
        # Print results
        if self.verbose:
            print("Number of Rules: %d" % len(self.CARs))

    def predict(self, X, verbose=False):
        """Predict a class index for every row of ``X``.

        Each row gets the label of the first rule in ``self.CARs`` whose
        attribute values all match; rows matched by no rule get a uniformly
        random class index.

        Args:
            X: Iterable of samples, each indexable by attribute position.
            verbose: When true, print how many rows were predicted randomly.

        Returns:
            1-D float64 NumPy array of predicted class indices.
        """
        # Decode every rule's attribute bitmask once instead of once per row.
        decoded_rules = [
            (
                [i for i in self.__AttrIndices(rule.att) if i < self.num_attr],
                rule.list_value,
                rule.label_pos,
            )
            for rule in self.CARs
        ]
        labels = []
        randomPredict = 0
        for x in X:
            for indices, values, label in decoded_rules:
                # Compare the row's values on the rule's attributes with the rule.
                if [x[i] for i in indices] == values:
                    labels.append(label)
                    break
            else:
                # If no rule matched, randomly predict label
                randomPredict += 1
                labels.append(randint(0, self.num_class - 1))
        if verbose:
            print("Number of randomly predicted items: %d" % randomPredict)
        # float64 matches the dtype the array had when grown with np.append.
        return np.asarray(labels, dtype=float)
Add Comment
Please, Sign In to add comment