# Untitled

import numpy as np
from random import randint

class _Node:
    """Itemset node used while mining rules.

    Attributes set by the constructor:
        att: attribute identifier (CarMiner in this file stores a bitmask,
            ``2**i`` for attribute ``i``, OR-ed together for larger itemsets).
        list_value: values of the itemset; a fresh empty list when omitted.
        list_Obidset: row indices covered by the itemset; a fresh empty list
            when omitted.
        count: per-class counters; ``[0] * num_class`` when omitted.
        pos: index into ``count`` (CarMiner stores the argmax there).
    """

    def __init__(self, att=0, list_value=None, list_Obidset=None, count=None, pos=0, num_class=0):
        # Scalars first.
        self.att = att
        self.pos = pos
        # Containers: never share a default between instances.
        self.list_value = [] if list_value is None else list_value
        self.list_Obidset = [] if list_Obidset is None else list_Obidset
        # num_class is only consulted when no explicit counters are given.
        self.count = [0] * num_class if count is None else count

class _NodeRule:
    """A mined rule: the constructor stores its five arguments unchanged.

    Attributes:
        att: attribute identifier of the rule's left-hand side.
        list_value: values of the rule's left-hand side.
        label_pos: class index predicted by the rule.
        rule_sup: support count of the rule.
        conf: confidence of the rule.
    """

    def __init__(self, att, list_value, label_pos, rule_sup, conf):
        # Left-hand side of the rule.
        self.att, self.list_value = att, list_value
        # Right-hand side (predicted class).
        self.label_pos = label_pos
        # Quality measures.
        self.rule_sup, self.conf = rule_sup, conf


class CarMiner:
    """Car-Miner estimator: mines class association rules and predicts with them.

    Attribute ``i`` of the training matrix is identified by the bit ``2**i``;
    an itemset's attributes are the OR of those bits and its values are kept
    in ascending attribute order, which is the order ``predict`` rebuilds
    from a sample.

    Parameters
    ----------
    minSup : float
        Minimum support, as a fraction of the training samples; an itemset is
        kept when its most frequent class covers at least
        ``minSup * num_sample`` rows.
    minConf : float
        Minimum confidence a node needs to be emitted as a rule.
    verbose : bool
        When true, ``fit`` prints the number of mined rules.
    """

    def __init__(self, minSup, minConf, verbose=False):
        self.minSup = minSup
        self.minConf = minConf
        self.verbose = verbose

    def __Intersect(self, x, y):
        """Return the items of ``x`` that also occur in ``y``, in ``x``'s order.

        Items must be hashable (they are row indices in this class); the set
        lookup avoids the quadratic list scan.
        """
        lookup = set(y)
        return [item for item in x if item in lookup]

    def __Union(self, x, y):
        """Return the distinct items of ``x`` and ``y`` in unspecified order.

        Kept for compatibility only. It is not used to combine rule values,
        because it loses attribute order and collapses equal values that
        belong to different attributes; see ``__MergeValues``.
        """
        return list(set(x + y))

    def __MergeValues(self, att_a, values_a, att_b, values_b):
        """Merge the values of two itemsets in ascending attribute order.

        ``att_a`` / ``att_b`` are attribute bitmasks and ``values_a`` /
        ``values_b`` hold one value per set bit, lowest bit first. For an
        attribute present in both itemsets the value from ``values_a`` is
        used (during mining both itemsets extend the same parent, so the
        shared values coincide).
        """
        merged = []
        idx_a = idx_b = 0
        combined = att_a | att_b
        bit = 1
        while bit <= combined:
            in_a = att_a & bit
            in_b = att_b & bit
            if in_a:
                merged.append(values_a[idx_a])
                idx_a += 1
                if in_b:
                    # Shared attribute: consume it from both sides, emit once.
                    idx_b += 1
            elif in_b:
                merged.append(values_b[idx_b])
                idx_b += 1
            bit <<= 1
        return merged

    def __BuildOneItemset(self, list_Lr, X, y):
        """Append one node per frequent (attribute, value) pair to ``list_Lr``.

        ``X`` is indexed as ``X[:, i]`` and ``y[k]`` is used as an index into
        the per-class counters, so labels must be integers in
        ``range(self.num_class)``.
        """
        min_count = self.minSup * self.num_sample
        for i in range(self.num_attr):
            column = X[:, i]
            for value in np.unique(column):
                # Row indices (Obidset) in which this value occurs.
                rows = np.flatnonzero(column == value).tolist()
                if not rows:
                    # E.g. NaN never compares equal; an empty node would
                    # later divide by zero when its confidence is computed.
                    continue

                # Number of covered rows per class.
                class_count = [0] * self.num_class
                for k in rows:
                    class_count[y[k]] += 1

                # Keep the node only if its dominant class is frequent.
                if np.max(class_count) < min_count:
                    continue
                list_Lr.append(
                    _Node(
                        2**i,
                        [value],
                        rows,
                        class_count,
                        np.argmax(class_count),
                        self.num_class,
                    )
                )

    def __CarMiner(self, list_Lr, y):
        """Emit rules for the nodes in ``list_Lr`` and recurse on their extensions.

        Every node whose confidence reaches ``minConf`` is appended to
        ``self.CARs``. Each node is then combined with the nodes after it,
        and the frequent combinations are mined recursively.
        """
        min_count = self.minSup * self.num_sample
        for i, node_i in enumerate(list_Lr):
            rule_sup = node_i.count[node_i.pos]
            conf = rule_sup / len(node_i.list_Obidset)
            if conf >= self.minConf:
                self.CARs.append(
                    _NodeRule(node_i.att, node_i.list_value, node_i.pos, rule_sup, conf)
                )

            Pi = []
            for node_j in list_Lr[i + 1:]:
                # Proposition 1: itemsets over the same attributes cannot be joined.
                if node_i.att == node_j.att:
                    continue

                rows = self.__Intersect(node_i.list_Obidset, node_j.list_Obidset)
                if not rows:
                    continue

                # Proposition 2: if the intersection is as large as one parent's
                # Obidset it is that Obidset, so its class counts can be reused.
                if len(rows) == len(node_i.list_Obidset):
                    count, pos = node_i.count, node_i.pos
                elif len(rows) == len(node_j.list_Obidset):
                    count, pos = node_j.count, node_j.pos
                else:
                    count = [0] * self.num_class
                    for k in rows:
                        count[y[k]] += 1
                    pos = np.argmax(count)

                if count[pos] < min_count:
                    continue

                Pi.append(
                    _Node(
                        node_i.att | node_j.att,
                        # Values must stay aligned with the attribute bits,
                        # because predict() compares them positionally.
                        self.__MergeValues(
                            node_i.att, node_i.list_value,
                            node_j.att, node_j.list_value,
                        ),
                        rows,
                        count,
                        pos,
                        self.num_class,
                    )
                )
            if Pi:
                self.__CarMiner(Pi, y)

    def fit(self, X, y):
        """Mine the class association rules of ``(X, y)`` into ``self.CARs``.

        ``X`` must expose ``shape`` and column slicing (``X[:, i]``); ``y``
        must hold integer class indices in ``range(len(np.unique(y)))``.
        Rules are sorted by descending ``(conf, rule_sup)``.
        """
        self.num_class = len(np.unique(y))
        self.num_attr = X.shape[1]
        self.num_sample = X.shape[0]
        self.list_Lr = []
        self.CARs = []
        self.__BuildOneItemset(self.list_Lr, X, y)
        self.__CarMiner(self.list_Lr, y)
        self.CARs = sorted(self.CARs, key=lambda k: (k.conf, k.rule_sup), reverse=True)
        # Print results
        if self.verbose:
            print("Number of Rules: %d" % len(self.CARs))

    def predict(self, X, verbose=False):
        """Predict a class index for every sample in ``X``.

        Each sample gets the label of the first rule in ``self.CARs`` whose
        values equal the sample's values on the rule's attributes. A sample
        matched by no rule gets a random label in ``[0, num_class - 1]``.

        Returns a 1-D float64 NumPy array (the dtype the previous
        ``np.append``-based implementation produced). When ``verbose`` is
        true the number of randomly labelled samples is printed.
        """
        # Decode every rule's attribute bitmask once instead of once per sample.
        rule_attrs = [
            [i for i in range(self.num_attr) if (rule.att >> i) & 1]
            for rule in self.CARs
        ]

        predictions = []
        randomPredict = 0
        for x in X:
            for rule, attrs in zip(self.CARs, rule_attrs):
                if [x[i] for i in attrs] == rule.list_value:
                    predictions.append(rule.label_pos)
                    break
            else:
                # If no rule matched, randomly predict label
                randomPredict += 1
                predictions.append(randint(0, self.num_class - 1))
        if verbose:
            print("Number of randomly predicted items: %d" % randomPredict)
        return np.asarray(predictions, dtype=float)