# Restricted Boltzmann Machine (RBM) with a small binary-digit classification demo.

import numpy as np
import itertools

class RBM:
  """
  Restricted Boltzmann Machine with binary stochastic units, trained with
  contrastive divergence using a single reconstruction step per epoch.

  The weight matrix has shape (num_visible + 1, num_hidden + 1). Row 0 and
  column 0 hold the weights of the always-on bias units, so biases are learned
  together with the ordinary weights.

  Attributes
  ----------
  weights: The (num_visible + 1) x (num_hidden + 1) weight matrix, biases included.
  norm_dict: Dictionary whose keys are the (hidden, visible) state tuples (bias unit
  included in both) that were sampled during training.
  norm_c: Value returned by `norm_constant()`; only set once `fit` has run.
  """

  def __init__(self, num_visible, num_hidden, learning_rate = 0.1,max_epochs = 1000):
    """
    Parameters
    ----------
    num_visible: Number of visible units (without the bias unit).
    num_hidden: Number of hidden units (without the bias unit).
    learning_rate: Step size applied to the contrastive divergence update.
    max_epochs: Number of full-batch updates performed by `fit`.
    """
    self.num_hidden = num_hidden
    self.num_visible = num_visible
    self.learning_rate = learning_rate
    self.max_epochs = max_epochs
    self.norm_dict = {}
    # Initialize a weight matrix, of dimensions (num_visible x num_hidden), using
    # a Gaussian distribution with mean 0 and standard deviation 0.1.
    self.weights = 0.1 * np.random.randn(self.num_visible, self.num_hidden)
    # Insert weights for the bias units into the first row and first column.
    self.weights = np.insert(self.weights, 0, 0, axis = 0)
    self.weights = np.insert(self.weights, 0, 0, axis = 1)

  def fit(self, data):
    """
    Train the machine.

    Parameters
    ----------
    data: A matrix where each row is a training example consisting of the states of visible units.

    Raises
    ------
    ValueError: If `data` contains no training examples (the update divides by the
    number of examples).
    """
    data = np.asarray(data)
    num_examples = data.shape[0]
    if num_examples == 0:
      raise ValueError("fit() requires at least one training example")

    # Insert bias units of 1 into the first column.
    data = np.insert(data, 0, 1, axis = 1)

    for epoch in range(self.max_epochs):
      # Clamp to the data and sample from the hidden units.
      # (This is the "positive CD phase", aka the reality phase.)
      pos_hidden_probs = self._logistic(np.dot(data, self.weights))
      # Fix the bias unit. It must always be on; otherwise the sampled state in
      # column 0 would randomly switch the visible biases off in the reconstruction.
      pos_hidden_probs[:,0] = 1
      pos_hidden_states = pos_hidden_probs > np.random.rand(num_examples, self.num_hidden + 1)
      pos_hidden_states = pos_hidden_states.astype(float)

      # Sample visible states from the sampled hidden states (bias column stripped,
      # because run_hidden adds its own).
      pos_visible_states = self.run_hidden(pos_hidden_states[:,1:])

      # While alternating h and v's are generated, remember these in a dictionary
      # so the universe of h,v can later be used to sum over all probabilities for
      # the normalization constant.
      # `zip` (not `itertools.izip`) keeps this working on both Python 2 and 3.
      for h, v in zip(pos_hidden_states, pos_visible_states):
        v = np.insert(v, 0, 1)
        self.norm_dict[(tuple(h), tuple(v))] = 1

      # Note that we're using the activation *probabilities* of the hidden states, not the hidden states
      # themselves, when computing associations. We could also use the states; see section 3 of Hinton's
      # "A Practical Guide to Training Restricted Boltzmann Machines" for more.
      pos_associations = np.dot(data.T, pos_hidden_probs)

      # Reconstruct the visible units and sample again from the hidden units.
      # (This is the "negative CD phase", aka the daydreaming phase.)
      neg_visible_probs = self._logistic(np.dot(pos_hidden_states, self.weights.T))
      neg_visible_probs[:,0] = 1 # Fix the bias unit.
      neg_hidden_probs = self._logistic(np.dot(neg_visible_probs, self.weights))
      # Fix the bias unit here too, so the visible-bias column of both association
      # matrices is computed against a constant 1 and weights[0, 0] stays at 0.
      neg_hidden_probs[:,0] = 1
      # Note, again, that we're using the activation *probabilities* when computing associations, not the states
      # themselves.
      neg_associations = np.dot(neg_visible_probs.T, neg_hidden_probs)

      # Update weights.
      self.weights += self.learning_rate * ((pos_associations - neg_associations) / num_examples)

    self.norm_c = self.norm_constant()

  def _sample_layer(self, data, weights, num_units):
    """
    Sample the binary states of one layer given the states of the other layer.

    Parameters
    ----------
    data: A matrix where each row holds the states of the source layer (no bias column).
    weights: Matrix mapping the bias-augmented source layer to the bias-augmented target layer.
    num_units: Number of units in the target layer (without the bias unit).

    Returns
    -------
    A float matrix of 0/1 states for the target layer, bias column removed.
    """
    num_examples = data.shape[0]
    # Insert bias units of 1 into the first column of data.
    data = np.insert(data, 0, 1, axis = 1)
    # Probabilities of turning the target units on.
    probs = self._logistic(np.dot(data, weights))
    # Turn the units on with their specified probabilities.
    states = (probs > np.random.rand(num_examples, num_units + 1)).astype(float)
    # Ignore the bias units.
    return states[:,1:]

  def run_hidden(self, data):
    """
    Assuming the RBM has been trained (so that weights for the network have been learned),
    run the network on a set of hidden units, to get a sample of the visible units.

    Parameters
    ----------
    data: A matrix where each row consists of the states of the hidden units.

    Returns
    -------
    visible_states: A matrix where each row consists of the visible units activated from the hidden
    units in the data matrix passed in.
    """
    return self._sample_layer(data, self.weights.T, self.num_visible)

  def run_visible(self, data):
    """
    Assuming the RBM has been trained (so that weights for the network have been learned),
    run the network on a set of visible units, to get a sample of the hidden units.

    Parameters
    ----------
    data: A matrix where each row consists of the states of the visible units.

    Returns
    -------
    hidden_states: A matrix where each row consists of the hidden units activated from the visible
    units in the data matrix passed in.
    """
    return self._sample_layer(data, self.weights, self.num_hidden)

  def norm_constant(self):
    """
    Calculates the normalization constant.

    Returns
    -------
    The sum of h . W^T . v over every (h, v) pair recorded in `norm_dict`.

    NOTE(review): this adds up the raw bilinear scores (no exponential is applied),
    so it is presumably a heuristic normalizer rather than the true partition
    function -- confirm this is intended.
    """
    total = 0
    for h, v in self.norm_dict:
      total += np.dot(np.dot(np.array(h), self.weights.T), np.array(v))
    return total

  def predict_proba(self, X):
    """
    Score each row of X under the trained machine.

    A hidden state is sampled for every row, so repeated calls can return
    different values unless the NumPy random state is fixed.

    Parameters
    ----------
    X: A matrix where each row consists of the states of the visible units.

    Returns
    -------
    A 1-D array with one value per row of X: h . W^T . v divided by `norm_c`
    (bias units included in both h and v). Requires `fit` to have been called.
    """
    X = np.asarray(X)
    hs = np.insert(self.run_visible(X), 0, 1, axis=1)
    vs = np.insert(X, 0, 1, axis=1)
    # Row-wise h . W^T . v for all examples at once.
    scores = np.sum(np.dot(hs, self.weights.T) * vs, axis=1)
    return scores / self.norm_c

  def _logistic(self, x):
    """Element-wise logistic sigmoid 1 / (1 + exp(-x))."""
    return 1.0 / (1 + np.exp(-x))

if __name__ == '__main__':
  # Demo: train one RBM per digit class, classify each test example by the
  # highest-scoring RBM, and compare the accuracy with a k-nearest-neighbours baseline.
  from sklearn import neighbors
  try:
    from sklearn.model_selection import train_test_split
  except ImportError:
    # Older scikit-learn releases only provide the legacy module path.
    from sklearn.cross_validation import train_test_split

  digits = np.loadtxt('binarydigits.txt')
  labels = np.ravel(np.loadtxt('bindigitlabels.txt'))
  X_train, X_test, y_train, y_test = train_test_split(digits, labels, test_size=0.2,random_state=0)
  print(X_train.shape)

  # Classes for which an RBM is trained; order defines the rows of `scores` below.
  class_labels = [0, 5, 7]

  clfs = {}
  for label in class_labels:
    x_label = X_train[y_train==label]
    print(x_label.shape)
    clf = RBM(num_visible = 64, num_hidden = 2, max_epochs = 500)
    clf.fit(x_label)
    clfs[label] = clf

  # scores[k, i] is the score of test example i under the RBM of class_labels[k].
  scores = np.array([clfs[label].predict_proba(X_test) for label in class_labels])
  # Translate the winning row index straight into its class label.
  rbm_pred = np.array(class_labels, dtype=float)[np.argmax(scores, axis=0)]
  # Single-argument print with %-formatting produces the same output on Python 2 and 3.
  print('RBM %s' % (np.sum(rbm_pred==y_test) / float(len(y_test))))

  clf = neighbors.KNeighborsClassifier()
  clf.fit(X_train,y_train)
  knn_pred = clf.predict(X_test)
  print('KNN %s' % (np.sum(knn_pred==y_test) / float(len(y_test))))