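"""Train and evaluate a small tf.estimator.DNNClassifier on per-character
feature files, then print predictions for three held-out samples.

Class labels are derived from the leading character of each training file
name: 'a' -> 0, 'b' -> 1, anything else -> 2.
"""
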
import os
import re

import numpy as np
import tensorflow as tf
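
# Assumed layout of the data directory (inferred from the file-name pattern
# and the reading loops below; not confirmed by the original paste):
#   ./normalized/a_0.txt, b_3.txt, ...   training samples, 51 integer lines each
#   ./normalized/a.txt, b.txt, c.txt     held-out samples used for prediction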

def main():
    path = './normalized/'

    # data[i] collects feature i across all training files (51 features per sample).
    data = []
    for i in range(51):
        data.append([])
    labels = []

    for f in os.listdir(path):
        # Training files are named '<char>_<digit>.txt'; anything else is skipped.
        m = re.match(r'(\w)_\d\.txt', f)
        if m is None:
            continue
        char = m.group(1)
        with open(os.path.join(path, f), 'r') as lines:
            i = 0
            for line in lines:
                data[i].append(int(line.rstrip()))
                i += 1
        labels.append(0 if char == 'a' else 1 if char == 'b' else 2)
    # Construct the feature columns and the classifier.
    features = {}
    feature_columns = []
    for i in range(len(data)):
        key = str(i)
        features[key] = np.array(data[i])
        feature_columns.append(tf.feature_column.numeric_column(key=key))

    # Build a DNN with two hidden layers of 10 units each.
    classifier = tf.estimator.DNNClassifier(
        feature_columns=feature_columns,
        # Two hidden layers of 10 nodes each.
        hidden_units=[10, 10],
        # The model must choose between 3 classes.
        n_classes=3)

    #classifier = tf.estimator.LinearClassifier(
    #    feature_columns=feature_columns,
    #    # The model must choose between 3 classes.
    #    n_classes=3)
    batch_size = 100
    train_steps = 1000

    # Train the model.
    classifier.train(
        input_fn=lambda: train_input_fn(features, np.array(labels), batch_size),
        steps=train_steps)

    # Evaluate the model (here on the training data, since the script builds no
    # separate evaluation split).
    eval_result = classifier.evaluate(
        input_fn=lambda: eval_input_fn(features, np.array(labels), batch_size))

    print('\nTest set accuracy: {accuracy:0.3f}\n'.format(**eval_result))
    # Build the held-out test samples. The files are read in the order
    # c.txt, a.txt, b.txt, so the expected classes are 2, 0, 1.
    test_data = []
    test_labels = ['2', '0', '1']
    for i in range(51):
        test_data.append([])
    for name in ('c.txt', 'a.txt', 'b.txt'):
        with open(os.path.join(path, name), 'r') as lines:
            i = 0
            for line in lines:
                test_data[i].append(int(line.rstrip()))
                i += 1

    test_features = {}
    for i in range(len(test_data)):
        test_features[str(i)] = np.array(test_data[i])
    # Predict the class of each held-out sample.
    predictions = classifier.predict(
        input_fn=lambda: eval_input_fn(test_features,
                                       labels=None,
                                       batch_size=batch_size))

    template = '\nPrediction is "{}" ({:.1f}%), expected "{}"'
    for pred_dict, expec in zip(predictions, test_labels):
        class_id = pred_dict['class_ids'][0]
        probability = pred_dict['probabilities'][class_id]
        print(template.format(class_id, 100 * probability, expec))

def train_input_fn(features, labels, batch_size):
    """An input function for training."""
    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices((dict(features), labels))

    # Repeat and batch the examples (shuffling is currently disabled).
    #dataset = dataset.shuffle(1000).repeat().batch(batch_size)
    dataset = dataset.repeat().batch(batch_size)

    # Return the dataset.
    return dataset

def eval_input_fn(features, labels, batch_size):
    """An input function for evaluation or prediction."""
    features = dict(features)
    if labels is None:
        # No labels, use only features.
        inputs = features
    else:
        inputs = (features, labels)

    # Convert the inputs to a Dataset.
    dataset = tf.data.Dataset.from_tensor_slices(inputs)

    # Batch the examples.
    assert batch_size is not None, "batch_size must not be None"
    dataset = dataset.batch(batch_size)

    # Return the dataset.
    return dataset

if __name__ == '__main__':
    main()