Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "import numpy as np\n",
- "from keras.datasets import imdb\n",
- "from keras import models\n",
- "from keras import layers\n",
- "from keras import optimizers\n",
- "from keras import losses\n",
- "from keras import metrics\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Load the data, keeping only the 10,000 most frequently occurring words\n",
- "(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words = 10000)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Check the first label\n",
- "train_labels[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Largest word index inside each review; built once and reused below\n",
- "max_index_per_review = [max(sequence) for sequence in train_data]\n",
- "print(type(max_index_per_review))\n",
- "# Largest word index across the whole training set\n",
- "max(max_index_per_review)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Decode the first review back into text\n",
- "# Step 1: load the dictionary mapping each word to its integer index\n",
- "word_index = imdb.get_word_index()\n",
- "# Step 2: invert it so integer indices map back to their words\n",
- "reverse_word_index = {index: word for word, index in word_index.items()}\n",
- "# Step 3: translate every index of the review into its word\n",
- "#\n",
- "# Indices are offset by 3 because 0, 1, and 2 are reserved for \"padding\", \"start of sequence\", and \"unknown\";\n",
- "# any index without a dictionary entry is rendered as '?'.\n",
- "decoded_review = ' '.join(reverse_word_index.get(i - 3, '?') for i in train_data[0])\n",
- "decoded_review"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "len(reverse_word_index)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "def vectorize_sequences(sequences, dimension=10000):\n",
- " results = np.zeros((len(sequences), dimension)) # Creates an all zero matrix of shape\n",
- " for i,sequence in enumerate(sequences):\n",
- " results[i,sequence] = 1\n",
- " return results\n",
- "\n",
- "# Multi-hot encode the training reviews, then the test reviews\n",
- "X_train, X_test = map(vectorize_sequences, (train_data, test_data))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "X_train[0]"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "X_train.shape"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Cast the integer labels to float32 arrays\n",
- "y_train, y_test = (\n",
- "    np.asarray(labels).astype('float32')\n",
- "    for labels in (train_labels, test_labels)\n",
- ")\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Model definition: two 16-unit ReLU hidden layers feeding a single sigmoid output\n",
- "model = models.Sequential([\n",
- "    layers.Dense(16, activation='relu', input_shape=(10000,)),\n",
- "    layers.Dense(16, activation='relu'),\n",
- "    layers.Dense(1, activation='sigmoid'),\n",
- "])\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Compile the model: RMSprop optimizer, binary cross-entropy loss, binary accuracy metric\n",
- "rmsprop = optimizers.RMSprop(learning_rate=0.001)\n",
- "model.compile(optimizer=rmsprop, loss=losses.binary_crossentropy, metrics=[metrics.binary_accuracy])\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Hold out the first VAL_SIZE training samples for validation and train on the rest\n",
- "VAL_SIZE = 10000\n",
- "# Inputs\n",
- "X_val, partial_X_train = X_train[:VAL_SIZE], X_train[VAL_SIZE:]\n",
- "# Labels, split at the same position so they stay aligned with the inputs\n",
- "y_val, partial_y_train = y_train[:VAL_SIZE], y_train[VAL_SIZE:]\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Train on the partial training set, passing the held-out split as validation data\n",
- "fit_options = dict(epochs=20, batch_size=512, validation_data=(X_val, y_val))\n",
- "history = model.fit(partial_X_train, partial_y_train, **fit_options)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "history_dict = history.history\n",
- "history_dict.keys()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Plot training vs. validation loss for every epoch\n",
- "loss_values = history_dict['loss']\n",
- "val_loss_values = history_dict['val_loss']\n",
- "epochs = range(1, len(loss_values) + 1)\n",
- "fig, ax = plt.subplots()\n",
- "ax.plot(epochs, loss_values, 'g', label=\"Training Loss\")\n",
- "ax.plot(epochs, val_loss_values, 'b', label=\"Validation Loss\")\n",
- "ax.set(title='Training and Validation Loss', xlabel='Epochs', ylabel='Loss Value')\n",
- "ax.legend()\n",
- "plt.show()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "# Plot training vs. validation accuracy for every epoch\n",
- "acc_values = history_dict['binary_accuracy']\n",
- "val_acc_values = history_dict['val_binary_accuracy']\n",
- "# Size the x-axis from this cell's own series instead of a variable from the loss cell\n",
- "epochs = range(1, len(acc_values) + 1)\n",
- "plt.plot(epochs, acc_values, 'g', label=\"Training Accuracy\")\n",
- "plt.plot(epochs, val_acc_values, 'b', label=\"Validation Accuracy\")\n",
- "plt.title('Training and Validation Accuracy')\n",
- "plt.xlabel('Epochs')\n",
- "plt.ylabel('Accuracy')\n",
- "plt.legend()\n",
- "plt.show()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# Making Predictions for testing data\n",
- "np.set_printoptions(suppress=True)\n",
- "result = model.predict(X_test)\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "result"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python 3 (ipykernel)",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.11.8"
- }
- },
- "nbformat": 4,
- "nbformat_minor": 4
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement