Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- {
- "cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "window_size = 8\n",
- "alpha = 0.025 # learning rate\n",
- "p = 100 # p = dimensions of document vectors (no. of features)\n",
- "m = len(vocab) # vocabulary size (number of entries in vocab) \n",
- "\n",
- "D = np.random.rand(p, n) # matrix of document embeddings\n",
- "U = scipy.stats.truncnorm.rvs(-2, 2, loc=0, scale=1, size=(m, p)) # matrix of softmax weights\n",
- "\n",
- "epochs = 1\n",
- "for epoch in range(epochs):\n",
- " for i in range(n):\n",
- " # Feed-forward\n",
- " d = np.array(np.zeros(n), ndmin=2).T\n",
- " d[i] = 1\n",
- " \n",
- " e = np.array(np.dot(D, d), ndmin=2)\n",
- " k = np.array(np.dot(U, e), ndmin=2)\n",
- " t_hat = softmax(k)\n",
- " \n",
- " doc_words = documents[i]\n",
- " middle = randint(window_size, len(doc_words) - window_size - 1)\n",
- "\n",
- " #window_words = [] \n",
- " \n",
- " errors_out = (np.array(np.zeros(m), ndmin=2).T)\n",
- " errors_middle = (np.array(np.zeros(p), ndmin=2).T)\n",
- " \n",
- " for c in range(middle - window_size, middle + window_size):\n",
- " #window_words.append(doc_words[c])\n",
- " \n",
- " t = (np.array(np.zeros(len(vocab)), ndmin=2).T)\n",
- " t[vocab.index(doc_words[c])] = 1\n",
- " errors_out += t_hat - t\n",
- " errors_middle += np.dot(U.T, errors_out)\n",
- " \n",
- " if c == middle:\n",
- " print(cross_entropy_loss(t, t_hat))\n",
- "\n",
- " # Backpropagation\n",
- " U += - alpha * np.dot(errors_out, e.T)\n",
- " D += - alpha * np.dot(errors_middle, d.T)"
- ]
- }
- ],
- "metadata": {
- "kernelspec": {
- "display_name": "Python [default]",
- "language": "python",
- "name": "python3"
- },
- "language_info": {
- "codemirror_mode": {
- "name": "ipython",
- "version": 3
- },
- "file_extension": ".py",
- "mimetype": "text/x-python",
- "name": "python",
- "nbconvert_exporter": "python",
- "pygments_lexer": "ipython3",
- "version": "3.6.3"
- },
- "toc": {
- "nav_menu": {},
- "number_sections": true,
- "sideBar": true,
- "skip_h1_title": false,
- "toc_cell": false,
- "toc_position": {},
- "toc_section_display": "block",
- "toc_window_display": false
- },
- "varInspector": {
- "cols": {
- "lenName": 16,
- "lenType": 16,
- "lenVar": 40
- },
- "kernels_config": {
- "python": {
- "delete_cmd_postfix": "",
- "delete_cmd_prefix": "del ",
- "library": "var_list.py",
- "varRefreshCmd": "print(var_dic_list())"
- },
- "r": {
- "delete_cmd_postfix": ") ",
- "delete_cmd_prefix": "rm(",
- "library": "var_list.r",
- "varRefreshCmd": "cat(var_dic_list()) "
- }
- },
- "types_to_exclude": [
- "module",
- "function",
- "builtin_function_or_method",
- "instance",
- "_Feature"
- ],
- "window_display": false
- }
- },
- "nbformat": 4,
- "nbformat_minor": 2
- }
Add Comment
Please, Sign In to add comment