# 1: Imports.
import numpy as np

# 2: Invocation. Requires a format string + any number of input args.
arg0 = np.random.normal(...)
arg1 = np.random.normal(...)
...
argn = np.random.normal(...)
# ...
dst = np.einsum(" ", arg0, arg1, ..., argn)

# 3: Format string. Incomplete example with 3 input args.
dst = np.einsum(" , , -> ", arg0, arg1, arg2)

# 4: Format string. Each argument's ndim must equal the length of its
#    index group (incomplete example with 3 input args).
dst = np.einsum(" , , -> ", arg0, arg1, arg2)
assert arg0.ndim == len("  ")   # (Order-2 Tensor)
assert arg1.ndim == len("   ")  # (Order-3 Tensor)
assert arg2.ndim == len("  ")   # (Order-2 Tensor)
assert dst .ndim == len("    ") # (Order-4 Tensor)

# 5: Format string. Complete examples with 1 and 2 args.
s = np.einsum("a->", v)
T = np.einsum("ij->ji", M)
C = np.einsum("mn,np->mp", A, B)
assert v.ndim == len("a")
assert s.ndim == len("")
assert M.ndim == len("ij")
assert T.ndim == len("ji")
assert A.ndim == len("mn")
assert B.ndim == len("np")
assert C.ndim == len("mp")
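
# Each complete example above reproduces a familiar NumPy built-in.
# A quick sanity check (the shapes here are made up for illustration):
v = np.random.normal(size=(5,))
M = np.random.normal(size=(3, 4))
A = np.random.normal(size=(3, 4))
B = np.random.normal(size=(4, 2))
assert np.allclose(np.einsum("a->", v), v.sum())         # full sum
assert np.allclose(np.einsum("ij->ji", M), M.T)          # transpose
assert np.allclose(np.einsum("mn,np->mp", A, B), A @ B)  # matmul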

# 6: Elaborated Example. Matrix multiplication.
#
#  C = A * B
#
#  Ni x Nj = Ni x Nk * Nk x Nj
#            |    ^-----^   |
#            |     Match    |
#            |              |
#            +-----+    +---+
#                  |    |
#                  V    V
#  Ni x Nj <------ Ni x Nj
C = np.empty((Ni, Nj))
for i in range(Ni):
    for j in range(Nj):
        total = 0
        for k in range(Nk):
            total += A[i, k] * B[k, j]
        C[i, j] = total
C = np.einsum("ik,kj->ij", A, B)
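
# Sanity check (illustrative sizes; any consistent Ni, Nk, Nj work):
Ni, Nk, Nj = 3, 4, 5
A = np.random.normal(size=(Ni, Nk))
B = np.random.normal(size=(Nk, Nj))
assert np.allclose(np.einsum("ik,kj->ij", A, B), A @ B)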

# 7. Free Indices: the indices that appear in the output ("i" and "j"
#    here). Each free index gets its own outer loop, and the output
#    has one axis per free index.
C = np.empty((Ni, Nj))
for i in range(Ni):          # "i" is free: it appears in the output.
    for j in range(Nj):      # "j" is free: it appears in the output.
        total = 0
        for k in range(Nk):
            total += A[i, k] * B[k, j]
        C[i, j] = total
C = np.einsum("ik,kj->ij", A, B)

# 8. Summation Indices: the indices that appear in the inputs but NOT
#    in the output ("k" here). Each summation index gets an inner loop
#    that is summed over, and its axis disappears from the output.
C = np.empty((Ni, Nj))
for i in range(Ni):
    for j in range(Nj):
        total = 0
        for k in range(Nk):  # "k" is summed: absent from the output.
            total += A[i, k] * B[k, j]
        C[i, j] = total
C = np.einsum("ik,kj->ij", A, B)
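
# The split between free and summation indices is controlled entirely
# by the output side of the format string. With an illustrative A:
A = np.random.normal(size=(3, 4))
assert np.allclose(np.einsum("ij->ij", A), A)             # i, j free
assert np.allclose(np.einsum("ij->i",  A), A.sum(axis=1)) # j summed
assert np.allclose(np.einsum("ij->",   A), A.sum())       # i, j summed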

# 9. Elaborated Example. Matrix diagonal extraction.
d = np.empty((Ni,))
for i in range(Ni):
    total = 0        # No summation index: the "sum" is a single term.
    total += M[i, i]
    d[i] = total
d = np.einsum("ii->i", M)
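
# This matches NumPy's built-in diagonal extraction (M assumed square):
M = np.random.normal(size=(4, 4))
assert np.allclose(np.einsum("ii->i", M), np.diag(M))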

# 10. Elaborated Example. Matrix trace.
Tr = 0  # Scalar! Has dimension 0 and no indices.
total = 0
for i in range(Ni):
    total += M[i, i]
Tr = total
Tr = np.einsum("ii->", M)
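
# This matches NumPy's built-in trace (M assumed square):
M = np.random.normal(size=(4, 4))
assert np.allclose(np.einsum("ii->", M), np.trace(M))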

# 11. Elaborated Example. Quadratic Form.
x = 0
total = 0
for s in range(Ns):
    for t in range(Nt):
        total += v[s] * M[s, t] * v[t]
x = total
x = np.einsum("s,st,t->", v, M, v)
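
# Equivalent to the usual dot-product formulation v^T M v. Note this
# requires Ns == Nt, since v is indexed by both 's' and 't':
v = np.random.normal(size=(6,))
M = np.random.normal(size=(6, 6))
assert np.allclose(np.einsum("s,st,t->", v, M, v), v @ M @ v)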

# 12. Elaborated example. Batched outer product.
R = np.empty((NB, Ni, Nj))
for B in range(NB):
    for i in range(Ni):
        for j in range(Nj):
            total = 0    # No summation index: a single term again.
            total += P[B, i] * Q[B, j]
            R[B, i, j] = total
R = np.einsum("Bi,Bj->Bij", P, Q)
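
# Equivalent to a broadcasted elementwise product, if you prefer
# (illustrative sizes):
NB, Ni, Nj = 8, 3, 5
P = np.random.normal(size=(NB, Ni))
Q = np.random.normal(size=(NB, Nj))
assert np.allclose(np.einsum("Bi,Bj->Bij", P, Q),
                   P[:, :, None] * Q[:, None, :])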

# 13. Natural consequences of the einsum definition.

# Requirements on the size of individual axes.
C = np.einsum("ik,kj->ij", A, B)
assert A.shape == (Ni, Nk)
assert B.shape == (Nk, Nj)
assert C.shape == (Ni, Nj)

# Requirement that certain axes have identical sizes because they
# share an index label.
# Example 1: Matrix Multiplication
C = np.einsum("ik,kj->ij", A, B)
assert A.shape[1] == B.shape[0]  # == Nk
assert A.shape[0] == C.shape[0]  # == Ni
assert B.shape[1] == C.shape[1]  # == Nj

# Example 2: Matrix Diagonal Extraction
d = np.einsum("ii->i", D)
assert D.shape[0] == D.shape[1]  # == Ni
assert D.shape[1] == d.shape[0]  # == Ni

# 14: Format Strings. Rules.

# Bad. Number of input index groups doesn't match the number of
# arguments.
np.einsum("ab,bc->ac", A)

# Bad. Indexes must be ASCII upper/lowercase letters.
np.einsum("012,1^%->:;?", A, B)

# Bad. Argument 0 has 3 dimensions but only 2 indexes are given.
A = np.random.normal(size=(2, 3, 4))
B = np.random.normal(size=(4, 5, 6))
np.einsum("ab,bcd->a", A, B)

# Bad. One of the output indexes isn't in the set of all input
# indexes.
np.einsum("ab,bc->acz", A, B)

# Bad. Output has a repeated index.
np.einsum("ab,bc->baa", A, B)

# Bad. Mismatches in the sizes of input argument axes that are
# labelled with the same index.
A = np.random.normal(size=(2, 3, 4))
B = np.random.normal(size=(3, 4, 5))
np.einsum("ckj,cqq->c", A, B)
assert A.shape[0] == B.shape[0]  # ERROR: 2 != 3
assert B.shape[1] == B.shape[2]  # ERROR: 4 != 5
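
# All of the "Bad" calls above fail loudly; in current NumPy versions
# they raise a ValueError. For example:
A = np.random.normal(size=(2, 3))
try:
    np.einsum("ab,bc->ac", A)  # one index group too many for one arg
except ValueError as err:
    print("einsum rejected the call:", err)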

# 15: MLP Backprop done easily (stochastic version).
#     h = sigmoid(Wx + b)
#     y = softmax(Vh + c)
Ni = 784
Nh = 500
No = 10
W = np.random.normal(size=(Nh, Ni))  # Nh x Ni
b = np.random.normal(size=(Nh,))     # Nh
V = np.random.normal(size=(No, Nh))  # No x Nh
c = np.random.normal(size=(No,))     # No
# Load x and t...
x, t = train_set[k]
# With a judicious, consistent choice of index labels, we can express
# fprop() and bprop() extremely tersely; no thought needs to be given
# to the details of shoehorning matrices into np.dot(), such as the
# exact argument order and the required transpositions.
#
# Let
#
#     'i' be the input  dimension label.
#     'h' be the hidden dimension label.
#     'o' be the output dimension label.
#
# Then

# Fprop
ha = np.einsum("hi, i -> h", W, x) + b
h  = sigmoid(ha)
ya = np.einsum("oh, h -> o", V, h) + c
y  = softmax(ya)

# Bprop
dLdya = y - t
dLdV  = np.einsum("h , o -> oh", h, dLdya)
dLdc  = dLdya
dLdh  = np.einsum("oh, o -> h ", V, dLdya)
dLdha = dLdh * sigmoidgrad(ha)
dLdW  = np.einsum("i , h -> hi", x, dLdha)
dLdb  = dLdha
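
# sigmoid(), softmax() and sigmoidgrad() are used above but never
# defined in this paste; minimal sketches of what they are assumed
# to be:
def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def sigmoidgrad(z):
    s = sigmoid(z)
    return s * (1.0 - s)  # d/dz sigmoid(z)

def softmax(z):
    # Subtracting the max stabilizes exp(); axis=-1 handles both the
    # stochastic (1-D) and batch (2-D) versions below.
    e = np.exp(z - np.max(z, axis=-1, keepdims=True))
    return e / e.sum(axis=-1, keepdims=True)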

# 16: MLP Backprop done easily (batch version).
# But we want to exploit hardware with a batch version! This is
# trivially implemented with simple additions to np.einsum's format
# string, plus the usual averaging logic required when handling
# batches. For demonstration and elegance, we implement even that
# averaging logic with einsum.
batch_size = 128
# Let
#     'B' be the batch  dimension label.
#     'i' be the input  dimension label.
#     'h' be the hidden dimension label.
#     'o' be the output dimension label.
#
# Then

# Fprop
ha = np.einsum("hi, Bi -> Bh", W, x) + b
h  = sigmoid(ha)
ya = np.einsum("oh, Bh -> Bo", V, h) + c
y  = softmax(ya)

# Bprop
dLdya = y - t
dLdV  = np.einsum("Bh, Bo -> oh", h, dLdya) / batch_size
dLdc  = np.einsum("Bo -> o ", dLdya)        / batch_size
dLdh  = np.einsum("oh, Bo -> Bh", V, dLdya)
dLdha = dLdh * sigmoidgrad(ha)
dLdW  = np.einsum("Bi, Bh -> hi", x, dLdha) / batch_size
dLdb  = np.einsum("Bh -> h ", dLdha)        / batch_size
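
# Every gradient above has the same shape as the parameter it updates,
# so a plain SGD step is one line per parameter (the learning rate
# here is made up for illustration):
lr = 0.01
for p, g in ((W, dLdW), (b, dLdb), (V, dLdV), (c, dLdc)):
    assert p.shape == g.shape
W -= lr * dLdW; b -= lr * dLdb
V -= lr * dLdV; c -= lr * dLdc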