Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- 1/14 fused_nn_conv2d_multiply_expand_dims_add_nn_relu_negative_nn_relu_multiply_add_2:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f5fce0ea43fa89dfd53142957b424aa7", [1, 512, 512, 20], [1, 1, 20, 40], [40], [40], [1, 1, 1, 40], [1, 512, 512, 40]]
- placeholder = PLACEHOLDER [1, 512, 512, 20]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 20, 40]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [40]
- placeholder = PLACEHOLDER [40]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
- placeholder = PLACEHOLDER [1, 1, 1, 40]
- T_negative(ax0, ax1, ax2, ax3) = (0f - T_add[ax0, ax1, ax2, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_negative[ax0, ax1, ax2, ax3], 0f)
- T_multiply(ax0, ax1, ax2, ax3) = (placeholder[ax0, 0, 0, ax3]*T_relu[ax0, ax1, ax2, ax3])
- T_add(ax0, ax1, ax2, ax3) = (T_relu[ax0, ax1, ax2, ax3] + T_multiply[ax0, ax1, ax2, ax3])
- 2/14 fused_nn_conv2d_add_fast_tanh_2:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f96d216db5f296dab654242dc885f498", [1, 1, 1, 8], [1, 1, 8, 4], [1, 1, 1, 4], [1, 1, 1, 4]]
- placeholder = PLACEHOLDER [1, 1, 1, 8]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 8, 4]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [1, 1, 1, 4]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3])
- T_minimum(ax0, ax1, ax2, ax3) = min(T_add[ax0, ax1, ax2, ax3], 9f)
- T_maximum(ax0, ax1, ax2, ax3) = max(T_minimum[ax0, ax1, ax2, ax3], -9f)
- T_fast_tanh(ax0, ax1, ax2, ax3) = ((T_maximum[ax0, ax1, ax2, ax3]*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3] ..(OMITTED).. )*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3])*1.19826e-06f) + 0.000118535f)) + 0.00226843f)) + 0.00489353f))
- 3/14 fused_nn_conv2d_2:8
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["4f6690042e1fb8fac22d45ca04ee17a9", [1, 258, 258, 80], [3, 3, 80, 1], [1, 256, 256, 80]]
- placeholder = PLACEHOLDER [1, 258, 258, 80]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [3, 3, 80, 1]
- DepthwiseConv2d(b, i, j, c) += (PaddedInput[b, (i + di), (j + dj), c]*placeholder[di, dj, c, 0])
- 4/14 fused_nn_conv2d:2
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["4f6690042e1fb8fac22d45ca04ee17a9", [1, 514, 514, 40], [3, 3, 40, 1], [1, 512, 512, 40]]
- placeholder = PLACEHOLDER [1, 514, 514, 40]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [3, 3, 40, 1]
- DepthwiseConv2d(b, i, j, c) += (PaddedInput[b, (i + di), (j + dj), c]*placeholder[di, dj, c, 0])
- 5/14 fused_nn_conv2d_multiply_expand_dims_add:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["a69b4f95edfd32f7f4d07ab68a8a0aa6", [1, 512, 512, 40], [1, 1, 40, 40], [40], [40], [1, 512, 512, 40]]
- placeholder = PLACEHOLDER [1, 512, 512, 40]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 40, 40]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [40]
- placeholder = PLACEHOLDER [40]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- 6/14 fused_nn_conv2d_add_fast_tanh:3
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f96d216db5f296dab654242dc885f498", [1, 1, 1, 8], [1, 1, 8, 40], [1, 1, 1, 40], [1, 1, 1, 40]]
- placeholder = PLACEHOLDER [1, 1, 1, 8]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 8, 40]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [1, 1, 1, 40]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3])
- T_minimum(ax0, ax1, ax2, ax3) = min(T_add[ax0, ax1, ax2, ax3], 9f)
- T_maximum(ax0, ax1, ax2, ax3) = max(T_minimum[ax0, ax1, ax2, ax3], -9f)
- T_fast_tanh(ax0, ax1, ax2, ax3) = ((T_maximum[ax0, ax1, ax2, ax3]*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3] ..(OMITTED).. )*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3])*1.19826e-06f) + 0.000118535f)) + 0.00226843f)) + 0.00489353f))
- 7/14 fused_nn_conv2d_add_fast_tanh_1:25
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f96d216db5f296dab654242dc885f498", [1, 1, 1, 8], [1, 1, 8, 80], [1, 1, 1, 80], [1, 1, 1, 80]]
- placeholder = PLACEHOLDER [1, 1, 1, 8]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 8, 80]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [1, 1, 1, 80]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3])
- T_minimum(ax0, ax1, ax2, ax3) = min(T_add[ax0, ax1, ax2, ax3], 9f)
- T_maximum(ax0, ax1, ax2, ax3) = max(T_minimum[ax0, ax1, ax2, ax3], -9f)
- T_fast_tanh(ax0, ax1, ax2, ax3) = ((T_maximum[ax0, ax1, ax2, ax3]*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3] ..(OMITTED).. )*(((T_maximum[ax0, ax1, ax2, ax3]*T_maximum[ax0, ax1, ax2, ax3])*1.19826e-06f) + 0.000118535f)) + 0.00226843f)) + 0.00489353f))
- 8/14 fused_nn_conv2d_multiply_expand_dims_add_nn_relu_negative_nn_relu_multiply_add:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f5fce0ea43fa89dfd53142957b424aa7", [1, 512, 512, 4], [1, 1, 4, 40], [40], [40], [1, 1, 1, 40], [1, 512, 512, 40]]
- placeholder = PLACEHOLDER [1, 512, 512, 4]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 4, 40]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [40]
- placeholder = PLACEHOLDER [40]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
- placeholder = PLACEHOLDER [1, 1, 1, 40]
- T_negative(ax0, ax1, ax2, ax3) = (0f - T_add[ax0, ax1, ax2, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_negative[ax0, ax1, ax2, ax3], 0f)
- T_multiply(ax0, ax1, ax2, ax3) = (placeholder[ax0, 0, 0, ax3]*T_relu[ax0, ax1, ax2, ax3])
- T_add(ax0, ax1, ax2, ax3) = (T_relu[ax0, ax1, ax2, ax3] + T_multiply[ax0, ax1, ax2, ax3])
- 9/14 fused_nn_conv2d_multiply_expand_dims_add_add:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["3b642a36ecdff080fb7797fdf88a3592", [1, 512, 512, 40], [1, 1, 40, 4], [4], [4], [1, 512, 512, 4], [1, 512, 512, 4]]
- placeholder = PLACEHOLDER [1, 512, 512, 40]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 40, 4]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [4]
- placeholder = PLACEHOLDER [4]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- placeholder = PLACEHOLDER [1, 512, 512, 4]
- T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3])
- 10/14 fused_nn_conv2d_1:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["4f6690042e1fb8fac22d45ca04ee17a9", [1, 258, 258, 160], [3, 3, 160, 1], [1, 256, 256, 160]]
- placeholder = PLACEHOLDER [1, 258, 258, 160]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [3, 3, 160, 1]
- DepthwiseConv2d(b, i, j, c) += (PaddedInput[b, (i + di), (j + dj), c]*placeholder[di, dj, c, 0])
- 11/14 fused_nn_conv2d_add:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["1cc666833c122282e3fcf3595901b12b", [1, 1, 1, 1], [1, 1, 1, 232], [1, 1, 1, 232], [1, 1, 1, 232]]
- placeholder = PLACEHOLDER [1, 1, 1, 1]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 1, 232]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [1, 1, 1, 232]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + placeholder[ax0, ax1, ax2, ax3])
- 12/14 fused_nn_conv2d_multiply_expand_dims_add_multiply_add:8
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["bb9b318362b5e0751c1f053b058762f7", [1, 256, 256, 80], [1, 1, 80, 80], [80], [80], [1, 256, 256, 80], [1, 256, 256, 80]]
- placeholder = PLACEHOLDER [1, 256, 256, 80]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 80, 80]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [80]
- placeholder = PLACEHOLDER [80]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- compile_engine_const() = 1.5f
- placeholder = PLACEHOLDER [1, 256, 256, 80]
- T_multiply(ax0, ax1, ax2, ax3) = (compile_engine_const[]*placeholder[ax0, ax1, ax2, ax3])
- T_add(ax0, ax1, ax2, ax3) = (T_add[ax0, ax1, ax2, ax3] + T_multiply[ax0, ax1, ax2, ax3])
- 13/14 fused_nn_conv2d_multiply_expand_dims_add_1:1
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["a69b4f95edfd32f7f4d07ab68a8a0aa6", [1, 256, 256, 160], [1, 1, 160, 80], [80], [80], [1, 256, 256, 80]]
- placeholder = PLACEHOLDER [1, 256, 256, 160]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 160, 80]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [80]
- placeholder = PLACEHOLDER [80]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- 14/14 fused_nn_conv2d_multiply_expand_dims_add_nn_relu_negative_nn_relu_multiply_add_1:16
- Target :opencl -keys=mali,opencl,gpu -device=mali -max_num_threads=256 -thread_warp_size=1
- TargetHost:llvm -keys=cpu -link-params=0 -mtriple=arm64-linux-android
- Layout :2
- ["f5fce0ea43fa89dfd53142957b424aa7", [1, 256, 256, 80], [1, 1, 80, 80], [80], [80], [1, 1, 1, 80], [1, 256, 256, 80]]
- placeholder = PLACEHOLDER [1, 256, 256, 80]
- PaddedInput(i0, i1, i2, i3) = placeholder[i0, i1, i2, i3]
- placeholder = PLACEHOLDER [1, 1, 80, 80]
- Conv2dOutput(nn, yy, xx, ff) += (PaddedInput[nn, (yy + ry), (xx + rx), rc]*placeholder[ry, rx, rc, ff])
- placeholder = PLACEHOLDER [80]
- placeholder = PLACEHOLDER [80]
- T_multiply(ax0) = (placeholder[ax0]*placeholder[ax0])
- T_expand_dims(ax0, ax1, ax2, ax3) = T_multiply[ax3]
- T_add(ax0, ax1, ax2, ax3) = (Conv2dOutput[ax0, ax1, ax2, ax3] + T_expand_dims[ax0, 0, 0, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_add[ax0, ax1, ax2, ax3], 0f)
- placeholder = PLACEHOLDER [1, 1, 1, 80]
- T_negative(ax0, ax1, ax2, ax3) = (0f - T_add[ax0, ax1, ax2, ax3])
- T_relu(ax0, ax1, ax2, ax3) = max(T_negative[ax0, ax1, ax2, ax3], 0f)
- T_multiply(ax0, ax1, ax2, ax3) = (placeholder[ax0, 0, 0, ax3]*T_relu[ax0, ax1, ax2, ax3])
- T_add(ax0, ax1, ax2, ax3) = (T_relu[ax0, ax1, ax2, ax3] + T_multiply[ax0, ax1, ax2, ax3])
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement