Untitled

WARNING: Logging before InitGoogleLogging() is written to STDERR
I0819 14:26:04.340821 12127 parallel.cpp:47] P2PManager::Init @ ntu329-ESC8000-G3-Series-Invalid-entry-length-16-Fixed-up-to-11
I0819 14:26:04.808403 12127 caffe.cpp:709] This is NVCaffe 0.17.2 started at Mon Aug 19 14:26:04 2019
I0819 14:26:04.808634 12127 caffe.cpp:711] CuDNN version: 7103
I0819 14:26:04.808640 12127 caffe.cpp:712] CuBLAS version: 9000
I0819 14:26:04.808645 12127 caffe.cpp:713] CUDA version: 9000
I0819 14:26:04.808648 12127 caffe.cpp:714] CUDA driver version: 9000
I0819 14:26:04.808655 12127 caffe.cpp:715] Arguments:
[0]: ./build/tools/caffe
[1]: train
[2]: --solver=examples/mnist/lenet_solver_fp16.prototxt
[3]: -gpu
[4]: all
I0819 14:26:04.811754 12127 caffe.cpp:220] Using GPUs 0, 1, 2, 3, 4, 5, 6, 7
I0819 14:26:04.852030 12127 gpu_memory.cpp:105] GPUMemory::Manager initialized
I0819 14:26:04.853935 12127 gpu_memory.cpp:107] Total memory: 11714691072, Free: 11253448704, dev_info[0]: total=11714691072 free=11253448704
I0819 14:26:04.855993 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[1]: total=11714691072 free=11253448704
I0819 14:26:04.858072 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[2]: total=11714691072 free=11253448704
I0819 14:26:04.860294 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[3]: total=11714691072 free=11253448704
I0819 14:26:04.862391 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[4]: total=11714691072 free=11253448704
I0819 14:26:04.864476 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[5]: total=11714691072 free=11253448704
I0819 14:26:04.866564 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[6]: total=11714691072 free=11253448704
I0819 14:26:04.868671 12127 gpu_memory.cpp:107] Total memory: 11715084288, Free: 11253448704, dev_info[7]: total=11714691072 free=11253448704
I0819 14:26:04.870052 12127 caffe.cpp:227] GPU 0: GeForce GTX 1080 Ti
I0819 14:26:04.871790 12127 caffe.cpp:227] GPU 1: GeForce GTX 1080 Ti
I0819 14:26:04.873723 12127 caffe.cpp:227] GPU 2: GeForce GTX 1080 Ti
I0819 14:26:04.875409 12127 caffe.cpp:227] GPU 3: GeForce GTX 1080 Ti
I0819 14:26:04.877002 12127 caffe.cpp:227] GPU 4: GeForce GTX 1080 Ti
I0819 14:26:04.878690 12127 caffe.cpp:227] GPU 5: GeForce GTX 1080 Ti
I0819 14:26:04.880345 12127 caffe.cpp:227] GPU 6: GeForce GTX 1080 Ti
I0819 14:26:04.881867 12127 caffe.cpp:227] GPU 7: GeForce GTX 1080 Ti
I0819 14:26:04.881983 12127 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:04.911844 12127 solver.cpp:43] Initializing solver from parameters:
test_iter: 100
test_interval: 500
base_lr: 0.006
display: 100
max_iter: 20000
lr_policy: "inv"
gamma: 0.0001
power: 0.75
momentum: 0.9
weight_decay: 0.0005
solver_mode: GPU
device_id: 0
net: "examples/mnist/lenet_train_test_fp16.prototxt"
train_state {
  level: 0
  stage: ""
}
solver_data_type: FLOAT16
I0819 14:26:04.912662 12127 solver.cpp:85] Creating training net from net file: examples/mnist/lenet_train_test_fp16.prototxt
I0819 14:26:04.913134 12127 net.cpp:459] The NetState phase (0) differed from the phase (1) specified by a rule in layer mnist
I0819 14:26:04.913177 12127 net.cpp:459] The NetState phase (0) differed from the phase (1) specified by a rule in layer accuracy
I0819 14:26:04.913367 12127 net.cpp:83] Initializing net from parameters:
name: "LeNet-fp16"
state {
  phase: TRAIN
  level: 0
  stage: ""
}
default_forward_type: FLOAT16
default_backward_type: FLOAT16
default_forward_math: FLOAT16
default_backward_math: FLOAT16
global_grad_scale: 1
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TRAIN
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/mnist_train_lmdb"
    batch_size: 8
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
I0819 14:26:04.913514 12127 layer_factory.hpp:172] Creating layer 'mnist' of type 'Data'
I0819 14:26:04.913532 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:04.913703 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:04.914325 12127 net.cpp:202] Created Layer mnist (0)
I0819 14:26:04.914341 12127 net.cpp:544] mnist -> data
I0819 14:26:04.914397 12127 net.cpp:544] mnist -> label
I0819 14:26:04.914441 12127 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:04.916122 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:04.916152 12173 blocking_queue.cpp:40] Data layer prefetch queue empty
I0819 14:26:04.917896 12174 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:04.919813 12127 data_layer.cpp:200] [n0.d0.r0] Output data size: 8, 1, 28, 28
I0819 14:26:04.919858 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:04.919941 12127 net.cpp:262] Setting up mnist
I0819 14:26:04.919961 12127 net.cpp:269] TRAIN Top shape for layer 0 'mnist' 8 1 28 28 (6272)
I0819 14:26:04.919975 12127 net.cpp:269] TRAIN Top shape for layer 0 'mnist' 8 (8)
I0819 14:26:04.919986 12127 layer_factory.hpp:172] Creating layer 'conv1' of type 'Convolution'
I0819 14:26:04.919994 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:04.921756 12127 net.cpp:202] Created Layer conv1 (1)
I0819 14:26:04.921785 12127 net.cpp:574] conv1 <- data
I0819 14:26:04.921828 12127 net.cpp:544] conv1 -> conv1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.810344 12127 net.cpp:262] Setting up conv1
I0819 14:26:05.810415 12127 net.cpp:269] TRAIN Top shape for layer 1 'conv1' 8 20 24 24 (92160)
I0819 14:26:05.810488 12127 layer_factory.hpp:172] Creating layer 'pool1' of type 'Pooling'
I0819 14:26:05.810503 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.810540 12127 net.cpp:202] Created Layer pool1 (2)
I0819 14:26:05.810556 12127 net.cpp:574] pool1 <- conv1
I0819 14:26:05.810569 12127 net.cpp:544] pool1 -> pool1
I0819 14:26:05.810711 12127 net.cpp:262] Setting up pool1
I0819 14:26:05.810725 12127 net.cpp:269] TRAIN Top shape for layer 2 'pool1' 8 20 12 12 (23040)
I0819 14:26:05.810734 12127 layer_factory.hpp:172] Creating layer 'conv2' of type 'Convolution'
I0819 14:26:05.810751 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.810786 12127 net.cpp:202] Created Layer conv2 (3)
I0819 14:26:05.810794 12127 net.cpp:574] conv2 <- pool1
I0819 14:26:05.810801 12127 net.cpp:544] conv2 -> conv2
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.822500 12127 net.cpp:262] Setting up conv2
I0819 14:26:05.822523 12127 net.cpp:269] TRAIN Top shape for layer 3 'conv2' 8 50 8 8 (25600)
I0819 14:26:05.822582 12127 layer_factory.hpp:172] Creating layer 'pool2' of type 'Pooling'
I0819 14:26:05.822590 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.822615 12127 net.cpp:202] Created Layer pool2 (4)
I0819 14:26:05.822623 12127 net.cpp:574] pool2 <- conv2
I0819 14:26:05.822631 12127 net.cpp:544] pool2 -> pool2
I0819 14:26:05.822722 12127 net.cpp:262] Setting up pool2
I0819 14:26:05.822737 12127 net.cpp:269] TRAIN Top shape for layer 4 'pool2' 8 50 4 4 (6400)
I0819 14:26:05.822743 12127 layer_factory.hpp:172] Creating layer 'ip1' of type 'InnerProduct'
I0819 14:26:05.822751 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.822767 12127 net.cpp:202] Created Layer ip1 (5)
I0819 14:26:05.822774 12127 net.cpp:574] ip1 <- pool2
I0819 14:26:05.822782 12127 net.cpp:544] ip1 -> ip1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.976035 12127 net.cpp:262] Setting up ip1
I0819 14:26:05.976068 12127 net.cpp:269] TRAIN Top shape for layer 5 'ip1' 8 500 (4000)
I0819 14:26:05.976094 12127 layer_factory.hpp:172] Creating layer 'relu1' of type 'ReLU'
I0819 14:26:05.976104 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.976135 12127 net.cpp:202] Created Layer relu1 (6)
I0819 14:26:05.976143 12127 net.cpp:574] relu1 <- ip1
I0819 14:26:05.976152 12127 net.cpp:529] relu1 -> ip1 (in-place)
I0819 14:26:05.976186 12127 net.cpp:262] Setting up relu1
I0819 14:26:05.976192 12127 net.cpp:269] TRAIN Top shape for layer 6 'relu1' 8 500 (4000)
I0819 14:26:05.976198 12127 layer_factory.hpp:172] Creating layer 'ip2' of type 'InnerProduct'
I0819 14:26:05.976204 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.976214 12127 net.cpp:202] Created Layer ip2 (7)
I0819 14:26:05.976220 12127 net.cpp:574] ip2 <- ip1
I0819 14:26:05.976228 12127 net.cpp:544] ip2 -> ip2
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.979912 12127 net.cpp:262] Setting up ip2
I0819 14:26:05.979929 12127 net.cpp:269] TRAIN Top shape for layer 7 'ip2' 8 10 (80)
I0819 14:26:05.979940 12127 layer_factory.hpp:172] Creating layer 'loss' of type 'SoftmaxWithLoss'
I0819 14:26:05.979948 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.979971 12127 net.cpp:202] Created Layer loss (8)
I0819 14:26:05.979979 12127 net.cpp:574] loss <- ip2
I0819 14:26:05.979984 12127 net.cpp:574] loss <- label
I0819 14:26:05.979992 12127 net.cpp:544] loss -> loss
I0819 14:26:05.980211 12127 net.cpp:262] Setting up loss
I0819 14:26:05.980224 12127 net.cpp:269] TRAIN Top shape for layer 8 'loss' (1)
I0819 14:26:05.980228 12127 net.cpp:273]     with loss weight 1
I0819 14:26:05.980237 12127 net.cpp:338] loss needs backward computation.
I0819 14:26:05.980242 12127 net.cpp:338] ip2 needs backward computation.
I0819 14:26:05.980247 12127 net.cpp:338] relu1 needs backward computation.
I0819 14:26:05.980250 12127 net.cpp:338] ip1 needs backward computation.
I0819 14:26:05.980255 12127 net.cpp:338] pool2 needs backward computation.
I0819 14:26:05.980260 12127 net.cpp:338] conv2 needs backward computation.
I0819 14:26:05.980264 12127 net.cpp:338] pool1 needs backward computation.
I0819 14:26:05.980268 12127 net.cpp:338] conv1 needs backward computation.
I0819 14:26:05.980273 12127 net.cpp:340] mnist does not need backward computation.
I0819 14:26:05.980278 12127 net.cpp:382] This network produces output loss
I0819 14:26:05.980299 12127 net.cpp:405] Top memory (TRAIN) required for data: 323124 diff: 335684
I0819 14:26:05.980305 12127 net.cpp:408] Bottom memory (TRAIN) required for data: 323120 diff: 335680
I0819 14:26:05.980309 12127 net.cpp:411] Shared (in-place) memory (TRAIN) by data: 8000 diff: 8000
I0819 14:26:05.980314 12127 net.cpp:414] Parameters memory (TRAIN) required for data: 862160 diff: 862160
I0819 14:26:05.980319 12127 net.cpp:417] Parameters shared memory (TRAIN) by data: 0 diff: 0
I0819 14:26:05.980342 12127 net.cpp:423] Network initialization done.
I0819 14:26:05.980953 12127 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
I0819 14:26:05.980998 12127 net.cpp:459] The NetState phase (1) differed from the phase (0) specified by a rule in layer mnist
W0819 14:26:05.981026 12127 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:05.981329 12127 net.cpp:83] Initializing net from parameters:
name: "LeNet-fp16"
state {
  phase: TEST
}
default_forward_type: FLOAT16
default_backward_type: FLOAT16
default_forward_math: FLOAT16
default_backward_math: FLOAT16
global_grad_scale: 1
layer {
  name: "mnist"
  type: "Data"
  top: "data"
  top: "label"
  include {
    phase: TEST
  }
  transform_param {
    scale: 0.00390625
  }
  data_param {
    source: "examples/mnist/mnist_test_lmdb"
    batch_size: 13
    backend: LMDB
  }
}
layer {
  name: "conv1"
  type: "Convolution"
  bottom: "data"
  top: "conv1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 20
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "pool1"
  type: "Pooling"
  bottom: "conv1"
  top: "pool1"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "conv2"
  type: "Convolution"
  bottom: "pool1"
  top: "conv2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  convolution_param {
    num_output: 50
    kernel_size: 5
    stride: 1
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "pool2"
  type: "Pooling"
  bottom: "conv2"
  top: "pool2"
  pooling_param {
    pool: MAX
    kernel_size: 2
    stride: 2
  }
}
layer {
  name: "ip1"
  type: "InnerProduct"
  bottom: "pool2"
  top: "ip1"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 500
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "relu1"
  type: "ReLU"
  bottom: "ip1"
  top: "ip1"
}
layer {
  name: "ip2"
  type: "InnerProduct"
  bottom: "ip1"
  top: "ip2"
  param {
    lr_mult: 1
  }
  param {
    lr_mult: 2
  }
  inner_product_param {
    num_output: 10
    weight_filler {
      type: "gspt_xavier"
    }
    bias_filler {
      type: "gspt_constant"
    }
  }
}
layer {
  name: "accuracy"
  type: "Accuracy"
  bottom: "ip2"
  bottom: "label"
  top: "accuracy"
  include {
    phase: TEST
  }
}
layer {
  name: "loss"
  type: "SoftmaxWithLoss"
  bottom: "ip2"
  bottom: "label"
  top: "loss"
}
I0819 14:26:05.981458 12127 layer_factory.hpp:172] Creating layer 'mnist' of type 'Data'
I0819 14:26:05.981469 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.981510 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:05.981616 12127 net.cpp:202] Created Layer mnist (0)
I0819 14:26:05.981624 12127 net.cpp:544] mnist -> data
I0819 14:26:05.981634 12127 net.cpp:544] mnist -> label
I0819 14:26:05.981652 12127 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:05.981688 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:05.985260 12177 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:05.985519 12127 data_layer.cpp:200] (n0.d0.r0) Output data size: 13, 1, 28, 28
I0819 14:26:05.985556 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:05.986106 12127 net.cpp:262] Setting up mnist
I0819 14:26:05.986117 12127 net.cpp:269] TEST Top shape for layer 0 'mnist' 13 1 28 28 (10192)
I0819 14:26:05.986125 12127 net.cpp:269] TEST Top shape for layer 0 'mnist' 13 (13)
I0819 14:26:05.986131 12127 layer_factory.hpp:172] Creating layer 'label_mnist_1_split' of type 'Split'
I0819 14:26:05.986137 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.986166 12127 net.cpp:202] Created Layer label_mnist_1_split (1)
I0819 14:26:05.986171 12127 net.cpp:574] label_mnist_1_split <- label
I0819 14:26:05.986181 12127 net.cpp:544] label_mnist_1_split -> label_mnist_1_split_0
I0819 14:26:05.986191 12127 net.cpp:544] label_mnist_1_split -> label_mnist_1_split_1
I0819 14:26:05.987752 12178 data_layer.cpp:105] (n0.d0.r0) Parser threads: 1
I0819 14:26:05.987764 12127 net.cpp:262] Setting up label_mnist_1_split
I0819 14:26:05.987776 12178 data_layer.cpp:107] (n0.d0.r0) Transformer threads: 1
I0819 14:26:05.987787 12127 net.cpp:269] TEST Top shape for layer 1 'label_mnist_1_split' 13 (13)
I0819 14:26:05.987794 12127 net.cpp:269] TEST Top shape for layer 1 'label_mnist_1_split' 13 (13)
I0819 14:26:05.987802 12127 layer_factory.hpp:172] Creating layer 'conv1' of type 'Convolution'
I0819 14:26:05.987808 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.987841 12127 net.cpp:202] Created Layer conv1 (2)
I0819 14:26:05.987848 12127 net.cpp:574] conv1 <- data
I0819 14:26:05.987854 12127 net.cpp:544] conv1 -> conv1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.990432 12127 net.cpp:262] Setting up conv1
I0819 14:26:05.990456 12127 net.cpp:269] TEST Top shape for layer 2 'conv1' 13 20 24 24 (149760)
I0819 14:26:05.990481 12127 layer_factory.hpp:172] Creating layer 'pool1' of type 'Pooling'
I0819 14:26:05.990489 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.990506 12127 net.cpp:202] Created Layer pool1 (3)
I0819 14:26:05.990512 12127 net.cpp:574] pool1 <- conv1
I0819 14:26:05.990520 12127 net.cpp:544] pool1 -> pool1
I0819 14:26:05.990665 12127 net.cpp:262] Setting up pool1
I0819 14:26:05.990681 12127 net.cpp:269] TEST Top shape for layer 3 'pool1' 13 20 12 12 (37440)
I0819 14:26:05.990689 12127 layer_factory.hpp:172] Creating layer 'conv2' of type 'Convolution'
I0819 14:26:05.990694 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.990754 12127 net.cpp:202] Created Layer conv2 (4)
I0819 14:26:05.990761 12127 net.cpp:574] conv2 <- pool1
I0819 14:26:05.990767 12127 net.cpp:544] conv2 -> conv2
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:05.999617 12127 net.cpp:262] Setting up conv2
I0819 14:26:05.999635 12127 net.cpp:269] TEST Top shape for layer 4 'conv2' 13 50 8 8 (41600)
I0819 14:26:05.999650 12127 layer_factory.hpp:172] Creating layer 'pool2' of type 'Pooling'
I0819 14:26:05.999656 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.999672 12127 net.cpp:202] Created Layer pool2 (5)
I0819 14:26:05.999680 12127 net.cpp:574] pool2 <- conv2
I0819 14:26:05.999686 12127 net.cpp:544] pool2 -> pool2
I0819 14:26:05.999768 12127 net.cpp:262] Setting up pool2
I0819 14:26:05.999778 12127 net.cpp:269] TEST Top shape for layer 5 'pool2' 13 50 4 4 (10400)
I0819 14:26:05.999783 12127 layer_factory.hpp:172] Creating layer 'ip1' of type 'InnerProduct'
I0819 14:26:05.999788 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:05.999801 12127 net.cpp:202] Created Layer ip1 (6)
I0819 14:26:05.999806 12127 net.cpp:574] ip1 <- pool2
I0819 14:26:05.999812 12127 net.cpp:544] ip1 -> ip1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:06.132076 12127 net.cpp:262] Setting up ip1
I0819 14:26:06.132113 12127 net.cpp:269] TEST Top shape for layer 6 'ip1' 13 500 (6500)
I0819 14:26:06.132139 12127 layer_factory.hpp:172] Creating layer 'relu1' of type 'ReLU'
I0819 14:26:06.132150 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:06.132164 12127 net.cpp:202] Created Layer relu1 (7)
I0819 14:26:06.132171 12127 net.cpp:574] relu1 <- ip1
I0819 14:26:06.132180 12127 net.cpp:529] relu1 -> ip1 (in-place)
I0819 14:26:06.132201 12127 net.cpp:262] Setting up relu1
I0819 14:26:06.132208 12127 net.cpp:269] TEST Top shape for layer 7 'relu1' 13 500 (6500)
I0819 14:26:06.132233 12127 layer_factory.hpp:172] Creating layer 'ip2' of type 'InnerProduct'
I0819 14:26:06.132238 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:06.132254 12127 net.cpp:202] Created Layer ip2 (8)
I0819 14:26:06.132261 12127 net.cpp:574] ip2 <- ip1
I0819 14:26:06.132266 12127 net.cpp:544] ip2 -> ip2
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:06.134950 12127 net.cpp:262] Setting up ip2
I0819 14:26:06.134966 12127 net.cpp:269] TEST Top shape for layer 8 'ip2' 13 10 (130)
I0819 14:26:06.134982 12127 layer_factory.hpp:172] Creating layer 'ip2_ip2_0_split' of type 'Split'
I0819 14:26:06.134989 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:06.134999 12127 net.cpp:202] Created Layer ip2_ip2_0_split (9)
I0819 14:26:06.135005 12127 net.cpp:574] ip2_ip2_0_split <- ip2
I0819 14:26:06.135011 12127 net.cpp:544] ip2_ip2_0_split -> ip2_ip2_0_split_0
I0819 14:26:06.135020 12127 net.cpp:544] ip2_ip2_0_split -> ip2_ip2_0_split_1
I0819 14:26:06.135076 12127 net.cpp:262] Setting up ip2_ip2_0_split
I0819 14:26:06.135087 12127 net.cpp:269] TEST Top shape for layer 9 'ip2_ip2_0_split' 13 10 (130)
I0819 14:26:06.135092 12127 net.cpp:269] TEST Top shape for layer 9 'ip2_ip2_0_split' 13 10 (130)
I0819 14:26:06.135097 12127 layer_factory.hpp:172] Creating layer 'accuracy' of type 'Accuracy'
I0819 14:26:06.135102 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:06.135118 12127 net.cpp:202] Created Layer accuracy (10)
I0819 14:26:06.135124 12127 net.cpp:574] accuracy <- ip2_ip2_0_split_0
I0819 14:26:06.135130 12127 net.cpp:574] accuracy <- label_mnist_1_split_0
I0819 14:26:06.135136 12127 net.cpp:544] accuracy -> accuracy
I0819 14:26:06.135172 12127 net.cpp:262] Setting up accuracy
I0819 14:26:06.135183 12127 net.cpp:269] TEST Top shape for layer 10 'accuracy' (1)
I0819 14:26:06.135188 12127 layer_factory.hpp:172] Creating layer 'loss' of type 'SoftmaxWithLoss'
I0819 14:26:06.135193 12127 layer_factory.hpp:184] Layer's types are Ftype:FLOAT16 Btype:FLOAT16 Fmath:FLOAT16 Bmath:FLOAT16
I0819 14:26:06.135210 12127 net.cpp:202] Created Layer loss (11)
I0819 14:26:06.135215 12127 net.cpp:574] loss <- ip2_ip2_0_split_1
I0819 14:26:06.135221 12127 net.cpp:574] loss <- label_mnist_1_split_1
I0819 14:26:06.135226 12127 net.cpp:544] loss -> loss
I0819 14:26:06.135409 12127 net.cpp:262] Setting up loss
I0819 14:26:06.135419 12127 net.cpp:269] TEST Top shape for layer 11 'loss' (1)
I0819 14:26:06.135423 12127 net.cpp:273]     with loss weight 1
I0819 14:26:06.135430 12127 net.cpp:338] loss needs backward computation.
I0819 14:26:06.135437 12127 net.cpp:340] accuracy does not need backward computation.
I0819 14:26:06.135442 12127 net.cpp:338] ip2_ip2_0_split needs backward computation.
I0819 14:26:06.135447 12127 net.cpp:338] ip2 needs backward computation.
I0819 14:26:06.135452 12127 net.cpp:338] relu1 needs backward computation.
I0819 14:26:06.135455 12127 net.cpp:338] ip1 needs backward computation.
I0819 14:26:06.135460 12127 net.cpp:338] pool2 needs backward computation.
I0819 14:26:06.135464 12127 net.cpp:338] conv2 needs backward computation.
I0819 14:26:06.135468 12127 net.cpp:338] pool1 needs backward computation.
I0819 14:26:06.135473 12127 net.cpp:338] conv1 needs backward computation.
I0819 14:26:06.135478 12127 net.cpp:340] label_mnist_1_split does not need backward computation.
I0819 14:26:06.135483 12127 net.cpp:340] mnist does not need backward computation.
I0819 14:26:06.135488 12127 net.cpp:382] This network produces output accuracy
I0819 14:26:06.135493 12127 net.cpp:382] This network produces output loss
I0819 14:26:06.135517 12127 net.cpp:405] Top memory (TEST) required for data: 525656 diff: 546068
I0819 14:26:06.135524 12127 net.cpp:408] Bottom memory (TEST) required for data: 525648 diff: 546060
I0819 14:26:06.135527 12127 net.cpp:411] Shared (in-place) memory (TEST) by data: 13000 diff: 13000
I0819 14:26:06.135532 12127 net.cpp:414] Parameters memory (TEST) required for data: 862160 diff: 862160
I0819 14:26:06.135550 12127 net.cpp:417] Parameters shared memory (TEST) by data: 0 diff: 0
I0819 14:26:06.135553 12127 net.cpp:423] Network initialization done.
I0819 14:26:06.135634 12127 solver.cpp:55] Solver scaffolding done.
I0819 14:26:06.204330 12127 parallel.cpp:95] Starting sync 0 (of total 8), {0.8}
I0819 14:26:06.204382 12127 parallel.cpp:169] [0 - 0] P2PSync adding callback
I0819 14:26:06.204388 12127 parallel.cpp:95] Starting sync 1 (of total 8), {1.8}
I0819 14:26:06.204393 12127 parallel.cpp:169] [1 - 1] P2PSync adding callback
I0819 14:26:06.204397 12127 parallel.cpp:95] Starting sync 2 (of total 8), {2.8}
I0819 14:26:06.204406 12127 parallel.cpp:169] [2 - 2] P2PSync adding callback
I0819 14:26:06.204409 12127 parallel.cpp:95] Starting sync 3 (of total 8), {3.8}
I0819 14:26:06.204414 12127 parallel.cpp:169] [3 - 3] P2PSync adding callback
I0819 14:26:06.204418 12127 parallel.cpp:95] Starting sync 4 (of total 8), {4.8}
I0819 14:26:06.204423 12127 parallel.cpp:169] [4 - 4] P2PSync adding callback
I0819 14:26:06.204427 12127 parallel.cpp:95] Starting sync 5 (of total 8), {5.8}
I0819 14:26:06.204432 12127 parallel.cpp:169] [5 - 5] P2PSync adding callback
I0819 14:26:06.204435 12127 parallel.cpp:95] Starting sync 6 (of total 8), {6.8}
I0819 14:26:06.204450 12127 parallel.cpp:169] [6 - 6] P2PSync adding callback
I0819 14:26:06.204453 12127 parallel.cpp:95] Starting sync 7 (of total 8), {7.8}
I0819 14:26:06.204458 12127 parallel.cpp:169] [7 - 7] P2PSync adding callback
I0819 14:26:06.204463 12127 parallel.cpp:101] Starting Optimization
I0819 14:26:06.204466 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:06.204527 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:06.204569 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:06.204617 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:06.204741 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:06.205221 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:06.205379 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:06.205521 12127 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:06.226933 12180 common.cpp:533] NVML initialized, thread 12180
I0819 14:26:06.367383 12180 common.cpp:555] {0} NVML succeeded to set CPU affinity
I0819 14:26:06.518798 12181 common.cpp:555] {1} NVML succeeded to set CPU affinity
I0819 14:26:06.812333 12182 common.cpp:555] {2} NVML succeeded to set CPU affinity
I0819 14:26:07.146512 12183 common.cpp:555] {3} NVML succeeded to set CPU affinity
I0819 14:26:07.524495 12184 common.cpp:555] {4} NVML succeeded to set CPU affinity
I0819 14:26:07.940737 12185 common.cpp:555] {5} NVML succeeded to set CPU affinity
I0819 14:26:08.378716 12186 common.cpp:555] {6} NVML succeeded to set CPU affinity
I0819 14:26:08.804412 12181 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.804412 12182 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.804430 12183 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.804477 12184 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.804525 12185 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.804567 12186 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.807494 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:08.807727 12182 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:08.809671 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:08.811395 12205 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:08.811578 12187 common.cpp:555] {7} NVML succeeded to set CPU affinity
I0819 14:26:08.811616 12187 solver.cpp:40] Solver data type: FLOAT16
I0819 14:26:08.811836 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:08.812060 12184 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:08.815555 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:08.820106 12207 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:08.820359 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:08.820652 12183 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:08.823220 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:08.826602 12209 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:08.827469 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:08.830896 12181 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:08.830940 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:08.832684 12211 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:09.112213 12183 data_layer.cpp:200] [n0.d3.r3] Output data size: 8, 1, 28, 28
I0819 14:26:09.112282 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:09.113519 12182 data_layer.cpp:200] [n0.d2.r2] Output data size: 8, 1, 28, 28
I0819 14:26:09.114099 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:09.114202 12184 data_layer.cpp:200] [n0.d4.r4] Output data size: 8, 1, 28, 28
I0819 14:26:09.114372 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:09.117770 12181 data_layer.cpp:200] [n0.d1.r1] Output data size: 8, 1, 28, 28
I0819 14:26:09.118423 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:09.118736 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:09.123558 12185 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:09.123598 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:09.125273 12217 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:09.125365 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:09.125687 12186 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:09.128406 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:09.128469 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:09.130543 12219 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:09.132122 12187 data_reader.cpp:59] Sample Data Reader threads: 1, out queues: 1, depth: 8
I0819 14:26:09.132172 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:09.136530 12221 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
I0819 14:26:09.164611 12185 data_layer.cpp:200] [n0.d5.r5] Output data size: 8, 1, 28, 28
I0819 14:26:09.164680 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:09.165598 12186 data_layer.cpp:200] [n0.d6.r6] Output data size: 8, 1, 28, 28
I0819 14:26:09.166170 12187 data_layer.cpp:200] [n0.d7.r7] Output data size: 8, 1, 28, 28
I0819 14:26:09.166437 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:09.166457 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:10.327457 12183 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:10.327579 12183 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:10.327780 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:10.328166 12183 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:10.329828 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:10.331306 12226 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:10.355618 12183 data_layer.cpp:200] (n0.d3.r3) Output data size: 13, 1, 28, 28
I0819 14:26:10.355700 12183 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:10.359419 12227 data_layer.cpp:105] (n0.d3.r3) Parser threads: 1
I0819 14:26:10.359443 12227 data_layer.cpp:107] (n0.d3.r3) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
I0819 14:26:10.835765 12183 solver.cpp:55] Solver scaffolding done.
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:11.257170 12182 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:11.257272 12182 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:11.257467 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:11.258131 12182 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:11.259642 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:11.261420 12229 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:11.277657 12182 data_layer.cpp:200] (n0.d2.r2) Output data size: 13, 1, 28, 28
I0819 14:26:11.277732 12182 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:11.279595 12230 data_layer.cpp:105] (n0.d2.r2) Parser threads: 1
I0819 14:26:11.279628 12230 data_layer.cpp:107] (n0.d2.r2) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
I0819 14:26:11.719116 12182 solver.cpp:55] Solver scaffolding done.
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:12.137696 12181 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:12.137830 12181 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:12.138058 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:12.138795 12181 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:12.141125 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:12.143594 12232 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:12.259518 12181 data_layer.cpp:200] (n0.d1.r1) Output data size: 13, 1, 28, 28
I0819 14:26:12.259595 12181 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:12.261369 12233 data_layer.cpp:105] (n0.d1.r1) Parser threads: 1
I0819 14:26:12.261397 12233 data_layer.cpp:107] (n0.d1.r1) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 4
in math_function.cpp line 565, grp_num= 2, rand_index= 5
in math_function.cpp line 565, grp_num= 3, rand_index= 2
in math_function.cpp line 565, grp_num= 4, rand_index= 1
in math_function.cpp line 565, grp_num= 5, rand_index= 2
in math_function.cpp line 565, grp_num= 6, rand_index= 4
in math_function.cpp line 565, grp_num= 7, rand_index= 3
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 262144
in math_function.cpp line 656, gspt_info_1[i]= 9609891, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.00195312 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 1
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 1
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -1179648
in math_function.cpp line 656, gspt_info_1[i]= 8389705, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.0175781 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 4
in math_function.cpp line 565, grp_num= 2, rand_index= 3
in math_function.cpp line 565, grp_num= 3, rand_index= 3
in math_function.cpp line 565, grp_num= 4, rand_index= 3
in math_function.cpp line 565, grp_num= 5, rand_index= 5
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 0
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -2228224
in math_function.cpp line 656, gspt_info_1[i]= 9549640, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.0332031 (Q_frac=26
I0819 14:26:13.597795 12181 solver.cpp:55] Solver scaffolding done.
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:13.756623 12184 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:13.756768 12184 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:13.757032 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:13.757282 12184 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:13.759277 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:13.761255 12235 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
I0819 14:26:13.864593 12184 data_layer.cpp:200] (n0.d4.r4) Output data size: 13, 1, 28, 28
I0819 14:26:13.864696 12184 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:13.867190 12236 data_layer.cpp:105] (n0.d4.r4) Parser threads: 1
I0819 14:26:13.867213 12236 data_layer.cpp:107] (n0.d4.r4) Transformer threads: 1
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
I0819 14:26:13.878717 12187 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:13.878849 12187 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:13.879073 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:13.879226 12187 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:13.880722 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:13.882527 12238 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:13.887152 12187 data_layer.cpp:200] (n0.d7.r7) Output data size: 13, 1, 28, 28
I0819 14:26:13.887217 12187 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:13.889994 12239 data_layer.cpp:105] (n0.d7.r7) Parser threads: 1
I0819 14:26:13.890019 12239 data_layer.cpp:107] (n0.d7.r7) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 1
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 4
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 6
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 4
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 524288
in math_function.cpp line 656, gspt_info_1[i]= 8865172, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 2
in math_function.cpp line 565, grp_num= 2, rand_index= 1
in math_function.cpp line 565, grp_num= 3, rand_index= 1
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 5
in math_function.cpp line 565, grp_num= 6, rand_index= 4
in math_function.cpp line 565, grp_num= 7, rand_index= 0
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -1835008
in math_function.cpp line 656, gspt_info_1[i]= 8951136, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.0136719 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 2
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 6
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 4
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 655360
in math_function.cpp line 656, gspt_info_1[i]= 9110940, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.00976562 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 6
in math_function.cpp line 565, grp_num= 2, rand_index= 1
in math_function.cpp line 565, grp_num= 3, rand_index= 3
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 2
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 3
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 131072
in math_function.cpp line 656, gspt_info_1[i]= 10009755, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.000976562 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
in math_function.cpp line 565, grp_num= 0, rand_index= 5
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 3
in math_function.cpp line 565, grp_num= 4, rand_index= 0
in math_function.cpp line 565, grp_num= 5, rand_index= 3
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 0
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -2097152
in math_function.cpp line 656, gspt_info_1[i]= 10694864, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.03125 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 5
in math_function.cpp line 565, grp_num= 1, rand_index= 3
in math_function.cpp line 565, grp_num= 2, rand_index= 2
in math_function.cpp line 565, grp_num= 3, rand_index= 2
in math_function.cpp line 565, grp_num= 4, rand_index= 1
in math_function.cpp line 565, grp_num= 5, rand_index= 1
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 0
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -2228224
in math_function.cpp line 656, gspt_info_1[i]= 11346504, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.0166016 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 1
in math_function.cpp line 565, grp_num= 5, rand_index= 4
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 3
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -131072
in math_function.cpp line 656, gspt_info_1[i]= 8389387, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.00195312 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 1
in math_function.cpp line 565, grp_num= 4, rand_index= 0
in math_function.cpp line 565, grp_num= 5, rand_index= 4
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 2
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -393216
in math_function.cpp line 656, gspt_info_1[i]= 8589594, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.00585938 (Q_frac=26
I0819 14:26:15.608752 12184 solver.cpp:55] Solver scaffolding done.
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:15.621356 12187 solver.cpp:55] Solver scaffolding done.
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 1
in math_function.cpp line 565, grp_num= 4, rand_index= 5
in math_function.cpp line 565, grp_num= 5, rand_index= 1
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 2
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -655360
in math_function.cpp line 656, gspt_info_1[i]= 8591946, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.00976562 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:16.119555 12185 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:16.119645 12185 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:16.119815 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:16.119961 12185 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:16.121404 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:16.122711 12241 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:16.123720 12185 data_layer.cpp:200] (n0.d5.r5) Output data size: 13, 1, 28, 28
I0819 14:26:16.123762 12185 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:16.125058 12242 data_layer.cpp:105] (n0.d5.r5) Parser threads: 1
I0819 14:26:16.125072 12242 data_layer.cpp:107] (n0.d5.r5) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 6
in math_function.cpp line 565, grp_num= 2, rand_index= 2
in math_function.cpp line 565, grp_num= 3, rand_index= 2
in math_function.cpp line 565, grp_num= 4, rand_index= 3
in math_function.cpp line 565, grp_num= 5, rand_index= 2
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 917504
in math_function.cpp line 656, gspt_info_1[i]= 10036877, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0546875 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 2
in math_function.cpp line 565, grp_num= 3, rand_index= 1
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 2
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 2
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = -393216
in math_function.cpp line 656, gspt_info_1[i]= 9770138, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = -0.00585938 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 3
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 4
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 655360
in math_function.cpp line 656, gspt_info_1[i]= 9899228, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.00488281 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 5
in math_function.cpp line 565, grp_num= 3, rand_index= 4
in math_function.cpp line 565, grp_num= 4, rand_index= 2
in math_function.cpp line 565, grp_num= 5, rand_index= 4
in math_function.cpp line 565, grp_num= 6, rand_index= 1
in math_function.cpp line 565, grp_num= 7, rand_index= 4
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 393216
in math_function.cpp line 656, gspt_info_1[i]= 8570124, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.00585938 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:16.677002 12186 solver.cpp:174] Creating test net (#0) specified by net file: examples/mnist/lenet_train_test_fp16.prototxt
W0819 14:26:16.677161 12186 parallel.cpp:319] Batch size must be divisible by the number of solvers (GPUs): it's been adjusted from 100 to 104
I0819 14:26:16.677362 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:16.677531 12186 data_reader.cpp:59] Data Reader threads: 1, out queues: 1, depth: 13
I0819 14:26:16.679579 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:16.681725 12244 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_test_lmdb
I0819 14:26:16.682727 12186 data_layer.cpp:200] (n0.d6.r6) Output data size: 13, 1, 28, 28
I0819 14:26:16.682757 12186 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:16.684469 12245 data_layer.cpp:105] (n0.d6.r6) Parser threads: 1
I0819 14:26:16.684499 12245 data_layer.cpp:107] (n0.d6.r6) Transformer threads: 1
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 4
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 2
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 4
in math_function.cpp line 565, grp_num= 7, rand_index= 3
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 262144
in math_function.cpp line 656, gspt_info_1[i]= 9645091, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=24
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 24
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 6
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 1
in math_function.cpp line 565, grp_num= 4, rand_index= 4
in math_function.cpp line 565, grp_num= 5, rand_index= 5
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 4
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 655360
in math_function.cpp line 656, gspt_info_1[i]= 9967964, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.00976562 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 1
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 4
in math_function.cpp line 565, grp_num= 5, rand_index= 1
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 8652885, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0078125 (Q_frac=27
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 0
in math_function.cpp line 565, grp_num= 2, rand_index= 6
in math_function.cpp line 565, grp_num= 3, rand_index= 2
in math_function.cpp line 565, grp_num= 4, rand_index= 1
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 3
in math_function.cpp line 565, grp_num= 7, rand_index= 6
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 2228224
in math_function.cpp line 656, gspt_info_1[i]= 8593950, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.0332031 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:17.502923 12185 solver.cpp:55] Solver scaffolding done.
In blob.cpp, blob.Q_frac= 27
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
in math_function.cpp line 565, grp_num= 0, rand_index= 4
in math_function.cpp line 565, grp_num= 1, rand_index= 5
in math_function.cpp line 565, grp_num= 2, rand_index= 0
in math_function.cpp line 565, grp_num= 3, rand_index= 0
in math_function.cpp line 565, grp_num= 4, rand_index= 6
in math_function.cpp line 565, grp_num= 5, rand_index= 0
in math_function.cpp line 565, grp_num= 6, rand_index= 2
in math_function.cpp line 565, grp_num= 7, rand_index= 5
in math_function.cpp line 565, grp_num= 8, rand_index= 0
in math_function.cpp, line 730, tmp_value = 1048576
in math_function.cpp line 656, gspt_info_1[i]= 9702421, gspt_info_2[i]= 0
definition of type is half.
gspt r[i] = 0.015625 (Q_frac=26
In fp32 2^(16):65536.00000000000000000000, in fp16 2^(16):inf
In blob.cpp, blob.Q_frac= 26
in math_function.cpp line 934, grp_num= 0, rand_index= 3
in math_function.cpp line 934, grp_num= 1, rand_index= 3
in math_function.cpp line 934, grp_num= 2, rand_index= 3
in math_function.cpp line 934, grp_num= 3, rand_index= 2
in math_function.cpp line 934, grp_num= 4, rand_index= 3
in math_function.cpp line 934, grp_num= 5, rand_index= 3
in math_function.cpp line 934, grp_num= 6, rand_index= 2
in math_function.cpp line 934, grp_num= 7, rand_index= 3
in math_function.cpp line 934, grp_num= 8, rand_index= 0
in math_function.cpp line 1223, gspt_info_1[i]= 7186131, gspt_info_2[i]= 0
in math_function.cpp, line 1227, tmp_value = 0
definition of type is half [FILL ZERO].
gspt r[i] = 0 (Q_frac=22
In blob.cpp, blob.Q_frac= 22
I0819 14:26:17.563305 12186 solver.cpp:55] Solver scaffolding done.
I0819 14:26:20.059680 12183 parallel.cpp:221] [3 - 3] P2PSync added callback
I0819 14:26:20.059725 12181 parallel.cpp:221] [1 - 1] P2PSync added callback
I0819 14:26:20.059744 12183 solver.cpp:418] [0.3] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059762 12181 solver.cpp:418] [0.1] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059836 12182 parallel.cpp:221] [2 - 2] P2PSync added callback
I0819 14:26:20.059872 12182 solver.cpp:418] [0.2] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059886 12185 parallel.cpp:221] [5 - 5] P2PSync added callback
I0819 14:26:20.059888 12180 parallel.cpp:221] [0 - 0] P2PSync added callback
I0819 14:26:20.059901 12187 parallel.cpp:221] [7 - 7] P2PSync added callback
I0819 14:26:20.059939 12186 parallel.cpp:221] [6 - 6] P2PSync added callback
I0819 14:26:20.059949 12185 solver.cpp:418] [0.5] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059950 12180 solver.cpp:418] [0.0] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059957 12187 solver.cpp:418] [0.7] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.059945 12184 parallel.cpp:221] [4 - 4] P2PSync added callback
I0819 14:26:20.059979 12186 solver.cpp:418] [0.6] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.060006 12184 solver.cpp:418] [0.4] Solving LeNet-fp16 Learning Rate Policy: inv
I0819 14:26:20.060168 12187 net.cpp:1441] [0.7] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060173 12181 net.cpp:1441] [0.1] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060178 12183 net.cpp:1441] [0.3] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060173 12184 net.cpp:1441] [0.4] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060189 12186 net.cpp:1441] [0.6] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060181 12180 net.cpp:1441] [0.0] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060190 12182 net.cpp:1441] [0.2] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.060183 12185 net.cpp:1441] [0.5] Reserving 862592 bytes of shared learnable space for type FLOAT16
I0819 14:26:20.077126 12181 solver.cpp:220] [0.1] Starting Optimization on GPU 1
I0819 14:26:20.077139 12184 solver.cpp:220] [0.4] Starting Optimization on GPU 4
I0819 14:26:20.077128 12185 solver.cpp:220] [0.5] Starting Optimization on GPU 5
I0819 14:26:20.077129 12186 solver.cpp:220] [0.6] Starting Optimization on GPU 6
I0819 14:26:20.077154 12182 solver.cpp:220] [0.2] Starting Optimization on GPU 2
I0819 14:26:20.077143 12183 solver.cpp:220] [0.3] Starting Optimization on GPU 3
I0819 14:26:20.077142 12180 solver.cpp:220] [0.0] Starting Optimization on GPU 0
I0819 14:26:20.077142 12187 solver.cpp:220] [0.7] Starting Optimization on GPU 7
I0819 14:26:20.080057 12255 common.cpp:555] {4} NVML succeeded to set CPU affinity
I0819 14:26:20.080103 12180 solver.cpp:257] [MultiGPU] Initial Test started...
I0819 14:26:20.080142 12180 solver.cpp:501] Iteration 0, Testing net (#0)
I0819 14:26:20.080691 12257 common.cpp:555] {3} NVML succeeded to set CPU affinity
I0819 14:26:20.080940 12261 common.cpp:555] {7} NVML succeeded to set CPU affinity
I0819 14:26:20.081197 12259 common.cpp:555] {6} NVML succeeded to set CPU affinity
I0819 14:26:20.081463 12258 common.cpp:555] {1} NVML succeeded to set CPU affinity
I0819 14:26:20.081702 12256 common.cpp:555] {5} NVML succeeded to set CPU affinity
I0819 14:26:20.081957 12260 common.cpp:555] {0} NVML succeeded to set CPU affinity
I0819 14:26:20.082206 12262 common.cpp:555] {2} NVML succeeded to set CPU affinity
I0819 14:26:21.656210 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.0769043
I0819 14:26:21.656258 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 2.30273 (* 1 = 2.30273 loss)
I0819 14:26:21.656350 12180 solver.cpp:262] [MultiGPU] Initial Test completed in 1.57611s
I0819 14:26:21.676004 12212 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 3
I0819 14:26:21.691151 12215 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 1
I0819 14:26:21.707749 12213 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 2
I0819 14:26:21.722822 12223 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 6
I0819 14:26:21.736966 12224 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 7
I0819 14:26:21.755342 12214 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 4
I0819 14:26:21.771665 12175 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 0
I0819 14:26:21.787470 12222 internal_thread.cpp:42] Restarting 4 internal thread(s) on device 5
I0819 14:26:21.787524 12212 internal_thread.cpp:18] Starting 1 internal thread(s) on device 3
I0819 14:26:21.787591 12213 internal_thread.cpp:18] Starting 1 internal thread(s) on device 2
I0819 14:26:21.787559 12215 internal_thread.cpp:18] Starting 1 internal thread(s) on device 1
I0819 14:26:21.787701 12223 internal_thread.cpp:18] Starting 1 internal thread(s) on device 6
I0819 14:26:21.788395 12224 internal_thread.cpp:18] Starting 1 internal thread(s) on device 7
I0819 14:26:21.788424 12175 internal_thread.cpp:18] Starting 1 internal thread(s) on device 0
I0819 14:26:21.788426 12214 internal_thread.cpp:18] Starting 1 internal thread(s) on device 4
I0819 14:26:21.792286 12212 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.792323 12215 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.792358 12304 common.cpp:555] {1} NVML succeeded to set CPU affinity
I0819 14:26:21.792376 12214 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.792440 12175 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.793954 12305 common.cpp:555] {3} NVML succeeded to set CPU affinity
I0819 14:26:21.795452 12306 common.cpp:555] {4} NVML succeeded to set CPU affinity
I0819 14:26:21.796964 12309 common.cpp:555] {2} NVML succeeded to set CPU affinity
I0819 14:26:21.798382 12310 common.cpp:555] {6} NVML succeeded to set CPU affinity
I0819 14:26:21.800101 12307 common.cpp:555] {0} NVML succeeded to set CPU affinity
I0819 14:26:21.801498 12222 internal_thread.cpp:18] Starting 1 internal thread(s) on device 5
I0819 14:26:21.801631 12308 common.cpp:555] {7} NVML succeeded to set CPU affinity
I0819 14:26:21.801648 12215 internal_thread.cpp:18] Starting 3 internal thread(s) on device 1
I0819 14:26:21.801681 12224 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.801687 12214 internal_thread.cpp:18] Starting 3 internal thread(s) on device 4
I0819 14:26:21.801651 12212 internal_thread.cpp:18] Starting 3 internal thread(s) on device 3
I0819 14:26:21.801689 12175 internal_thread.cpp:18] Starting 3 internal thread(s) on device 0
I0819 14:26:21.801734 12213 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.801728 12222 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.801715 12223 data_reader.cpp:59] Data Reader threads: 3, out queues: 12, depth: 8
I0819 14:26:21.804661 12311 common.cpp:555] {5} NVML succeeded to set CPU affinity
I0819 14:26:21.805318 12313 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.806767 12223 internal_thread.cpp:18] Starting 3 internal thread(s) on device 6
I0819 14:26:21.806782 12312 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.806785 12222 internal_thread.cpp:18] Starting 3 internal thread(s) on device 5
I0819 14:26:21.808486 12224 internal_thread.cpp:18] Starting 3 internal thread(s) on device 7
I0819 14:26:21.808490 12316 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.809957 12315 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.811379 12317 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.811403 12213 internal_thread.cpp:18] Starting 3 internal thread(s) on device 2
I0819 14:26:21.812842 12314 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.814517 12318 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.815894 12320 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.817451 12321 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.819092 12319 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.820793 12322 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.824617 12323 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.825839 12324 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.827411 12325 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.828995 12327 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.830147 12326 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.831641 12328 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.833058 12329 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.834400 12332 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.835896 12330 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.837077 12331 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.838599 12333 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.840051 12334 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.841423 12335 db_lmdb.cpp:36] Opened lmdb examples/mnist/mnist_train_lmdb
I0819 14:26:21.864646 12215 data_layer.cpp:200] [n0.d1.r1] Output data size: 8, 1, 28, 28
I0819 14:26:21.864681 12215 data_layer.cpp:105] [n0.d1.r1] Parser threads: 3 (auto)
I0819 14:26:21.864693 12215 data_layer.cpp:107] [n0.d1.r1] Transformer threads: 4 (auto)
I0819 14:26:21.865779 12214 data_layer.cpp:200] [n0.d4.r4] Output data size: 8, 1, 28, 28
I0819 14:26:21.865823 12214 data_layer.cpp:105] [n0.d4.r4] Parser threads: 3 (auto)
I0819 14:26:21.865835 12214 data_layer.cpp:107] [n0.d4.r4] Transformer threads: 4 (auto)
I0819 14:26:21.866470 12212 data_layer.cpp:200] [n0.d3.r3] Output data size: 8, 1, 28, 28
I0819 14:26:21.866511 12212 data_layer.cpp:105] [n0.d3.r3] Parser threads: 3 (auto)
I0819 14:26:21.866523 12212 data_layer.cpp:107] [n0.d3.r3] Transformer threads: 4 (auto)
I0819 14:26:21.867338 12175 data_layer.cpp:200] [n0.d0.r0] Output data size: 8, 1, 28, 28
I0819 14:26:21.867383 12175 data_layer.cpp:105] [n0.d0.r0] Parser threads: 3 (auto)
I0819 14:26:21.867393 12175 data_layer.cpp:107] [n0.d0.r0] Transformer threads: 4 (auto)
I0819 14:26:21.870844 12222 data_layer.cpp:200] [n0.d5.r5] Output data size: 8, 1, 28, 28
I0819 14:26:21.870885 12222 data_layer.cpp:105] [n0.d5.r5] Parser threads: 3 (auto)
I0819 14:26:21.870895 12222 data_layer.cpp:107] [n0.d5.r5] Transformer threads: 4 (auto)
I0819 14:26:21.871150 12223 data_layer.cpp:200] [n0.d6.r6] Output data size: 8, 1, 28, 28
I0819 14:26:21.871199 12223 data_layer.cpp:105] [n0.d6.r6] Parser threads: 3 (auto)
I0819 14:26:21.871244 12223 data_layer.cpp:107] [n0.d6.r6] Transformer threads: 4 (auto)
I0819 14:26:21.871914 12224 data_layer.cpp:200] [n0.d7.r7] Output data size: 8, 1, 28, 28
I0819 14:26:21.871960 12224 data_layer.cpp:105] [n0.d7.r7] Parser threads: 3 (auto)
I0819 14:26:21.871973 12224 data_layer.cpp:107] [n0.d7.r7] Transformer threads: 4 (auto)
I0819 14:26:21.877614 12213 data_layer.cpp:200] [n0.d2.r2] Output data size: 8, 1, 28, 28
I0819 14:26:21.877653 12213 data_layer.cpp:105] [n0.d2.r2] Parser threads: 3 (auto)
I0819 14:26:21.877663 12213 data_layer.cpp:107] [n0.d2.r2] Transformer threads: 4 (auto)
I0819 14:26:22.008081 12180 solver.cpp:341]     [0.0] Iteration 0 (0.351633 s), loss = 2.30273
I0819 14:26:22.009574 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.30273 (* 1 = 2.30273 loss)
I0819 14:26:22.009629 12180 sgd_solver.cpp:180] [0.0] Iteration 0, lr = 0.006, m = 0.9, lrm = 0.06, wd = 0.0005, gs = 1
I0819 14:26:22.028228 12180 solver.cpp:341]     [0.0] Iteration 1 (0.0201338 s), loss = 2.30273
I0819 14:26:22.028343 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.30273 (* 1 = 2.30273 loss)
I0819 14:26:22.175338 12187 cudnn_conv_layer.cpp:849] [n0.d7.r7] Conv Algos (F,BD,BF): 'conv1' with space 6.91K 1/1 0p 1 0p 	(avail 11.35G, req 2.06M)	t: 0 0 0.1
I0819 14:26:22.256990 12183 cudnn_conv_layer.cpp:849] [n0.d3.r3] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 0p 1 0p 	(avail 11.35G, req 2.06M)	t: 0 0 0.1
I0819 14:26:22.359421 12185 cudnn_conv_layer.cpp:849] [n0.d5.r5] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 1p 1 5p 	(avail 11.35G, req 2.06M)	t: 0 0 0.11
I0819 14:26:22.361574 12184 cudnn_conv_layer.cpp:849] [n0.d4.r4] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 0p 1 0p 	(avail 11.35G, req 2.06M)	t: 0 0 0.1
I0819 14:26:22.458999 12186 cudnn_conv_layer.cpp:849] [n0.d6.r6] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 0p 1 0p 	(avail 11.35G, req 2.06M)	t: 0 0 0.11
I0819 14:26:22.461419 12182 cudnn_conv_layer.cpp:849] [n0.d2.r2] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 0p 1 0p 	(avail 11.35G, req 2.06M)	t: 0 0 0.11
I0819 14:26:22.464751 12181 cudnn_conv_layer.cpp:849] [n0.d1.r1] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 0p 1 5p 	(avail 11.35G, req 2.06M)	t: 0 0 0.11
I0819 14:26:22.473542 12180 cudnn_conv_layer.cpp:849] [n0.d0.r0] Conv Algos (F,BD,BF): 'conv1' with space 2.06M 1/1 1p 1 0p 	(avail 11.09G, req 2.06M)	t: 0 0 0.11
I0819 14:26:22.581921 12183 cudnn_conv_layer.cpp:849] [n0.d3.r3] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.07 0.06
I0819 14:26:22.589903 12184 cudnn_conv_layer.cpp:849] [n0.d4.r4] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.07 0.06
I0819 14:26:22.591792 12187 cudnn_conv_layer.cpp:849] [n0.d7.r7] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.06 0.06
I0819 14:26:22.609736 12185 cudnn_conv_layer.cpp:849] [n0.d5.r5] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.07 0.06
I0819 14:26:22.613392 12186 cudnn_conv_layer.cpp:849] [n0.d6.r6] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.06 0.06
I0819 14:26:22.636795 12182 cudnn_conv_layer.cpp:849] [n0.d2.r2] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.06 0.05
I0819 14:26:22.650578 12181 cudnn_conv_layer.cpp:849] [n0.d1.r1] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.35G, req 2.06M)	t: 0 0.06 0.05
I0819 14:26:22.682121 12180 cudnn_conv_layer.cpp:849] [n0.d0.r0] Conv Algos (F,BD,BF): 'conv2' with space 2.06M 20/1 7p 0p 0p 	(avail 11.09G, req 2.06M)	t: 0 0.06 0.06
I0819 14:26:22.686460 12180 solver.cpp:341]     [0.0] Iteration 2 (0.658181 s), loss = 2.30273
I0819 14:26:22.686547 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.30273 (* 1 = 2.30273 loss)
I0819 14:26:23.101030 12180 solver.cpp:333]     [0.0] Iteration 100 (236.412 iter/s, 0.414531s/98 iter), loss = 2.3125
I0819 14:26:23.101125 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.3125 (* 1 = 2.3125 loss)
I0819 14:26:23.101152 12180 sgd_solver.cpp:180] [0.0] Iteration 100, lr = 0.00595539, m = 0.9, lrm = 0.0595539, wd = 0.0005, gs = 1
I0819 14:26:23.484020 12180 solver.cpp:333]     [0.0] Iteration 200 (261.139 iter/s, 0.382937s/100 iter), loss = 2.35156
I0819 14:26:23.484200 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.35156 (* 1 = 2.35156 loss)
I0819 14:26:23.484244 12180 sgd_solver.cpp:180] [0.0] Iteration 200, lr = 0.00591155, m = 0.9, lrm = 0.0591155, wd = 0.0005, gs = 1
I0819 14:26:23.861603 12180 solver.cpp:333]     [0.0] Iteration 300 (264.859 iter/s, 0.37756s/100 iter), loss = 2.32617
I0819 14:26:23.861706 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.32617 (* 1 = 2.32617 loss)
I0819 14:26:23.861726 12180 sgd_solver.cpp:180] [0.0] Iteration 300, lr = 0.00586845, m = 0.9, lrm = 0.0586845, wd = 0.0005, gs = 1
I0819 14:26:24.242758 12180 solver.cpp:333]     [0.0] Iteration 400 (262.38 iter/s, 0.381127s/100 iter), loss = 2.29492
I0819 14:26:24.242908 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.29492 (* 1 = 2.29492 loss)
I0819 14:26:24.242923 12180 sgd_solver.cpp:180] [0.0] Iteration 400, lr = 0.00582608, m = 0.9, lrm = 0.0582608, wd = 0.0005, gs = 1
I0819 14:26:24.618224 12180 solver.cpp:501] Iteration 500, Testing net (#0)
I0819 14:26:24.923928 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:24.957980 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.110742
I0819 14:26:24.958009 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 2.30234 (* 1 = 2.30234 loss)
I0819 14:26:24.958096 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.715304s
I0819 14:26:24.961958 12180 solver.cpp:333]     [0.0] Iteration 500 (139.801 iter/s, 0.715304s/100 iter), loss = 2.29492
I0819 14:26:24.962043 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.29492 (* 1 = 2.29492 loss)
I0819 14:26:24.962056 12180 sgd_solver.cpp:180] [0.0] Iteration 500, lr = 0.00578441, m = 0.9, lrm = 0.0578441, wd = 0.0005, gs = 1
I0819 14:26:25.341384 12180 solver.cpp:333]     [0.0] Iteration 600 (263.582 iter/s, 0.379389s/100 iter), loss = 2.31836
I0819 14:26:25.341576 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.31836 (* 1 = 2.31836 loss)
I0819 14:26:25.341591 12180 sgd_solver.cpp:180] [0.0] Iteration 600, lr = 0.00574344, m = 0.9, lrm = 0.0574344, wd = 0.0005, gs = 1
I0819 14:26:25.718746 12180 solver.cpp:333]     [0.0] Iteration 700 (265.02 iter/s, 0.377331s/100 iter), loss = 2.34766
I0819 14:26:25.718859 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.34766 (* 1 = 2.34766 loss)
I0819 14:26:25.718874 12180 sgd_solver.cpp:180] [0.0] Iteration 700, lr = 0.00570313, m = 0.9, lrm = 0.0570313, wd = 0.0005, gs = 1
I0819 14:26:26.091969 12180 solver.cpp:333]     [0.0] Iteration 800 (267.958 iter/s, 0.373193s/100 iter), loss = 2.32031
I0819 14:26:26.092077 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.32031 (* 1 = 2.32031 loss)
I0819 14:26:26.092092 12180 sgd_solver.cpp:180] [0.0] Iteration 800, lr = 0.00566348, m = 0.9, lrm = 0.0566348, wd = 0.0005, gs = 1
I0819 14:26:26.462466 12180 solver.cpp:333]     [0.0] Iteration 900 (269.93 iter/s, 0.370466s/100 iter), loss = 2.30469
I0819 14:26:26.462560 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.30469 (* 1 = 2.30469 loss)
I0819 14:26:26.462589 12180 sgd_solver.cpp:180] [0.0] Iteration 900, lr = 0.00562447, m = 0.9, lrm = 0.0562447, wd = 0.0005, gs = 1
I0819 14:26:26.500018 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:26.846081 12180 solver.cpp:501] Iteration 1000, Testing net (#0)
I0819 14:26:27.138350 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:27.185463 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.108435
I0819 14:26:27.185499 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 2.30098 (* 1 = 2.30098 loss)
I0819 14:26:27.185529 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.72303s
I0819 14:26:27.189757 12180 solver.cpp:333]     [0.0] Iteration 1000 (138.307 iter/s, 0.72303s/100 iter), 1.1/21.3ep, loss = 2.26758
I0819 14:26:27.189815 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.26758 (* 1 = 2.26758 loss)
I0819 14:26:27.189831 12180 sgd_solver.cpp:180] [0.0] Iteration 1000, lr = 0.00558607, m = 0.9, lrm = 0.0558607, wd = 0.0005, gs = 1
I0819 14:26:27.570098 12180 solver.cpp:333]     [0.0] Iteration 1100 (262.946 iter/s, 0.380306s/100 iter), 1.2/21.3ep, loss = 2.31445
I0819 14:26:27.570201 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.31445 (* 1 = 2.31445 loss)
I0819 14:26:27.570243 12180 sgd_solver.cpp:180] [0.0] Iteration 1100, lr = 0.00554829, m = 0.9, lrm = 0.0554829, wd = 0.0005, gs = 1
I0819 14:26:27.948230 12180 solver.cpp:333]     [0.0] Iteration 1200 (264.486 iter/s, 0.378093s/100 iter), 1.3/21.3ep, loss = 2.33398
I0819 14:26:27.948328 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.33398 (* 1 = 2.33398 loss)
I0819 14:26:27.948377 12180 sgd_solver.cpp:180] [0.0] Iteration 1200, lr = 0.00551109, m = 0.9, lrm = 0.0551109, wd = 0.0005, gs = 1
I0819 14:26:28.326474 12180 solver.cpp:333]     [0.0] Iteration 1300 (264.401 iter/s, 0.378213s/100 iter), 1.4/21.3ep, loss = 2.3418
I0819 14:26:28.326596 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.3418 (* 1 = 2.3418 loss)
I0819 14:26:28.326632 12180 sgd_solver.cpp:180] [0.0] Iteration 1300, lr = 0.00547447, m = 0.9, lrm = 0.0547447, wd = 0.0005, gs = 1
I0819 14:26:28.703507 12180 solver.cpp:333]     [0.0] Iteration 1400 (265.243 iter/s, 0.377012s/100 iter), 1.5/21.3ep, loss = 2.34375
I0819 14:26:28.703608 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.34375 (* 1 = 2.34375 loss)
I0819 14:26:28.703624 12180 sgd_solver.cpp:180] [0.0] Iteration 1400, lr = 0.00543842, m = 0.9, lrm = 0.0543842, wd = 0.0005, gs = 1
I0819 14:26:29.077296 12180 solver.cpp:501] Iteration 1500, Testing net (#0)
I0819 14:26:29.356602 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:29.425398 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.116895
I0819 14:26:29.425431 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 2.30863 (* 1 = 2.30863 loss)
I0819 14:26:29.425524 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.721988s
I0819 14:26:29.430272 12180 solver.cpp:333]     [0.0] Iteration 1500 (138.507 iter/s, 0.721988s/100 iter), 1.6/21.3ep, loss = 2.28711
I0819 14:26:29.430392 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.28711 (* 1 = 2.28711 loss)
I0819 14:26:29.430408 12180 sgd_solver.cpp:180] [0.0] Iteration 1500, lr = 0.00540291, m = 0.9, lrm = 0.0540291, wd = 0.0005, gs = 1
I0819 14:26:29.816975 12180 solver.cpp:333]     [0.0] Iteration 1600 (258.62 iter/s, 0.386668s/100 iter), 1.7/21.3ep, loss = 2.30859
I0819 14:26:29.817056 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.30859 (* 1 = 2.30859 loss)
I0819 14:26:29.817080 12180 sgd_solver.cpp:180] [0.0] Iteration 1600, lr = 0.00536794, m = 0.9, lrm = 0.0536794, wd = 0.0005, gs = 1
I0819 14:26:30.191833 12180 solver.cpp:333]     [0.0] Iteration 1700 (266.789 iter/s, 0.374828s/100 iter), 1.8/21.3ep, loss = 2.31055
I0819 14:26:30.191944 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 2.31055 (* 1 = 2.31055 loss)
I0819 14:26:30.191958 12180 sgd_solver.cpp:180] [0.0] Iteration 1700, lr = 0.00533349, m = 0.9, lrm = 0.0533349, wd = 0.0005, gs = 1
I0819 14:26:30.557349 12180 solver.cpp:333]     [0.0] Iteration 1800 (273.608 iter/s, 0.365486s/100 iter), 1.9/21.3ep, loss = 0.461182
I0819 14:26:30.557466 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.461182 (* 1 = 0.461182 loss)
I0819 14:26:30.557484 12180 sgd_solver.cpp:180] [0.0] Iteration 1800, lr = 0.00529956, m = 0.9, lrm = 0.0529956, wd = 0.0005, gs = 1
I0819 14:26:30.721966 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:30.923511 12180 solver.cpp:333]     [0.0] Iteration 1900 (273.13 iter/s, 0.366126s/100 iter), 2/21.3ep, loss = 0.404297
I0819 14:26:30.923636 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.404297 (* 1 = 0.404297 loss)
I0819 14:26:30.923694 12180 sgd_solver.cpp:180] [0.0] Iteration 1900, lr = 0.00526612, m = 0.9, lrm = 0.0526612, wd = 0.0005, gs = 1
I0819 14:26:31.294430 12180 solver.cpp:501] Iteration 2000, Testing net (#0)
I0819 14:26:31.559896 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:31.634709 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.937607
I0819 14:26:31.634754 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.210998 (* 1 = 0.210998 loss)
I0819 14:26:31.634786 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.711246s
I0819 14:26:31.638682 12180 solver.cpp:333]     [0.0] Iteration 2000 (140.598 iter/s, 0.711246s/100 iter), 2.1/21.3ep, loss = 0.00550461
I0819 14:26:31.638777 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00550461 (* 1 = 0.00550461 loss)
I0819 14:26:31.638789 12180 sgd_solver.cpp:180] [0.0] Iteration 2000, lr = 0.00523318, m = 0.9, lrm = 0.0523317, wd = 0.0005, gs = 1
I0819 14:26:32.012344 12180 solver.cpp:333]     [0.0] Iteration 2100 (267.655 iter/s, 0.373615s/100 iter), 2.2/21.3ep, loss = 0.273926
I0819 14:26:32.012411 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.273926 (* 1 = 0.273926 loss)
I0819 14:26:32.012425 12180 sgd_solver.cpp:180] [0.0] Iteration 2100, lr = 0.00520071, m = 0.9, lrm = 0.052007, wd = 0.0005, gs = 1
I0819 14:26:32.390202 12180 solver.cpp:333]     [0.0] Iteration 2200 (264.684 iter/s, 0.377809s/100 iter), 2.3/21.3ep, loss = 0.747559
I0819 14:26:32.390300 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.747559 (* 1 = 0.747559 loss)
I0819 14:26:32.390316 12180 sgd_solver.cpp:180] [0.0] Iteration 2200, lr = 0.0051687, m = 0.9, lrm = 0.051687, wd = 0.0005, gs = 1
I0819 14:26:32.768738 12180 solver.cpp:333]     [0.0] Iteration 2300 (264.201 iter/s, 0.378499s/100 iter), 2.5/21.3ep, loss = 0.0458069
I0819 14:26:32.768821 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0458069 (* 1 = 0.0458069 loss)
I0819 14:26:32.768837 12180 sgd_solver.cpp:180] [0.0] Iteration 2300, lr = 0.00513715, m = 0.9, lrm = 0.0513715, wd = 0.0005, gs = 1
I0819 14:26:33.147343 12180 solver.cpp:333]     [0.0] Iteration 2400 (264.152 iter/s, 0.37857s/100 iter), 2.6/21.3ep, loss = 0.000305653
I0819 14:26:33.147423 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000305653 (* 1 = 0.000305653 loss)
I0819 14:26:33.147435 12180 sgd_solver.cpp:180] [0.0] Iteration 2400, lr = 0.00510605, m = 0.9, lrm = 0.0510605, wd = 0.0005, gs = 1
I0819 14:26:33.518666 12180 solver.cpp:501] Iteration 2500, Testing net (#0)
I0819 14:26:33.772068 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:33.858614 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.971484
I0819 14:26:33.858675 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0847127 (* 1 = 0.0847127 loss)
I0819 14:26:33.858703 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.711336s
I0819 14:26:33.862964 12180 solver.cpp:333]     [0.0] Iteration 2500 (140.581 iter/s, 0.711336s/100 iter), 2.7/21.3ep, loss = 0.0294952
I0819 14:26:33.863178 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0294952 (* 1 = 0.0294952 loss)
I0819 14:26:33.863195 12180 sgd_solver.cpp:180] [0.0] Iteration 2500, lr = 0.00507538, m = 0.9, lrm = 0.0507538, wd = 0.0005, gs = 1
I0819 14:26:34.237495 12180 solver.cpp:333]     [0.0] Iteration 2600 (267.034 iter/s, 0.374484s/100 iter), 2.8/21.3ep, loss = 0.0422363
I0819 14:26:34.237598 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0422363 (* 1 = 0.0422363 loss)
I0819 14:26:34.237634 12180 sgd_solver.cpp:180] [0.0] Iteration 2600, lr = 0.00504514, m = 0.9, lrm = 0.0504514, wd = 0.0005, gs = 1
I0819 14:26:34.616835 12180 solver.cpp:333]     [0.0] Iteration 2700 (263.637 iter/s, 0.379309s/100 iter), 2.9/21.3ep, loss = 0.00308228
I0819 14:26:34.616940 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00308228 (* 1 = 0.00308228 loss)
I0819 14:26:34.616959 12180 sgd_solver.cpp:180] [0.0] Iteration 2700, lr = 0.00501532, m = 0.9, lrm = 0.0501532, wd = 0.0005, gs = 1
I0819 14:26:34.939465 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:35.003248 12180 solver.cpp:333]     [0.0] Iteration 2800 (258.811 iter/s, 0.386382s/100 iter), 3/21.3ep, loss = 6.10352e-05
I0819 14:26:35.003343 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:26:35.003365 12180 sgd_solver.cpp:180] [0.0] Iteration 2800, lr = 0.0049859, m = 0.9, lrm = 0.049859, wd = 0.0005, gs = 1
I0819 14:26:35.385121 12180 solver.cpp:333]     [0.0] Iteration 2900 (261.887 iter/s, 0.381843s/100 iter), 3.1/21.3ep, loss = 0.0159302
I0819 14:26:35.385216 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0159302 (* 1 = 0.0159302 loss)
I0819 14:26:35.385231 12180 sgd_solver.cpp:180] [0.0] Iteration 2900, lr = 0.00495689, m = 0.9, lrm = 0.0495689, wd = 0.0005, gs = 1
I0819 14:26:35.763062 12180 solver.cpp:501] Iteration 3000, Testing net (#0)
I0819 14:26:36.004958 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:36.104571 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.983032
I0819 14:26:36.104607 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0602388 (* 1 = 0.0602388 loss)
I0819 14:26:36.104666 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.719494s
I0819 14:26:36.109220 12180 solver.cpp:333]     [0.0] Iteration 3000 (138.987 iter/s, 0.719494s/100 iter), 3.2/21.3ep, loss = 0.00344086
I0819 14:26:36.109329 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00344086 (* 1 = 0.00344086 loss)
I0819 14:26:36.109360 12180 sgd_solver.cpp:180] [0.0] Iteration 3000, lr = 0.00492826, m = 0.9, lrm = 0.0492826, wd = 0.0005, gs = 1
I0819 14:26:36.488287 12180 solver.cpp:333]     [0.0] Iteration 3100 (263.83 iter/s, 0.379033s/100 iter), 3.3/21.3ep, loss = 0.0142212
I0819 14:26:36.488363 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0142212 (* 1 = 0.0142212 loss)
I0819 14:26:36.488376 12180 sgd_solver.cpp:180] [0.0] Iteration 3100, lr = 0.00490002, m = 0.9, lrm = 0.0490002, wd = 0.0005, gs = 1
I0819 14:26:36.871675 12180 solver.cpp:333]     [0.0] Iteration 3200 (260.857 iter/s, 0.383352s/100 iter), 3.4/21.3ep, loss = 0.0726318
I0819 14:26:36.871765 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0726318 (* 1 = 0.0726318 loss)
I0819 14:26:36.871778 12180 sgd_solver.cpp:180] [0.0] Iteration 3200, lr = 0.00487215, m = 0.9, lrm = 0.0487215, wd = 0.0005, gs = 1
I0819 14:26:37.262538 12180 solver.cpp:333]     [0.0] Iteration 3300 (255.872 iter/s, 0.390821s/100 iter), 3.5/21.3ep, loss = 0.0606079
I0819 14:26:37.262645 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0606079 (* 1 = 0.0606079 loss)
I0819 14:26:37.262689 12180 sgd_solver.cpp:180] [0.0] Iteration 3300, lr = 0.00484465, m = 0.9, lrm = 0.0484465, wd = 0.0005, gs = 1
I0819 14:26:37.661991 12180 solver.cpp:333]     [0.0] Iteration 3400 (250.363 iter/s, 0.399419s/100 iter), 3.6/21.3ep, loss = 0.0738525
I0819 14:26:37.662087 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0738525 (* 1 = 0.0738525 loss)
I0819 14:26:37.662102 12180 sgd_solver.cpp:180] [0.0] Iteration 3400, lr = 0.00481751, m = 0.9, lrm = 0.0481751, wd = 0.0005, gs = 1
I0819 14:26:38.071043 12180 solver.cpp:501] Iteration 3500, Testing net (#0)
I0819 14:26:38.304494 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:38.414863 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.98458
I0819 14:26:38.414906 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0620035 (* 1 = 0.0620035 loss)
I0819 14:26:38.414933 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.752913s
I0819 14:26:38.419159 12180 solver.cpp:333]     [0.0] Iteration 3500 (132.817 iter/s, 0.752913s/100 iter), 3.7/21.3ep, loss = 0.00721359
I0819 14:26:38.419271 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00721359 (* 1 = 0.00721359 loss)
I0819 14:26:38.419294 12180 sgd_solver.cpp:180] [0.0] Iteration 3500, lr = 0.00479072, m = 0.9, lrm = 0.0479072, wd = 0.0005, gs = 1
I0819 14:26:38.804241 12180 solver.cpp:333]     [0.0] Iteration 3600 (259.708 iter/s, 0.385048s/100 iter), 3.8/21.3ep, loss = 0.0017128
I0819 14:26:38.804404 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0017128 (* 1 = 0.0017128 loss)
I0819 14:26:38.804424 12180 sgd_solver.cpp:180] [0.0] Iteration 3600, lr = 0.00476428, m = 0.9, lrm = 0.0476428, wd = 0.0005, gs = 1
I0819 14:26:39.195200 12180 solver.cpp:333]     [0.0] Iteration 3700 (255.824 iter/s, 0.390894s/100 iter), 3.9/21.3ep, loss = 0.000611782
I0819 14:26:39.195310 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000611782 (* 1 = 0.000611782 loss)
I0819 14:26:39.195330 12180 sgd_solver.cpp:180] [0.0] Iteration 3700, lr = 0.00473817, m = 0.9, lrm = 0.0473817, wd = 0.0005, gs = 1
I0819 14:26:39.275179 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:39.588418 12180 solver.cpp:333]     [0.0] Iteration 3800 (254.338 iter/s, 0.393178s/100 iter), 4.1/21.3ep, loss = 0.0181732
I0819 14:26:39.588510 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0181732 (* 1 = 0.0181732 loss)
I0819 14:26:39.588526 12180 sgd_solver.cpp:180] [0.0] Iteration 3800, lr = 0.0047124, m = 0.9, lrm = 0.047124, wd = 0.0005, gs = 1
I0819 14:26:39.973237 12180 solver.cpp:333]     [0.0] Iteration 3900 (259.877 iter/s, 0.384798s/100 iter), 4.2/21.3ep, loss = 0.00190639
I0819 14:26:39.973321 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00190639 (* 1 = 0.00190639 loss)
I0819 14:26:39.973356 12180 sgd_solver.cpp:180] [0.0] Iteration 3900, lr = 0.00468695, m = 0.9, lrm = 0.0468695, wd = 0.0005, gs = 1
I0819 14:26:40.355121 12180 solver.cpp:501] Iteration 4000, Testing net (#0)
I0819 14:26:40.572141 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:40.698192 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989976
I0819 14:26:40.698222 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0351509 (* 1 = 0.0351509 loss)
I0819 14:26:40.698256 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.724985s
I0819 14:26:40.702327 12180 solver.cpp:333]     [0.0] Iteration 4000 (137.934 iter/s, 0.724985s/100 iter), 4.3/21.3ep, loss = 0.000366688
I0819 14:26:40.702389 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:26:40.702417 12180 sgd_solver.cpp:180] [0.0] Iteration 4000, lr = 0.00466182, m = 0.9, lrm = 0.0466182, wd = 0.0005, gs = 1
I0819 14:26:41.102905 12180 solver.cpp:333]     [0.0] Iteration 4100 (249.671 iter/s, 0.400527s/100 iter), 4.4/21.3ep, loss = 0.00313187
I0819 14:26:41.103052 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00313187 (* 1 = 0.00313187 loss)
I0819 14:26:41.103081 12180 sgd_solver.cpp:180] [0.0] Iteration 4100, lr = 0.004637, m = 0.9, lrm = 0.04637, wd = 0.0005, gs = 1
I0819 14:26:41.513613 12180 solver.cpp:333]     [0.0] Iteration 4200 (243.506 iter/s, 0.410668s/100 iter), 4.5/21.3ep, loss = 0.00296211
I0819 14:26:41.513764 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00296211 (* 1 = 0.00296211 loss)
I0819 14:26:41.513788 12180 sgd_solver.cpp:180] [0.0] Iteration 4200, lr = 0.00461249, m = 0.9, lrm = 0.0461248, wd = 0.0005, gs = 1
I0819 14:26:41.919658 12180 solver.cpp:333]     [0.0] Iteration 4300 (246.325 iter/s, 0.405968s/100 iter), 4.6/21.3ep, loss = 0.0287476
I0819 14:26:41.919812 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0287476 (* 1 = 0.0287476 loss)
I0819 14:26:41.919832 12180 sgd_solver.cpp:180] [0.0] Iteration 4300, lr = 0.00458827, m = 0.9, lrm = 0.0458827, wd = 0.0005, gs = 1
I0819 14:26:42.304530 12180 solver.cpp:333]     [0.0] Iteration 4400 (259.831 iter/s, 0.384866s/100 iter), 4.7/21.3ep, loss = 0.000183225
I0819 14:26:42.304636 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:26:42.304654 12180 sgd_solver.cpp:180] [0.0] Iteration 4400, lr = 0.00456435, m = 0.9, lrm = 0.0456435, wd = 0.0005, gs = 1
I0819 14:26:42.688249 12180 solver.cpp:501] Iteration 4500, Testing net (#0)
I0819 14:26:42.894536 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:43.027454 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989971
I0819 14:26:43.027487 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0380082 (* 1 = 0.0380082 loss)
I0819 14:26:43.027531 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.722957s
I0819 14:26:43.031596 12180 solver.cpp:333]     [0.0] Iteration 4500 (138.321 iter/s, 0.722957s/100 iter), 4.8/21.3ep, loss = 0.357666
I0819 14:26:43.031718 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.357666 (* 1 = 0.357666 loss)
I0819 14:26:43.031742 12180 sgd_solver.cpp:180] [0.0] Iteration 4500, lr = 0.00454073, m = 0.9, lrm = 0.0454072, wd = 0.0005, gs = 1
I0819 14:26:43.411060 12180 solver.cpp:333]     [0.0] Iteration 4600 (263.558 iter/s, 0.379423s/100 iter), 4.9/21.3ep, loss = 0
I0819 14:26:43.411164 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:43.411185 12180 sgd_solver.cpp:180] [0.0] Iteration 4600, lr = 0.00451738, m = 0.9, lrm = 0.0451738, wd = 0.0005, gs = 1
I0819 14:26:43.642176 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:43.803308 12180 solver.cpp:333]     [0.0] Iteration 4700 (254.968 iter/s, 0.392206s/100 iter), 5/21.3ep, loss = 0.00122356
I0819 14:26:43.803382 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00122356 (* 1 = 0.00122356 loss)
I0819 14:26:43.803405 12180 sgd_solver.cpp:180] [0.0] Iteration 4700, lr = 0.00449431, m = 0.9, lrm = 0.0449431, wd = 0.0005, gs = 1
I0819 14:26:44.195493 12180 solver.cpp:333]     [0.0] Iteration 4800 (255.012 iter/s, 0.392139s/100 iter), 5.1/21.3ep, loss = 0.000305414
I0819 14:26:44.195591 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000305414 (* 1 = 0.000305414 loss)
I0819 14:26:44.195611 12180 sgd_solver.cpp:180] [0.0] Iteration 4800, lr = 0.00447152, m = 0.9, lrm = 0.0447152, wd = 0.0005, gs = 1
I0819 14:26:44.582638 12180 solver.cpp:333]     [0.0] Iteration 4900 (258.326 iter/s, 0.387108s/100 iter), 5.2/21.3ep, loss = 0.000427723
I0819 14:26:44.582801 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000427723 (* 1 = 0.000427723 loss)
I0819 14:26:44.582828 12180 sgd_solver.cpp:180] [0.0] Iteration 4900, lr = 0.00444899, m = 0.9, lrm = 0.0444899, wd = 0.0005, gs = 1
I0819 14:26:44.971208 12180 solver.cpp:501] Iteration 5000, Testing net (#0)
I0819 14:26:45.163486 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:45.314141 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989209
I0819 14:26:45.314182 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0298161 (* 1 = 0.0298161 loss)
I0819 14:26:45.314218 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.731553s
I0819 14:26:45.318676 12180 solver.cpp:333]     [0.0] Iteration 5000 (136.696 iter/s, 0.731553s/100 iter), 5.3/21.3ep, loss = 0.00227928
I0819 14:26:45.318773 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00227928 (* 1 = 0.00227928 loss)
I0819 14:26:45.318789 12180 sgd_solver.cpp:180] [0.0] Iteration 5000, lr = 0.00442673, m = 0.9, lrm = 0.0442673, wd = 0.0005, gs = 1
I0819 14:26:45.712577 12180 solver.cpp:333]     [0.0] Iteration 5100 (253.928 iter/s, 0.393812s/100 iter), 5.4/21.3ep, loss = 0.000244379
I0819 14:26:45.712747 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000244379 (* 1 = 0.000244379 loss)
I0819 14:26:45.712774 12180 sgd_solver.cpp:180] [0.0] Iteration 5100, lr = 0.00440472, m = 0.9, lrm = 0.0440472, wd = 0.0005, gs = 1
I0819 14:26:46.098834 12180 solver.cpp:333]     [0.0] Iteration 5200 (258.883 iter/s, 0.386274s/100 iter), 5.5/21.3ep, loss = 0.00012207
I0819 14:26:46.098973 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012207 (* 1 = 0.00012207 loss)
I0819 14:26:46.098989 12180 sgd_solver.cpp:180] [0.0] Iteration 5200, lr = 0.00438297, m = 0.9, lrm = 0.0438297, wd = 0.0005, gs = 1
I0819 14:26:46.494845 12180 solver.cpp:333]     [0.0] Iteration 5300 (252.542 iter/s, 0.395974s/100 iter), 5.7/21.3ep, loss = 0.00325394
I0819 14:26:46.494944 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00325394 (* 1 = 0.00325394 loss)
I0819 14:26:46.495009 12180 sgd_solver.cpp:180] [0.0] Iteration 5300, lr = 0.00436147, m = 0.9, lrm = 0.0436147, wd = 0.0005, gs = 1
I0819 14:26:46.878840 12180 solver.cpp:333]     [0.0] Iteration 5400 (260.441 iter/s, 0.383964s/100 iter), 5.8/21.3ep, loss = 0.000795364
I0819 14:26:46.878952 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000795364 (* 1 = 0.000795364 loss)
I0819 14:26:46.878974 12180 sgd_solver.cpp:180] [0.0] Iteration 5400, lr = 0.00434021, m = 0.9, lrm = 0.0434021, wd = 0.0005, gs = 1
I0819 14:26:47.253578 12180 solver.cpp:501] Iteration 5500, Testing net (#0)
I0819 14:26:47.432548 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:47.594924 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.993828
I0819 14:26:47.594957 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0290734 (* 1 = 0.0290734 loss)
I0819 14:26:47.594997 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.716128s
I0819 14:26:47.598889 12180 solver.cpp:333]     [0.0] Iteration 5500 (139.64 iter/s, 0.716128s/100 iter), 5.9/21.3ep, loss = 0.000734806
I0819 14:26:47.598986 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000734806 (* 1 = 0.000734806 loss)
I0819 14:26:47.599012 12180 sgd_solver.cpp:180] [0.0] Iteration 5500, lr = 0.00431919, m = 0.9, lrm = 0.0431919, wd = 0.0005, gs = 1
I0819 14:26:47.964301 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:47.984242 12180 solver.cpp:333]     [0.0] Iteration 5600 (259.546 iter/s, 0.385288s/100 iter), 6/21.3ep, loss = 0.00482178
I0819 14:26:47.984391 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00482178 (* 1 = 0.00482178 loss)
I0819 14:26:47.984429 12180 sgd_solver.cpp:180] [0.0] Iteration 5600, lr = 0.00429841, m = 0.9, lrm = 0.0429841, wd = 0.0005, gs = 1
I0819 14:26:48.372097 12180 solver.cpp:333]     [0.0] Iteration 5700 (257.849 iter/s, 0.387824s/100 iter), 6.1/21.3ep, loss = 0.000855923
I0819 14:26:48.372200 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000855923 (* 1 = 0.000855923 loss)
I0819 14:26:48.372222 12180 sgd_solver.cpp:180] [0.0] Iteration 5700, lr = 0.00427786, m = 0.9, lrm = 0.0427786, wd = 0.0005, gs = 1
I0819 14:26:48.762524 12180 solver.cpp:333]     [0.0] Iteration 5800 (256.143 iter/s, 0.390406s/100 iter), 6.2/21.3ep, loss = 0.000366688
I0819 14:26:48.762622 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:26:48.762637 12180 sgd_solver.cpp:180] [0.0] Iteration 5800, lr = 0.00425754, m = 0.9, lrm = 0.0425754, wd = 0.0005, gs = 1
I0819 14:26:49.154209 12180 solver.cpp:333]     [0.0] Iteration 5900 (255.327 iter/s, 0.391654s/100 iter), 6.3/21.3ep, loss = 0.0144882
I0819 14:26:49.154320 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0144882 (* 1 = 0.0144882 loss)
I0819 14:26:49.154371 12180 sgd_solver.cpp:180] [0.0] Iteration 5900, lr = 0.00423744, m = 0.9, lrm = 0.0423744, wd = 0.0005, gs = 1
I0819 14:26:49.542238 12180 solver.cpp:501] Iteration 6000, Testing net (#0)
I0819 14:26:49.707733 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:49.888193 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989976
I0819 14:26:49.888240 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0260334 (* 1 = 0.0260334 loss)
I0819 14:26:49.888269 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.734035s
I0819 14:26:49.892340 12180 solver.cpp:333]     [0.0] Iteration 6000 (136.233 iter/s, 0.734035s/100 iter), 6.4/21.3ep, loss = 0.00012207
I0819 14:26:49.892462 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012207 (* 1 = 0.00012207 loss)
I0819 14:26:49.892477 12180 sgd_solver.cpp:180] [0.0] Iteration 6000, lr = 0.00421756, m = 0.9, lrm = 0.0421756, wd = 0.0005, gs = 1
I0819 14:26:50.281786 12180 solver.cpp:333]     [0.0] Iteration 6100 (256.803 iter/s, 0.389404s/100 iter), 6.5/21.3ep, loss = 0.000305653
I0819 14:26:50.281940 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000305653 (* 1 = 0.000305653 loss)
I0819 14:26:50.281965 12180 sgd_solver.cpp:180] [0.0] Iteration 6100, lr = 0.0041979, m = 0.9, lrm = 0.041979, wd = 0.0005, gs = 1
I0819 14:26:50.666496 12180 solver.cpp:333]     [0.0] Iteration 6200 (259.956 iter/s, 0.38468s/100 iter), 6.6/21.3ep, loss = 0.00270271
I0819 14:26:50.666636 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00270271 (* 1 = 0.00270271 loss)
I0819 14:26:50.666682 12180 sgd_solver.cpp:180] [0.0] Iteration 6200, lr = 0.00417845, m = 0.9, lrm = 0.0417845, wd = 0.0005, gs = 1
I0819 14:26:51.070538 12180 solver.cpp:333]     [0.0] Iteration 6300 (247.521 iter/s, 0.404006s/100 iter), 6.7/21.3ep, loss = 0.00104237
I0819 14:26:51.070636 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00104237 (* 1 = 0.00104237 loss)
I0819 14:26:51.070677 12180 sgd_solver.cpp:180] [0.0] Iteration 6300, lr = 0.00415921, m = 0.9, lrm = 0.0415921, wd = 0.0005, gs = 1
I0819 14:26:51.463475 12180 solver.cpp:333]     [0.0] Iteration 6400 (254.513 iter/s, 0.392907s/100 iter), 6.8/21.3ep, loss = 0.00371742
I0819 14:26:51.463573 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00371742 (* 1 = 0.00371742 loss)
I0819 14:26:51.463642 12180 sgd_solver.cpp:180] [0.0] Iteration 6400, lr = 0.00414017, m = 0.9, lrm = 0.0414017, wd = 0.0005, gs = 1
I0819 14:26:51.844105 12180 solver.cpp:501] Iteration 6500, Testing net (#0)
I0819 14:26:51.994576 12225 blocking_queue.cpp:40] Data layer prefetch queue empty
I0819 14:26:51.997326 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:52.187436 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.98689
I0819 14:26:52.187467 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0386341 (* 1 = 0.0386341 loss)
I0819 14:26:52.187510 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.724005s
I0819 14:26:52.191495 12180 solver.cpp:333]     [0.0] Iteration 6500 (138.121 iter/s, 0.724005s/100 iter), 6.9/21.3ep, loss = 0.0087204
I0819 14:26:52.191601 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0087204 (* 1 = 0.0087204 loss)
I0819 14:26:52.191622 12180 sgd_solver.cpp:180] [0.0] Iteration 6500, lr = 0.00412134, m = 0.9, lrm = 0.0412134, wd = 0.0005, gs = 1
I0819 14:26:52.322299 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:52.588073 12180 solver.cpp:333]     [0.0] Iteration 6600 (252.193 iter/s, 0.396522s/100 iter), 7/21.3ep, loss = 0.000611782
I0819 14:26:52.588196 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000611782 (* 1 = 0.000611782 loss)
I0819 14:26:52.588223 12180 sgd_solver.cpp:180] [0.0] Iteration 6600, lr = 0.0041027, m = 0.9, lrm = 0.041027, wd = 0.0005, gs = 1
I0819 14:26:52.991333 12180 solver.cpp:333]     [0.0] Iteration 6700 (247.991 iter/s, 0.40324s/100 iter), 7.1/21.3ep, loss = 0.00110149
I0819 14:26:52.991420 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00110149 (* 1 = 0.00110149 loss)
I0819 14:26:52.991436 12180 sgd_solver.cpp:180] [0.0] Iteration 6700, lr = 0.00408426, m = 0.9, lrm = 0.0408426, wd = 0.0005, gs = 1
I0819 14:26:53.389897 12180 solver.cpp:333]     [0.0] Iteration 6800 (250.916 iter/s, 0.39854s/100 iter), 7.3/21.3ep, loss = 6.10352e-05
I0819 14:26:53.389988 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:26:53.390013 12180 sgd_solver.cpp:180] [0.0] Iteration 6800, lr = 0.00406602, m = 0.9, lrm = 0.0406602, wd = 0.0005, gs = 1
I0819 14:26:53.784565 12180 solver.cpp:333]     [0.0] Iteration 6900 (253.402 iter/s, 0.39463s/100 iter), 7.4/21.3ep, loss = 0
I0819 14:26:53.784647 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:53.784677 12180 sgd_solver.cpp:180] [0.0] Iteration 6900, lr = 0.00404796, m = 0.9, lrm = 0.0404796, wd = 0.0005, gs = 1
I0819 14:26:54.168697 12180 solver.cpp:501] Iteration 7000, Testing net (#0)
I0819 14:26:54.309862 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:54.509017 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.993057
I0819 14:26:54.509055 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0355603 (* 1 = 0.0355603 loss)
I0819 14:26:54.509093 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.72449s
I0819 14:26:54.513242 12180 solver.cpp:333]     [0.0] Iteration 7000 (138.028 iter/s, 0.72449s/100 iter), 7.5/21.3ep, loss = 6.10352e-05
I0819 14:26:54.513398 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:26:54.513414 12180 sgd_solver.cpp:180] [0.0] Iteration 7000, lr = 0.00403009, m = 0.9, lrm = 0.0403009, wd = 0.0005, gs = 1
I0819 14:26:54.905464 12180 solver.cpp:333]     [0.0] Iteration 7100 (254.986 iter/s, 0.392179s/100 iter), 7.6/21.3ep, loss = 0
I0819 14:26:54.905551 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:54.905565 12180 sgd_solver.cpp:180] [0.0] Iteration 7100, lr = 0.0040124, m = 0.9, lrm = 0.040124, wd = 0.0005, gs = 1
I0819 14:26:55.298621 12180 solver.cpp:333]     [0.0] Iteration 7200 (254.368 iter/s, 0.393131s/100 iter), 7.7/21.3ep, loss = 0.000734329
I0819 14:26:55.298749 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000734329 (* 1 = 0.000734329 loss)
I0819 14:26:55.298769 12180 sgd_solver.cpp:180] [0.0] Iteration 7200, lr = 0.00399489, m = 0.9, lrm = 0.0399489, wd = 0.0005, gs = 1
I0819 14:26:55.693562 12180 solver.cpp:333]     [0.0] Iteration 7300 (253.234 iter/s, 0.394892s/100 iter), 7.8/21.3ep, loss = 0.0197296
I0819 14:26:55.693650 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0197296 (* 1 = 0.0197296 loss)
I0819 14:26:55.693665 12180 sgd_solver.cpp:180] [0.0] Iteration 7300, lr = 0.00397756, m = 0.9, lrm = 0.0397756, wd = 0.0005, gs = 1
I0819 14:26:56.084816 12180 solver.cpp:333]     [0.0] Iteration 7400 (255.604 iter/s, 0.39123s/100 iter), 7.9/21.3ep, loss = 0.0277405
I0819 14:26:56.084923 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0277405 (* 1 = 0.0277405 loss)
I0819 14:26:56.084939 12180 sgd_solver.cpp:180] [0.0] Iteration 7400, lr = 0.0039604, m = 0.9, lrm = 0.039604, wd = 0.0005, gs = 1
I0819 14:26:56.358763 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:56.475492 12180 solver.cpp:501] Iteration 7500, Testing net (#0)
I0819 14:26:56.604173 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:56.816341 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.993828
I0819 14:26:56.816376 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0310044 (* 1 = 0.0310044 loss)
I0819 14:26:56.816414 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.731554s
I0819 14:26:56.820881 12180 solver.cpp:333]     [0.0] Iteration 7500 (136.695 iter/s, 0.731554s/100 iter), 8/21.3ep, loss = 0
I0819 14:26:56.820950 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:56.820981 12180 sgd_solver.cpp:180] [0.0] Iteration 7500, lr = 0.00394342, m = 0.9, lrm = 0.0394342, wd = 0.0005, gs = 1
I0819 14:26:57.207087 12180 solver.cpp:333]     [0.0] Iteration 7600 (258.951 iter/s, 0.386174s/100 iter), 8.1/21.3ep, loss = 6.10352e-05
I0819 14:26:57.207180 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:26:57.207208 12180 sgd_solver.cpp:180] [0.0] Iteration 7600, lr = 0.0039266, m = 0.9, lrm = 0.039266, wd = 0.0005, gs = 1
I0819 14:26:57.592334 12180 solver.cpp:333]     [0.0] Iteration 7700 (259.598 iter/s, 0.385211s/100 iter), 8.2/21.3ep, loss = 0.000550747
I0819 14:26:57.592437 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000550747 (* 1 = 0.000550747 loss)
I0819 14:26:57.592453 12180 sgd_solver.cpp:180] [0.0] Iteration 7700, lr = 0.00390995, m = 0.9, lrm = 0.0390995, wd = 0.0005, gs = 1
I0819 14:26:57.978354 12180 solver.cpp:333]     [0.0] Iteration 7800 (259.068 iter/s, 0.385999s/100 iter), 8.3/21.3ep, loss = 0
I0819 14:26:57.978477 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:57.978518 12180 sgd_solver.cpp:180] [0.0] Iteration 7800, lr = 0.00389346, m = 0.9, lrm = 0.0389346, wd = 0.0005, gs = 1
I0819 14:26:58.365398 12180 solver.cpp:333]     [0.0] Iteration 7900 (258.386 iter/s, 0.387018s/100 iter), 8.4/21.3ep, loss = 0
I0819 14:26:58.365538 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:26:58.365553 12180 sgd_solver.cpp:180] [0.0] Iteration 7900, lr = 0.00387714, m = 0.9, lrm = 0.0387714, wd = 0.0005, gs = 1
I0819 14:26:58.748659 12180 solver.cpp:501] Iteration 8000, Testing net (#0)
I0819 14:26:58.862890 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:26:59.090896 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.991514
I0819 14:26:59.090925 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0295883 (* 1 = 0.0295883 loss)
I0819 14:26:59.090956 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.725529s
I0819 14:26:59.095119 12180 solver.cpp:333]     [0.0] Iteration 8000 (137.831 iter/s, 0.725529s/100 iter), 8.5/21.3ep, loss = 0.00171947
I0819 14:26:59.095240 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00171947 (* 1 = 0.00171947 loss)
I0819 14:26:59.095261 12180 sgd_solver.cpp:180] [0.0] Iteration 8000, lr = 0.00386097, m = 0.9, lrm = 0.0386097, wd = 0.0005, gs = 1
I0819 14:26:59.482128 12180 solver.cpp:333]     [0.0] Iteration 8100 (258.422 iter/s, 0.386963s/100 iter), 8.6/21.3ep, loss = 0.000488758
I0819 14:26:59.482215 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000488758 (* 1 = 0.000488758 loss)
I0819 14:26:59.482231 12180 sgd_solver.cpp:180] [0.0] Iteration 8100, lr = 0.00384496, m = 0.9, lrm = 0.0384496, wd = 0.0005, gs = 1
I0819 14:26:59.869750 12180 solver.cpp:333]     [0.0] Iteration 8200 (258.005 iter/s, 0.387589s/100 iter), 8.7/21.3ep, loss = 0.000671864
I0819 14:26:59.869866 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000671864 (* 1 = 0.000671864 loss)
I0819 14:26:59.869880 12180 sgd_solver.cpp:180] [0.0] Iteration 8200, lr = 0.00382911, m = 0.9, lrm = 0.0382911, wd = 0.0005, gs = 1
I0819 14:27:00.257104 12180 solver.cpp:333]     [0.0] Iteration 8300 (258.185 iter/s, 0.387319s/100 iter), 8.9/21.3ep, loss = 0
I0819 14:27:00.257232 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:00.257248 12180 sgd_solver.cpp:180] [0.0] Iteration 8300, lr = 0.00381341, m = 0.9, lrm = 0.038134, wd = 0.0005, gs = 1
I0819 14:27:00.655155 12180 solver.cpp:333]     [0.0] Iteration 8400 (251.247 iter/s, 0.398015s/100 iter), 9/21.3ep, loss = 0.00271416
I0819 14:27:00.655246 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00271416 (* 1 = 0.00271416 loss)
I0819 14:27:00.655261 12180 sgd_solver.cpp:180] [0.0] Iteration 8400, lr = 0.00379785, m = 0.9, lrm = 0.0379785, wd = 0.0005, gs = 1
I0819 14:27:00.691988 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:01.037777 12180 solver.cpp:501] Iteration 8500, Testing net (#0)
I0819 14:27:01.139003 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:01.378856 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.991514
I0819 14:27:01.378890 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0338354 (* 1 = 0.0338354 loss)
I0819 14:27:01.378931 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.723752s
I0819 14:27:01.383092 12180 solver.cpp:333]     [0.0] Iteration 8500 (138.169 iter/s, 0.723752s/100 iter), 9.1/21.3ep, loss = 0.00012207
I0819 14:27:01.383271 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012207 (* 1 = 0.00012207 loss)
I0819 14:27:01.383285 12180 sgd_solver.cpp:180] [0.0] Iteration 8500, lr = 0.00378244, m = 0.9, lrm = 0.0378244, wd = 0.0005, gs = 1
I0819 14:27:01.770723 12180 solver.cpp:333]     [0.0] Iteration 8600 (258.006 iter/s, 0.387588s/100 iter), 9.2/21.3ep, loss = 6.10352e-05
I0819 14:27:01.770825 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:01.770874 12180 sgd_solver.cpp:180] [0.0] Iteration 8600, lr = 0.00376718, m = 0.9, lrm = 0.0376718, wd = 0.0005, gs = 1
I0819 14:27:02.161785 12180 solver.cpp:333]     [0.0] Iteration 8700 (255.733 iter/s, 0.391033s/100 iter), 9.3/21.3ep, loss = 0.00344086
I0819 14:27:02.161886 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00344086 (* 1 = 0.00344086 loss)
I0819 14:27:02.161898 12180 sgd_solver.cpp:180] [0.0] Iteration 8700, lr = 0.00375206, m = 0.9, lrm = 0.0375206, wd = 0.0005, gs = 1
I0819 14:27:02.553359 12180 solver.cpp:333]     [0.0] Iteration 8800 (255.407 iter/s, 0.391533s/100 iter), 9.4/21.3ep, loss = 0.00012219
I0819 14:27:02.553467 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012219 (* 1 = 0.00012219 loss)
I0819 14:27:02.553489 12180 sgd_solver.cpp:180] [0.0] Iteration 8800, lr = 0.00373708, m = 0.9, lrm = 0.0373708, wd = 0.0005, gs = 1
I0819 14:27:02.945791 12180 solver.cpp:333]     [0.0] Iteration 8900 (254.861 iter/s, 0.392371s/100 iter), 9.5/21.3ep, loss = 0.0030899
I0819 14:27:02.945888 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0030899 (* 1 = 0.0030899 loss)
I0819 14:27:02.945902 12180 sgd_solver.cpp:180] [0.0] Iteration 8900, lr = 0.00372224, m = 0.9, lrm = 0.0372224, wd = 0.0005, gs = 1
I0819 14:27:03.326556 12180 solver.cpp:501] Iteration 9000, Testing net (#0)
I0819 14:27:03.414835 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:03.667979 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.992285
I0819 14:27:03.668007 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.031059 (* 1 = 0.031059 loss)
I0819 14:27:03.668042 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.722252s
I0819 14:27:03.671975 12180 solver.cpp:333]     [0.0] Iteration 9000 (138.456 iter/s, 0.722252s/100 iter), 9.6/21.3ep, loss = 0.201294
I0819 14:27:03.672065 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.201294 (* 1 = 0.201294 loss)
I0819 14:27:03.672085 12180 sgd_solver.cpp:180] [0.0] Iteration 9000, lr = 0.00370754, m = 0.9, lrm = 0.0370754, wd = 0.0005, gs = 1
I0819 14:27:04.061833 12180 solver.cpp:333]     [0.0] Iteration 9100 (256.53 iter/s, 0.389818s/100 iter), 9.7/21.3ep, loss = 0.204102
I0819 14:27:04.061956 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.204102 (* 1 = 0.204102 loss)
I0819 14:27:04.062000 12180 sgd_solver.cpp:180] [0.0] Iteration 9100, lr = 0.00369297, m = 0.9, lrm = 0.0369297, wd = 0.0005, gs = 1
I0819 14:27:04.448829 12180 solver.cpp:333]     [0.0] Iteration 9200 (258.421 iter/s, 0.386966s/100 iter), 9.8/21.3ep, loss = 0
I0819 14:27:04.448938 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:04.448961 12180 sgd_solver.cpp:180] [0.0] Iteration 9200, lr = 0.00367854, m = 0.9, lrm = 0.0367854, wd = 0.0005, gs = 1
I0819 14:27:04.842108 12180 solver.cpp:333]     [0.0] Iteration 9300 (254.293 iter/s, 0.393248s/100 iter), 9.9/21.3ep, loss = 0
I0819 14:27:04.842198 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:04.842217 12180 sgd_solver.cpp:180] [0.0] Iteration 9300, lr = 0.00366423, m = 0.9, lrm = 0.0366423, wd = 0.0005, gs = 1
I0819 14:27:05.016857 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:05.229874 12180 solver.cpp:333]     [0.0] Iteration 9400 (257.909 iter/s, 0.387734s/100 iter), 10/21.3ep, loss = 0.000734329
I0819 14:27:05.229962 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000734329 (* 1 = 0.000734329 loss)
I0819 14:27:05.229982 12180 sgd_solver.cpp:180] [0.0] Iteration 9400, lr = 0.00365006, m = 0.9, lrm = 0.0365006, wd = 0.0005, gs = 1
I0819 14:27:05.620606 12180 solver.cpp:501] Iteration 9500, Testing net (#0)
I0819 14:27:05.695765 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:05.961028 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.98689
I0819 14:27:05.961060 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0439001 (* 1 = 0.0439001 loss)
I0819 14:27:05.961089 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.731181s
I0819 14:27:05.965332 12180 solver.cpp:333]     [0.0] Iteration 9500 (136.765 iter/s, 0.731181s/100 iter), 10.1/21.3ep, loss = 0
I0819 14:27:05.965427 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:05.965452 12180 sgd_solver.cpp:180] [0.0] Iteration 9500, lr = 0.00363601, m = 0.9, lrm = 0.0363601, wd = 0.0005, gs = 1
I0819 14:27:06.356194 12180 solver.cpp:333]     [0.0] Iteration 9600 (255.871 iter/s, 0.390822s/100 iter), 10.2/21.3ep, loss = 6.10352e-05
I0819 14:27:06.356318 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:06.356350 12180 sgd_solver.cpp:180] [0.0] Iteration 9600, lr = 0.00362209, m = 0.9, lrm = 0.0362209, wd = 0.0005, gs = 1
I0819 14:27:06.743566 12180 solver.cpp:333]     [0.0] Iteration 9700 (258.18 iter/s, 0.387326s/100 iter), 10.3/21.3ep, loss = 0.000305653
I0819 14:27:06.743641 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000305653 (* 1 = 0.000305653 loss)
I0819 14:27:06.743657 12180 sgd_solver.cpp:180] [0.0] Iteration 9700, lr = 0.00360829, m = 0.9, lrm = 0.0360829, wd = 0.0005, gs = 1
I0819 14:27:07.134608 12180 solver.cpp:333]     [0.0] Iteration 9800 (255.739 iter/s, 0.391023s/100 iter), 10.5/21.3ep, loss = 0.000855923
I0819 14:27:07.134759 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000855923 (* 1 = 0.000855923 loss)
I0819 14:27:07.134781 12180 sgd_solver.cpp:180] [0.0] Iteration 9800, lr = 0.00359461, m = 0.9, lrm = 0.0359461, wd = 0.0005, gs = 1
I0819 14:27:07.526099 12180 solver.cpp:333]     [0.0] Iteration 9900 (255.467 iter/s, 0.39144s/100 iter), 10.6/21.3ep, loss = 0
I0819 14:27:07.526212 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:07.526252 12180 sgd_solver.cpp:180] [0.0] Iteration 9900, lr = 0.00358106, m = 0.9, lrm = 0.0358106, wd = 0.0005, gs = 1
I0819 14:27:07.906834 12180 solver.cpp:501] Iteration 10000, Testing net (#0)
I0819 14:27:07.972169 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:08.248234 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.987661
I0819 14:27:08.248268 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0476012 (* 1 = 0.0476012 loss)
I0819 14:27:08.248308 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.722188s
I0819 14:27:08.252460 12180 solver.cpp:333]     [0.0] Iteration 10000 (138.468 iter/s, 0.722188s/100 iter), 10.7/21.3ep, loss = 0.00012219
I0819 14:27:08.252575 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012219 (* 1 = 0.00012219 loss)
I0819 14:27:08.252589 12180 sgd_solver.cpp:180] [0.0] Iteration 10000, lr = 0.00356762, m = 0.9, lrm = 0.0356762, wd = 0.0005, gs = 1
I0819 14:27:08.641191 12180 solver.cpp:333]     [0.0] Iteration 10100 (257.263 iter/s, 0.388707s/100 iter), 10.8/21.3ep, loss = 0.0248413
I0819 14:27:08.641288 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0248413 (* 1 = 0.0248413 loss)
I0819 14:27:08.641307 12180 sgd_solver.cpp:180] [0.0] Iteration 10100, lr = 0.0035543, m = 0.9, lrm = 0.035543, wd = 0.0005, gs = 1
I0819 14:27:09.031965 12180 solver.cpp:333]     [0.0] Iteration 10200 (255.974 iter/s, 0.390664s/100 iter), 10.9/21.3ep, loss = 0
I0819 14:27:09.032181 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:09.032232 12180 sgd_solver.cpp:180] [0.0] Iteration 10200, lr = 0.0035411, m = 0.9, lrm = 0.035411, wd = 0.0005, gs = 1
I0819 14:27:09.378978 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:09.448051 12180 solver.cpp:333]     [0.0] Iteration 10300 (240.311 iter/s, 0.416127s/100 iter), 11/21.3ep, loss = 0
I0819 14:27:09.448151 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:09.448179 12180 sgd_solver.cpp:180] [0.0] Iteration 10300, lr = 0.00352801, m = 0.9, lrm = 0.03528, wd = 0.0005, gs = 1
I0819 14:27:09.858690 12180 solver.cpp:333]     [0.0] Iteration 10400 (243.553 iter/s, 0.410588s/100 iter), 11.1/21.3ep, loss = 0.00159168
I0819 14:27:09.858820 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00159168 (* 1 = 0.00159168 loss)
I0819 14:27:09.858844 12180 sgd_solver.cpp:180] [0.0] Iteration 10400, lr = 0.00351503, m = 0.9, lrm = 0.0351503, wd = 0.0005, gs = 1
I0819 14:27:10.248284 12180 solver.cpp:501] Iteration 10500, Testing net (#0)
I0819 14:27:10.301270 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:10.592121 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989976
I0819 14:27:10.592262 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0490732 (* 1 = 0.0490732 loss)
I0819 14:27:10.592314 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.733582s
I0819 14:27:10.596913 12180 solver.cpp:333]     [0.0] Iteration 10500 (136.317 iter/s, 0.733582s/100 iter), 11.2/21.3ep, loss = 0.00503159
I0819 14:27:10.597009 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00503159 (* 1 = 0.00503159 loss)
I0819 14:27:10.597033 12180 sgd_solver.cpp:180] [0.0] Iteration 10500, lr = 0.00350216, m = 0.9, lrm = 0.0350216, wd = 0.0005, gs = 1
I0819 14:27:10.991122 12180 solver.cpp:333]     [0.0] Iteration 10600 (253.708 iter/s, 0.394154s/100 iter), 11.3/21.3ep, loss = 0.00012219
I0819 14:27:10.991216 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012219 (* 1 = 0.00012219 loss)
I0819 14:27:10.991238 12180 sgd_solver.cpp:180] [0.0] Iteration 10600, lr = 0.0034894, m = 0.9, lrm = 0.034894, wd = 0.0005, gs = 1
I0819 14:27:11.382978 12180 solver.cpp:333]     [0.0] Iteration 10700 (255.209 iter/s, 0.391836s/100 iter), 11.4/21.3ep, loss = 0.0326843
I0819 14:27:11.383086 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0326843 (* 1 = 0.0326843 loss)
I0819 14:27:11.383101 12180 sgd_solver.cpp:180] [0.0] Iteration 10700, lr = 0.00347675, m = 0.9, lrm = 0.0347675, wd = 0.0005, gs = 1
I0819 14:27:11.769176 12180 solver.cpp:333]     [0.0] Iteration 10800 (258.96 iter/s, 0.386161s/100 iter), 11.5/21.3ep, loss = 0.0303955
I0819 14:27:11.769268 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0303955 (* 1 = 0.0303955 loss)
I0819 14:27:11.769285 12180 sgd_solver.cpp:180] [0.0] Iteration 10800, lr = 0.00346421, m = 0.9, lrm = 0.0346421, wd = 0.0005, gs = 1
I0819 14:27:12.156428 12180 solver.cpp:333]     [0.0] Iteration 10900 (258.253 iter/s, 0.387217s/100 iter), 11.6/21.3ep, loss = 0.0141907
I0819 14:27:12.156536 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0141907 (* 1 = 0.0141907 loss)
I0819 14:27:12.156559 12180 sgd_solver.cpp:180] [0.0] Iteration 10900, lr = 0.00345177, m = 0.9, lrm = 0.0345177, wd = 0.0005, gs = 1
I0819 14:27:12.551945 12180 solver.cpp:501] Iteration 11000, Testing net (#0)
I0819 14:27:12.591337 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:12.897469 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.993057
I0819 14:27:12.897501 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0169954 (* 1 = 0.0169954 loss)
I0819 14:27:12.897538 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.741069s
I0819 14:27:12.901711 12180 solver.cpp:333]     [0.0] Iteration 11000 (134.94 iter/s, 0.741069s/100 iter), 11.7/21.3ep, loss = 6.10352e-05
I0819 14:27:12.901855 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:12.901872 12180 sgd_solver.cpp:180] [0.0] Iteration 11000, lr = 0.00343943, m = 0.9, lrm = 0.0343943, wd = 0.0005, gs = 1
I0819 14:27:13.294100 12180 solver.cpp:333]     [0.0] Iteration 11100 (254.879 iter/s, 0.392343s/100 iter), 11.8/21.3ep, loss = 0.000244379
I0819 14:27:13.294205 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000244379 (* 1 = 0.000244379 loss)
I0819 14:27:13.294224 12180 sgd_solver.cpp:180] [0.0] Iteration 11100, lr = 0.0034272, m = 0.9, lrm = 0.034272, wd = 0.0005, gs = 1
I0819 14:27:13.687636 12180 solver.cpp:333]     [0.0] Iteration 11200 (254.121 iter/s, 0.393513s/100 iter), 11.9/21.3ep, loss = 6.10352e-05
I0819 14:27:13.687731 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:13.687752 12180 sgd_solver.cpp:180] [0.0] Iteration 11200, lr = 0.00341507, m = 0.9, lrm = 0.0341507, wd = 0.0005, gs = 1
I0819 14:27:13.767225 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:14.082840 12180 solver.cpp:333]     [0.0] Iteration 11300 (253.052 iter/s, 0.395175s/100 iter), 12.1/21.3ep, loss = 0.0273438
I0819 14:27:14.082963 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0273438 (* 1 = 0.0273438 loss)
I0819 14:27:14.082983 12180 sgd_solver.cpp:180] [0.0] Iteration 11300, lr = 0.00340304, m = 0.9, lrm = 0.0340304, wd = 0.0005, gs = 1
I0819 14:27:14.470371 12180 solver.cpp:333]     [0.0] Iteration 11400 (258.067 iter/s, 0.387496s/100 iter), 12.2/21.3ep, loss = 0
I0819 14:27:14.470458 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:14.470472 12180 sgd_solver.cpp:180] [0.0] Iteration 11400, lr = 0.0033911, m = 0.9, lrm = 0.033911, wd = 0.0005, gs = 1
I0819 14:27:14.855865 12180 solver.cpp:501] Iteration 11500, Testing net (#0)
I0819 14:27:14.880189 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:15.204100 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.99229
I0819 14:27:15.204133 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0275124 (* 1 = 0.0275124 loss)
I0819 14:27:15.204174 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.733757s
I0819 14:27:15.208448 12180 solver.cpp:333]     [0.0] Iteration 11500 (136.285 iter/s, 0.733757s/100 iter), 12.3/21.3ep, loss = 0
I0819 14:27:15.208532 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:15.208549 12180 sgd_solver.cpp:180] [0.0] Iteration 11500, lr = 0.00337927, m = 0.9, lrm = 0.0337927, wd = 0.0005, gs = 1
I0819 14:27:15.596454 12180 solver.cpp:333]     [0.0] Iteration 11600 (257.752 iter/s, 0.38797s/100 iter), 12.4/21.3ep, loss = 0.0008564
I0819 14:27:15.596580 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0008564 (* 1 = 0.0008564 loss)
I0819 14:27:15.596594 12180 sgd_solver.cpp:180] [0.0] Iteration 11600, lr = 0.00336753, m = 0.9, lrm = 0.0336753, wd = 0.0005, gs = 1
I0819 14:27:15.997716 12180 solver.cpp:333]     [0.0] Iteration 11700 (249.234 iter/s, 0.40123s/100 iter), 12.5/21.3ep, loss = 6.10352e-05
I0819 14:27:15.997829 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:15.997881 12180 sgd_solver.cpp:180] [0.0] Iteration 11700, lr = 0.00335588, m = 0.9, lrm = 0.0335588, wd = 0.0005, gs = 1
I0819 14:27:16.396939 12180 solver.cpp:333]     [0.0] Iteration 11800 (250.512 iter/s, 0.399182s/100 iter), 12.6/21.3ep, loss = 0.00378036
I0819 14:27:16.397038 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00378036 (* 1 = 0.00378036 loss)
I0819 14:27:16.397061 12180 sgd_solver.cpp:180] [0.0] Iteration 11800, lr = 0.00334433, m = 0.9, lrm = 0.0334433, wd = 0.0005, gs = 1
I0819 14:27:16.805059 12180 solver.cpp:333]     [0.0] Iteration 11900 (245.048 iter/s, 0.408083s/100 iter), 12.7/21.3ep, loss = 0
I0819 14:27:16.805160 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:16.805238 12180 sgd_solver.cpp:180] [0.0] Iteration 11900, lr = 0.00333287, m = 0.9, lrm = 0.0333287, wd = 0.0005, gs = 1
I0819 14:27:17.198304 12180 solver.cpp:501] Iteration 12000, Testing net (#0)
I0819 14:27:17.209579 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:17.530982 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:17.541715 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.98458
I0819 14:27:17.541761 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0551621 (* 1 = 0.0551621 loss)
I0819 14:27:17.541786 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.736701s
I0819 14:27:17.546231 12180 solver.cpp:333]     [0.0] Iteration 12000 (135.74 iter/s, 0.736701s/100 iter), 12.8/21.3ep, loss = 0.0225525
I0819 14:27:17.546320 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0225525 (* 1 = 0.0225525 loss)
I0819 14:27:17.546344 12180 sgd_solver.cpp:180] [0.0] Iteration 12000, lr = 0.0033215, m = 0.9, lrm = 0.033215, wd = 0.0005, gs = 1
I0819 14:27:17.933728 12180 solver.cpp:333]     [0.0] Iteration 12100 (258.091 iter/s, 0.38746s/100 iter), 12.9/21.3ep, loss = 0
I0819 14:27:17.933846 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:17.933861 12180 sgd_solver.cpp:180] [0.0] Iteration 12100, lr = 0.00331022, m = 0.9, lrm = 0.0331022, wd = 0.0005, gs = 1
I0819 14:27:18.163508 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:18.325758 12180 solver.cpp:333]     [0.0] Iteration 12200 (255.103 iter/s, 0.391998s/100 iter), 13/21.3ep, loss = 0.000183225
I0819 14:27:18.325863 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:18.325882 12180 sgd_solver.cpp:180] [0.0] Iteration 12200, lr = 0.00329903, m = 0.9, lrm = 0.0329903, wd = 0.0005, gs = 1
I0819 14:27:18.715947 12180 solver.cpp:333]     [0.0] Iteration 12300 (256.309 iter/s, 0.390153s/100 iter), 13.1/21.3ep, loss = 0.00012207
I0819 14:27:18.716033 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012207 (* 1 = 0.00012207 loss)
I0819 14:27:18.716050 12180 sgd_solver.cpp:180] [0.0] Iteration 12300, lr = 0.00328793, m = 0.9, lrm = 0.0328793, wd = 0.0005, gs = 1
I0819 14:27:19.105150 12180 solver.cpp:333]     [0.0] Iteration 12400 (256.959 iter/s, 0.389167s/100 iter), 13.2/21.3ep, loss = 0.0015974
I0819 14:27:19.105255 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0015974 (* 1 = 0.0015974 loss)
I0819 14:27:19.105278 12180 sgd_solver.cpp:180] [0.0] Iteration 12400, lr = 0.00327692, m = 0.9, lrm = 0.0327691, wd = 0.0005, gs = 1
I0819 14:27:19.493669 12180 solver.cpp:501] Iteration 12500, Testing net (#0)
I0819 14:27:19.812252 12176 blocking_queue.cpp:40] Data layer prefetch queue empty
I0819 14:27:19.812331 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:19.834314 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986123
I0819 14:27:19.834334 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0434638 (* 1 = 0.0434638 loss)
I0819 14:27:19.834391 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.729201s
I0819 14:27:19.838479 12180 solver.cpp:333]     [0.0] Iteration 12500 (137.136 iter/s, 0.729201s/100 iter), 13.3/21.3ep, loss = 0.000183225
I0819 14:27:19.838559 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:19.838574 12180 sgd_solver.cpp:180] [0.0] Iteration 12500, lr = 0.00326599, m = 0.9, lrm = 0.0326599, wd = 0.0005, gs = 1
I0819 14:27:20.234576 12180 solver.cpp:333]     [0.0] Iteration 12600 (252.492 iter/s, 0.396052s/100 iter), 13.4/21.3ep, loss = 0
I0819 14:27:20.234731 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:20.234755 12180 sgd_solver.cpp:180] [0.0] Iteration 12600, lr = 0.00325514, m = 0.9, lrm = 0.0325514, wd = 0.0005, gs = 1
I0819 14:27:20.628679 12180 solver.cpp:333]     [0.0] Iteration 12700 (253.765 iter/s, 0.394065s/100 iter), 13.5/21.3ep, loss = 0.000183225
I0819 14:27:20.628859 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:20.628877 12180 sgd_solver.cpp:180] [0.0] Iteration 12700, lr = 0.00324438, m = 0.9, lrm = 0.0324438, wd = 0.0005, gs = 1
I0819 14:27:21.021441 12180 solver.cpp:333]     [0.0] Iteration 12800 (254.622 iter/s, 0.392739s/100 iter), 13.7/21.3ep, loss = 0.00171375
I0819 14:27:21.021562 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00171375 (* 1 = 0.00171375 loss)
I0819 14:27:21.021598 12180 sgd_solver.cpp:180] [0.0] Iteration 12800, lr = 0.0032337, m = 0.9, lrm = 0.032337, wd = 0.0005, gs = 1
I0819 14:27:21.408782 12180 solver.cpp:333]     [0.0] Iteration 12900 (258.194 iter/s, 0.387305s/100 iter), 13.8/21.3ep, loss = 0.000366688
I0819 14:27:21.408885 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:27:21.408906 12180 sgd_solver.cpp:180] [0.0] Iteration 12900, lr = 0.00322311, m = 0.9, lrm = 0.0322311, wd = 0.0005, gs = 1
I0819 14:27:21.800763 12180 solver.cpp:501] Iteration 13000, Testing net (#0)
I0819 14:27:22.109992 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:22.141007 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.984575
I0819 14:27:22.141039 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0443439 (* 1 = 0.0443439 loss)
I0819 14:27:22.141146 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.732302s
I0819 14:27:22.145115 12180 solver.cpp:333]     [0.0] Iteration 13000 (136.556 iter/s, 0.732302s/100 iter), 13.9/21.3ep, loss = 0.00271416
I0819 14:27:22.145195 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00271416 (* 1 = 0.00271416 loss)
I0819 14:27:22.145211 12180 sgd_solver.cpp:180] [0.0] Iteration 13000, lr = 0.00321259, m = 0.9, lrm = 0.0321259, wd = 0.0005, gs = 1
I0819 14:27:22.510828 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:22.530239 12180 solver.cpp:333]     [0.0] Iteration 13100 (259.687 iter/s, 0.385079s/100 iter), 14/21.3ep, loss = 0.00246048
I0819 14:27:22.530345 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00246048 (* 1 = 0.00246048 loss)
I0819 14:27:22.530367 12180 sgd_solver.cpp:180] [0.0] Iteration 13100, lr = 0.00320215, m = 0.9, lrm = 0.0320215, wd = 0.0005, gs = 1
I0819 14:27:22.929361 12180 solver.cpp:333]     [0.0] Iteration 13200 (250.568 iter/s, 0.399094s/100 iter), 14.1/21.3ep, loss = 0.000489235
I0819 14:27:22.929467 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000489235 (* 1 = 0.000489235 loss)
I0819 14:27:22.929481 12180 sgd_solver.cpp:180] [0.0] Iteration 13200, lr = 0.0031918, m = 0.9, lrm = 0.031918, wd = 0.0005, gs = 1
I0819 14:27:23.326866 12180 solver.cpp:333]     [0.0] Iteration 13300 (251.597 iter/s, 0.397462s/100 iter), 14.2/21.3ep, loss = 6.10352e-05
I0819 14:27:23.327001 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:23.327023 12180 sgd_solver.cpp:180] [0.0] Iteration 13300, lr = 0.00318152, m = 0.9, lrm = 0.0318152, wd = 0.0005, gs = 1
I0819 14:27:23.726469 12180 solver.cpp:333]     [0.0] Iteration 13400 (250.268 iter/s, 0.399572s/100 iter), 14.3/21.3ep, loss = 0.00177479
I0819 14:27:23.726600 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00177479 (* 1 = 0.00177479 loss)
I0819 14:27:23.726621 12180 sgd_solver.cpp:180] [0.0] Iteration 13400, lr = 0.00317131, m = 0.9, lrm = 0.0317131, wd = 0.0005, gs = 1
I0819 14:27:24.119738 12180 solver.cpp:501] Iteration 13500, Testing net (#0)
I0819 14:27:24.413442 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:24.459936 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.985347
I0819 14:27:24.459985 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0556239 (* 1 = 0.0556239 loss)
I0819 14:27:24.460084 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.733583s
I0819 14:27:24.464371 12180 solver.cpp:333]     [0.0] Iteration 13500 (136.317 iter/s, 0.733583s/100 iter), 14.4/21.3ep, loss = 6.10352e-05
I0819 14:27:24.464541 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:24.464561 12180 sgd_solver.cpp:180] [0.0] Iteration 13500, lr = 0.00316119, m = 0.9, lrm = 0.0316119, wd = 0.0005, gs = 1
I0819 14:27:24.852434 12180 solver.cpp:333]     [0.0] Iteration 13600 (257.729 iter/s, 0.388005s/100 iter), 14.5/21.3ep, loss = 0.000366688
I0819 14:27:24.852527 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:27:24.852540 12180 sgd_solver.cpp:180] [0.0] Iteration 13600, lr = 0.00315114, m = 0.9, lrm = 0.0315114, wd = 0.0005, gs = 1
I0819 14:27:25.244174 12180 solver.cpp:333]     [0.0] Iteration 13700 (255.285 iter/s, 0.391719s/100 iter), 14.6/21.3ep, loss = 0.00476837
I0819 14:27:25.244303 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00476837 (* 1 = 0.00476837 loss)
I0819 14:27:25.244325 12180 sgd_solver.cpp:180] [0.0] Iteration 13700, lr = 0.00314116, m = 0.9, lrm = 0.0314116, wd = 0.0005, gs = 1
I0819 14:27:25.637338 12180 solver.cpp:333]     [0.0] Iteration 13800 (254.368 iter/s, 0.393131s/100 iter), 14.7/21.3ep, loss = 0.00390625
I0819 14:27:25.637475 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00390625 (* 1 = 0.00390625 loss)
I0819 14:27:25.637491 12180 sgd_solver.cpp:180] [0.0] Iteration 13800, lr = 0.00313126, m = 0.9, lrm = 0.0313125, wd = 0.0005, gs = 1
I0819 14:27:26.029569 12180 solver.cpp:333]     [0.0] Iteration 13900 (254.991 iter/s, 0.392171s/100 iter), 14.8/21.3ep, loss = 0.000183225
I0819 14:27:26.029652 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:26.029667 12180 sgd_solver.cpp:180] [0.0] Iteration 13900, lr = 0.00312142, m = 0.9, lrm = 0.0312142, wd = 0.0005, gs = 1
I0819 14:27:26.419548 12180 solver.cpp:501] Iteration 14000, Testing net (#0)
I0819 14:27:26.700500 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:26.759594 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.987666
I0819 14:27:26.759631 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0496093 (* 1 = 0.0496093 loss)
I0819 14:27:26.759672 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.730102s
I0819 14:27:26.763726 12180 solver.cpp:333]     [0.0] Iteration 14000 (136.967 iter/s, 0.730102s/100 iter), 14.9/21.3ep, loss = 0.000366688
I0819 14:27:26.763816 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:27:26.763836 12180 sgd_solver.cpp:180] [0.0] Iteration 14000, lr = 0.00311166, m = 0.9, lrm = 0.0311166, wd = 0.0005, gs = 1
I0819 14:27:26.896312 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:27.151182 12180 solver.cpp:333]     [0.0] Iteration 14100 (258.12 iter/s, 0.387416s/100 iter), 15/21.3ep, loss = 0.000427961
I0819 14:27:27.151305 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000427961 (* 1 = 0.000427961 loss)
I0819 14:27:27.151338 12180 sgd_solver.cpp:180] [0.0] Iteration 14100, lr = 0.00310198, m = 0.9, lrm = 0.0310198, wd = 0.0005, gs = 1
I0819 14:27:27.542544 12180 solver.cpp:333]     [0.0] Iteration 14200 (255.548 iter/s, 0.391316s/100 iter), 15.1/21.3ep, loss = 6.10352e-05
I0819 14:27:27.542711 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:27.542734 12180 sgd_solver.cpp:180] [0.0] Iteration 14200, lr = 0.00309236, m = 0.9, lrm = 0.0309236, wd = 0.0005, gs = 1
I0819 14:27:27.934154 12180 solver.cpp:333]     [0.0] Iteration 14300 (255.369 iter/s, 0.39159s/100 iter), 15.3/21.3ep, loss = 6.10352e-05
I0819 14:27:27.934242 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:27.934255 12180 sgd_solver.cpp:180] [0.0] Iteration 14300, lr = 0.00308281, m = 0.9, lrm = 0.0308281, wd = 0.0005, gs = 1
I0819 14:27:28.328300 12180 solver.cpp:333]     [0.0] Iteration 14400 (253.73 iter/s, 0.394119s/100 iter), 15.4/21.3ep, loss = 0.000183225
I0819 14:27:28.328442 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:28.328456 12180 sgd_solver.cpp:180] [0.0] Iteration 14400, lr = 0.00307333, m = 0.9, lrm = 0.0307333, wd = 0.0005, gs = 1
I0819 14:27:28.712457 12180 solver.cpp:501] Iteration 14500, Testing net (#0)
I0819 14:27:28.980823 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:29.051789 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986123
I0819 14:27:29.051827 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0394769 (* 1 = 0.0394769 loss)
I0819 14:27:29.051851 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.723519s
I0819 14:27:29.056363 12180 solver.cpp:333]     [0.0] Iteration 14500 (138.213 iter/s, 0.723519s/100 iter), 15.5/21.3ep, loss = 0.000244379
I0819 14:27:29.056437 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000244379 (* 1 = 0.000244379 loss)
I0819 14:27:29.056452 12180 sgd_solver.cpp:180] [0.0] Iteration 14500, lr = 0.00306392, m = 0.9, lrm = 0.0306391, wd = 0.0005, gs = 1
I0819 14:27:29.445569 12180 solver.cpp:333]     [0.0] Iteration 14600 (256.951 iter/s, 0.389179s/100 iter), 15.6/21.3ep, loss = 0
I0819 14:27:29.445713 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:29.445734 12180 sgd_solver.cpp:180] [0.0] Iteration 14600, lr = 0.00305457, m = 0.9, lrm = 0.0305457, wd = 0.0005, gs = 1
I0819 14:27:29.842738 12180 solver.cpp:333]     [0.0] Iteration 14700 (251.802 iter/s, 0.397138s/100 iter), 15.7/21.3ep, loss = 0.00141144
I0819 14:27:29.842880 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00141144 (* 1 = 0.00141144 loss)
I0819 14:27:29.842911 12180 sgd_solver.cpp:180] [0.0] Iteration 14700, lr = 0.00304529, m = 0.9, lrm = 0.0304529, wd = 0.0005, gs = 1
I0819 14:27:30.244244 12180 solver.cpp:333]     [0.0] Iteration 14800 (249.096 iter/s, 0.401452s/100 iter), 15.8/21.3ep, loss = 0.019577
I0819 14:27:30.244328 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.019577 (* 1 = 0.019577 loss)
I0819 14:27:30.244345 12180 sgd_solver.cpp:180] [0.0] Iteration 14800, lr = 0.00303608, m = 0.9, lrm = 0.0303607, wd = 0.0005, gs = 1
I0819 14:27:30.639144 12180 solver.cpp:333]     [0.0] Iteration 14900 (253.251 iter/s, 0.394865s/100 iter), 15.9/21.3ep, loss = 0.00944519
I0819 14:27:30.639266 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00944519 (* 1 = 0.00944519 loss)
I0819 14:27:30.639307 12180 sgd_solver.cpp:180] [0.0] Iteration 14900, lr = 0.00302693, m = 0.9, lrm = 0.0302692, wd = 0.0005, gs = 1
I0819 14:27:30.914341 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:31.028600 12180 solver.cpp:501] Iteration 15000, Testing net (#0)
I0819 14:27:31.283694 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:31.368924 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986123
I0819 14:27:31.368963 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.040018 (* 1 = 0.040018 loss)
I0819 14:27:31.369006 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.729852s
I0819 14:27:31.373312 12180 solver.cpp:333]     [0.0] Iteration 15000 (137.014 iter/s, 0.729852s/100 iter), 16/21.3ep, loss = 0
I0819 14:27:31.373409 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:31.373426 12180 sgd_solver.cpp:180] [0.0] Iteration 15000, lr = 0.00301784, m = 0.9, lrm = 0.0301784, wd = 0.0005, gs = 1
I0819 14:27:31.769130 12180 solver.cpp:333]     [0.0] Iteration 15100 (252.664 iter/s, 0.395782s/100 iter), 16.1/21.3ep, loss = 6.10352e-05
I0819 14:27:31.769268 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:31.769290 12180 sgd_solver.cpp:180] [0.0] Iteration 15100, lr = 0.00300882, m = 0.9, lrm = 0.0300882, wd = 0.0005, gs = 1
I0819 14:27:32.165801 12180 solver.cpp:333]     [0.0] Iteration 15200 (252.128 iter/s, 0.396624s/100 iter), 16.2/21.3ep, loss = 0.00485229
I0819 14:27:32.165938 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00485229 (* 1 = 0.00485229 loss)
I0819 14:27:32.165967 12180 sgd_solver.cpp:180] [0.0] Iteration 15200, lr = 0.00299986, m = 0.9, lrm = 0.0299986, wd = 0.0005, gs = 1
I0819 14:27:32.557323 12180 solver.cpp:333]     [0.0] Iteration 15300 (255.426 iter/s, 0.391503s/100 iter), 16.3/21.3ep, loss = 0.000672817
I0819 14:27:32.557482 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000672817 (* 1 = 0.000672817 loss)
I0819 14:27:32.557500 12180 sgd_solver.cpp:180] [0.0] Iteration 15300, lr = 0.00299096, m = 0.9, lrm = 0.0299096, wd = 0.0005, gs = 1
I0819 14:27:32.945071 12180 solver.cpp:333]     [0.0] Iteration 15400 (257.92 iter/s, 0.387717s/100 iter), 16.4/21.3ep, loss = 0
I0819 14:27:32.945174 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:32.945188 12180 sgd_solver.cpp:180] [0.0] Iteration 15400, lr = 0.00298213, m = 0.9, lrm = 0.0298213, wd = 0.0005, gs = 1
I0819 14:27:33.330049 12180 solver.cpp:501] Iteration 15500, Testing net (#0)
I0819 14:27:33.572710 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:33.672628 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.989199
I0819 14:27:33.672662 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0357635 (* 1 = 0.0357635 loss)
I0819 14:27:33.672703 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.727585s
I0819 14:27:33.676717 12180 solver.cpp:333]     [0.0] Iteration 15500 (137.441 iter/s, 0.727585s/100 iter), 16.5/21.3ep, loss = 0.0188446
I0819 14:27:33.676795 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0188446 (* 1 = 0.0188446 loss)
I0819 14:27:33.676815 12180 sgd_solver.cpp:180] [0.0] Iteration 15500, lr = 0.00297335, m = 0.9, lrm = 0.0297335, wd = 0.0005, gs = 1
I0819 14:27:34.072019 12180 solver.cpp:333]     [0.0] Iteration 15600 (252.996 iter/s, 0.395264s/100 iter), 16.6/21.3ep, loss = 0.000979424
I0819 14:27:34.072121 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000979424 (* 1 = 0.000979424 loss)
I0819 14:27:34.072139 12180 sgd_solver.cpp:180] [0.0] Iteration 15600, lr = 0.00296464, m = 0.9, lrm = 0.0296463, wd = 0.0005, gs = 1
I0819 14:27:34.463007 12180 solver.cpp:333]     [0.0] Iteration 15700 (255.794 iter/s, 0.39094s/100 iter), 16.7/21.3ep, loss = 0
I0819 14:27:34.463106 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:34.463129 12180 sgd_solver.cpp:180] [0.0] Iteration 15700, lr = 0.00295598, m = 0.9, lrm = 0.0295598, wd = 0.0005, gs = 1
I0819 14:27:34.856030 12180 solver.cpp:333]     [0.0] Iteration 15800 (254.451 iter/s, 0.393003s/100 iter), 16.9/21.3ep, loss = 0
I0819 14:27:34.856140 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:34.856155 12180 sgd_solver.cpp:180] [0.0] Iteration 15800, lr = 0.00294738, m = 0.9, lrm = 0.0294738, wd = 0.0005, gs = 1
I0819 14:27:35.244522 12180 solver.cpp:333]     [0.0] Iteration 15900 (257.423 iter/s, 0.388466s/100 iter), 17/21.3ep, loss = 0.00122643
I0819 14:27:35.244832 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00122643 (* 1 = 0.00122643 loss)
I0819 14:27:35.244850 12180 sgd_solver.cpp:180] [0.0] Iteration 15900, lr = 0.00293884, m = 0.9, lrm = 0.0293884, wd = 0.0005, gs = 1
I0819 14:27:35.280970 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:35.635706 12180 solver.cpp:501] Iteration 16000, Testing net (#0)
I0819 14:27:35.868583 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:35.980756 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.988433
I0819 14:27:35.980792 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0517566 (* 1 = 0.0517566 loss)
I0819 14:27:35.980839 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.736285s
I0819 14:27:35.985051 12180 solver.cpp:333]     [0.0] Iteration 16000 (135.817 iter/s, 0.736285s/100 iter), 17.1/21.3ep, loss = 0
I0819 14:27:35.985157 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:35.985183 12180 sgd_solver.cpp:180] [0.0] Iteration 16000, lr = 0.00293036, m = 0.9, lrm = 0.0293036, wd = 0.0005, gs = 1
I0819 14:27:36.380599 12180 solver.cpp:333]     [0.0] Iteration 16100 (252.837 iter/s, 0.395512s/100 iter), 17.2/21.3ep, loss = 0
I0819 14:27:36.380712 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:36.380733 12180 sgd_solver.cpp:180] [0.0] Iteration 16100, lr = 0.00292194, m = 0.9, lrm = 0.0292194, wd = 0.0005, gs = 1
I0819 14:27:36.777509 12180 solver.cpp:333]     [0.0] Iteration 16200 (251.965 iter/s, 0.39688s/100 iter), 17.3/21.3ep, loss = 0.00430679
I0819 14:27:36.777609 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00430679 (* 1 = 0.00430679 loss)
I0819 14:27:36.777647 12180 sgd_solver.cpp:180] [0.0] Iteration 16200, lr = 0.00291357, m = 0.9, lrm = 0.0291357, wd = 0.0005, gs = 1
I0819 14:27:37.174001 12180 solver.cpp:333]     [0.0] Iteration 16300 (252.233 iter/s, 0.396459s/100 iter), 17.4/21.3ep, loss = 0
I0819 14:27:37.174110 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:37.174132 12180 sgd_solver.cpp:180] [0.0] Iteration 16300, lr = 0.00290526, m = 0.9, lrm = 0.0290526, wd = 0.0005, gs = 1
I0819 14:27:37.562978 12180 solver.cpp:333]     [0.0] Iteration 16400 (257.115 iter/s, 0.38893s/100 iter), 17.5/21.3ep, loss = 0.00067234
I0819 14:27:37.563056 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00067234 (* 1 = 0.00067234 loss)
I0819 14:27:37.563078 12180 sgd_solver.cpp:180] [0.0] Iteration 16400, lr = 0.002897, m = 0.9, lrm = 0.02897, wd = 0.0005, gs = 1
I0819 14:27:37.955337 12180 solver.cpp:501] Iteration 16500, Testing net (#0)
I0819 14:27:38.174645 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:38.311553 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.988433
I0819 14:27:38.311589 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0518357 (* 1 = 0.0518357 loss)
I0819 14:27:38.311628 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.748634s
I0819 14:27:38.315997 12180 solver.cpp:333]     [0.0] Iteration 16500 (133.577 iter/s, 0.748634s/100 iter), 17.6/21.3ep, loss = 0.232056
I0819 14:27:38.316052 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.232056 (* 1 = 0.232056 loss)
I0819 14:27:38.316118 12180 sgd_solver.cpp:180] [0.0] Iteration 16500, lr = 0.0028888, m = 0.9, lrm = 0.0288879, wd = 0.0005, gs = 1
I0819 14:27:38.713359 12180 solver.cpp:333]     [0.0] Iteration 16600 (251.682 iter/s, 0.397326s/100 iter), 17.7/21.3ep, loss = 0.0216827
I0819 14:27:38.713481 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0216827 (* 1 = 0.0216827 loss)
I0819 14:27:38.713501 12180 sgd_solver.cpp:180] [0.0] Iteration 16600, lr = 0.00288065, m = 0.9, lrm = 0.0288065, wd = 0.0005, gs = 1
I0819 14:27:39.104097 12180 solver.cpp:333]     [0.0] Iteration 16700 (255.96 iter/s, 0.390686s/100 iter), 17.8/21.3ep, loss = 0
I0819 14:27:39.104202 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:39.104259 12180 sgd_solver.cpp:180] [0.0] Iteration 16700, lr = 0.00287255, m = 0.9, lrm = 0.0287255, wd = 0.0005, gs = 1
I0819 14:27:39.495709 12180 solver.cpp:333]     [0.0] Iteration 16800 (255.37 iter/s, 0.391589s/100 iter), 17.9/21.3ep, loss = 0
I0819 14:27:39.495846 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:39.495867 12180 sgd_solver.cpp:180] [0.0] Iteration 16800, lr = 0.00286451, m = 0.9, lrm = 0.0286451, wd = 0.0005, gs = 1
I0819 14:27:39.675549 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:39.895102 12180 solver.cpp:333]     [0.0] Iteration 16900 (250.401 iter/s, 0.399359s/100 iter), 18/21.3ep, loss = 0.00159168
I0819 14:27:39.895221 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00159168 (* 1 = 0.00159168 loss)
I0819 14:27:39.895236 12180 sgd_solver.cpp:180] [0.0] Iteration 16900, lr = 0.00285652, m = 0.9, lrm = 0.0285652, wd = 0.0005, gs = 1
I0819 14:27:40.282938 12180 solver.cpp:501] Iteration 17000, Testing net (#0)
I0819 14:27:40.491072 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:40.624936 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.990747
I0819 14:27:40.624969 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0361191 (* 1 = 0.0361191 loss)
I0819 14:27:40.625003 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.729877s
I0819 14:27:40.629124 12180 solver.cpp:333]     [0.0] Iteration 17000 (137.009 iter/s, 0.729877s/100 iter), 18.1/21.3ep, loss = 6.10352e-05
I0819 14:27:40.629197 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:40.629211 12180 sgd_solver.cpp:180] [0.0] Iteration 17000, lr = 0.00284858, m = 0.9, lrm = 0.0284858, wd = 0.0005, gs = 1
I0819 14:27:41.020599 12180 solver.cpp:333]     [0.0] Iteration 17100 (255.456 iter/s, 0.391457s/100 iter), 18.2/21.3ep, loss = 6.10352e-05
I0819 14:27:41.020704 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:41.020720 12180 sgd_solver.cpp:180] [0.0] Iteration 17100, lr = 0.00284069, m = 0.9, lrm = 0.0284069, wd = 0.0005, gs = 1
I0819 14:27:41.407433 12180 solver.cpp:333]     [0.0] Iteration 17200 (258.535 iter/s, 0.386796s/100 iter), 18.3/21.3ep, loss = 0.000244379
I0819 14:27:41.407536 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000244379 (* 1 = 0.000244379 loss)
I0819 14:27:41.407567 12180 sgd_solver.cpp:180] [0.0] Iteration 17200, lr = 0.00283286, m = 0.9, lrm = 0.0283286, wd = 0.0005, gs = 1
I0819 14:27:41.800132 12180 solver.cpp:333]     [0.0] Iteration 17300 (254.67 iter/s, 0.392665s/100 iter), 18.5/21.3ep, loss = 0.0199738
I0819 14:27:41.800249 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0199738 (* 1 = 0.0199738 loss)
I0819 14:27:41.800274 12180 sgd_solver.cpp:180] [0.0] Iteration 17300, lr = 0.00282507, m = 0.9, lrm = 0.0282507, wd = 0.0005, gs = 1
I0819 14:27:42.191341 12180 solver.cpp:333]     [0.0] Iteration 17400 (255.639 iter/s, 0.391177s/100 iter), 18.6/21.3ep, loss = 0.000183225
I0819 14:27:42.191417 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:42.191434 12180 sgd_solver.cpp:180] [0.0] Iteration 17400, lr = 0.00281733, m = 0.9, lrm = 0.0281733, wd = 0.0005, gs = 1
I0819 14:27:42.576354 12180 solver.cpp:501] Iteration 17500, Testing net (#0)
I0819 14:27:42.769623 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:42.918392 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.987666
I0819 14:27:42.918422 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0419272 (* 1 = 0.0419272 loss)
I0819 14:27:42.918457 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.727082s
I0819 14:27:42.922796 12180 solver.cpp:333]     [0.0] Iteration 17500 (137.536 iter/s, 0.727082s/100 iter), 18.7/21.3ep, loss = 6.10352e-05
I0819 14:27:42.922881 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:42.922974 12180 sgd_solver.cpp:180] [0.0] Iteration 17500, lr = 0.00280965, m = 0.9, lrm = 0.0280965, wd = 0.0005, gs = 1
I0819 14:27:43.326612 12180 solver.cpp:333]     [0.0] Iteration 17600 (247.677 iter/s, 0.403752s/100 iter), 18.8/21.3ep, loss = 0.117798
I0819 14:27:43.326740 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.117798 (* 1 = 0.117798 loss)
I0819 14:27:43.326763 12180 sgd_solver.cpp:180] [0.0] Iteration 17600, lr = 0.00280201, m = 0.9, lrm = 0.0280201, wd = 0.0005, gs = 1
I0819 14:27:43.715689 12180 solver.cpp:333]     [0.0] Iteration 17700 (257.025 iter/s, 0.389068s/100 iter), 18.9/21.3ep, loss = 0.000733852
I0819 14:27:43.715785 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000733852 (* 1 = 0.000733852 loss)
I0819 14:27:43.715813 12180 sgd_solver.cpp:180] [0.0] Iteration 17700, lr = 0.00279442, m = 0.9, lrm = 0.0279442, wd = 0.0005, gs = 1
I0819 14:27:44.042356 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:44.106453 12180 solver.cpp:333]     [0.0] Iteration 17800 (255.93 iter/s, 0.390732s/100 iter), 19/21.3ep, loss = 6.10352e-05
I0819 14:27:44.106566 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:44.106595 12180 sgd_solver.cpp:180] [0.0] Iteration 17800, lr = 0.00278688, m = 0.9, lrm = 0.0278688, wd = 0.0005, gs = 1
I0819 14:27:44.504736 12180 solver.cpp:333]     [0.0] Iteration 17900 (251.104 iter/s, 0.398241s/100 iter), 19.1/21.3ep, loss = 0.000366688
I0819 14:27:44.504832 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:27:44.504860 12180 sgd_solver.cpp:180] [0.0] Iteration 17900, lr = 0.00277938, m = 0.9, lrm = 0.0277938, wd = 0.0005, gs = 1
I0819 14:27:44.896610 12180 solver.cpp:501] Iteration 18000, Testing net (#0)
I0819 14:27:45.080484 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:45.239715 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.988433
I0819 14:27:45.239766 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0333037 (* 1 = 0.0333037 loss)
I0819 14:27:45.239790 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.735032s
I0819 14:27:45.244056 12180 solver.cpp:333]     [0.0] Iteration 18000 (136.048 iter/s, 0.735032s/100 iter), 19.2/21.3ep, loss = 0.000366688
I0819 14:27:45.244135 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000366688 (* 1 = 0.000366688 loss)
I0819 14:27:45.244149 12180 sgd_solver.cpp:180] [0.0] Iteration 18000, lr = 0.00277193, m = 0.9, lrm = 0.0277193, wd = 0.0005, gs = 1
I0819 14:27:45.644682 12180 solver.cpp:333]     [0.0] Iteration 18100 (249.639 iter/s, 0.400579s/100 iter), 19.3/21.3ep, loss = 0.0153732
I0819 14:27:45.644776 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0153732 (* 1 = 0.0153732 loss)
I0819 14:27:45.644798 12180 sgd_solver.cpp:180] [0.0] Iteration 18100, lr = 0.00276453, m = 0.9, lrm = 0.0276453, wd = 0.0005, gs = 1
I0819 14:27:46.047291 12180 solver.cpp:333]     [0.0] Iteration 18200 (248.4 iter/s, 0.402576s/100 iter), 19.4/21.3ep, loss = 0.000183225
I0819 14:27:46.047417 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:46.047441 12180 sgd_solver.cpp:180] [0.0] Iteration 18200, lr = 0.00275718, m = 0.9, lrm = 0.0275717, wd = 0.0005, gs = 1
I0819 14:27:46.446501 12180 solver.cpp:333]     [0.0] Iteration 18300 (250.518 iter/s, 0.399173s/100 iter), 19.5/21.3ep, loss = 0.000183225
I0819 14:27:46.446692 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:46.446728 12180 sgd_solver.cpp:180] [0.0] Iteration 18300, lr = 0.00274987, m = 0.9, lrm = 0.0274986, wd = 0.0005, gs = 1
I0819 14:27:46.839052 12180 solver.cpp:333]     [0.0] Iteration 18400 (254.776 iter/s, 0.392501s/100 iter), 19.6/21.3ep, loss = 0.000183225
I0819 14:27:46.839157 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000183225 (* 1 = 0.000183225 loss)
I0819 14:27:46.839222 12180 sgd_solver.cpp:180] [0.0] Iteration 18400, lr = 0.0027426, m = 0.9, lrm = 0.027426, wd = 0.0005, gs = 1
I0819 14:27:47.228477 12180 solver.cpp:501] Iteration 18500, Testing net (#0)
I0819 14:27:47.395493 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:47.571609 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986904
I0819 14:27:47.571638 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0460434 (* 1 = 0.0460434 loss)
I0819 14:27:47.571682 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.73261s
I0819 14:27:47.575708 12180 solver.cpp:333]     [0.0] Iteration 18500 (136.498 iter/s, 0.73261s/100 iter), 19.7/21.3ep, loss = 0.00116253
I0819 14:27:47.575798 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00116253 (* 1 = 0.00116253 loss)
I0819 14:27:47.575822 12180 sgd_solver.cpp:180] [0.0] Iteration 18500, lr = 0.00273538, m = 0.9, lrm = 0.0273538, wd = 0.0005, gs = 1
I0819 14:27:47.966687 12180 solver.cpp:333]     [0.0] Iteration 18600 (255.824 iter/s, 0.390894s/100 iter), 19.8/21.3ep, loss = 0.0254974
I0819 14:27:47.966786 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0254974 (* 1 = 0.0254974 loss)
I0819 14:27:47.966800 12180 sgd_solver.cpp:180] [0.0] Iteration 18600, lr = 0.0027282, m = 0.9, lrm = 0.027282, wd = 0.0005, gs = 1
I0819 14:27:48.361131 12180 solver.cpp:333]     [0.0] Iteration 18700 (253.518 iter/s, 0.394449s/100 iter), 19.9/21.3ep, loss = 0.00012219
I0819 14:27:48.361234 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00012219 (* 1 = 0.00012219 loss)
I0819 14:27:48.361249 12180 sgd_solver.cpp:180] [0.0] Iteration 18700, lr = 0.00272107, m = 0.9, lrm = 0.0272107, wd = 0.0005, gs = 1
I0819 14:27:48.442080 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:48.759543 12180 solver.cpp:333]     [0.0] Iteration 18800 (251.032 iter/s, 0.398356s/100 iter), 20.1/21.3ep, loss = 0.0012846
I0819 14:27:48.759675 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.0012846 (* 1 = 0.0012846 loss)
I0819 14:27:48.759693 12180 sgd_solver.cpp:180] [0.0] Iteration 18800, lr = 0.00271398, m = 0.9, lrm = 0.0271398, wd = 0.0005, gs = 1
I0819 14:27:49.157918 12180 solver.cpp:333]     [0.0] Iteration 18900 (251.031 iter/s, 0.398356s/100 iter), 20.2/21.3ep, loss = 6.10352e-05
I0819 14:27:49.158027 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:49.158057 12180 sgd_solver.cpp:180] [0.0] Iteration 18900, lr = 0.00270694, m = 0.9, lrm = 0.0270693, wd = 0.0005, gs = 1
I0819 14:27:49.545063 12180 solver.cpp:501] Iteration 19000, Testing net (#0)
I0819 14:27:49.697620 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:49.698765 12228 blocking_queue.cpp:40] Data layer prefetch queue empty
I0819 14:27:49.888763 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986118
I0819 14:27:49.888818 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.037638 (* 1 = 0.037638 loss)
I0819 14:27:49.888841 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.730892s
I0819 14:27:49.893033 12180 solver.cpp:333]     [0.0] Iteration 19000 (136.819 iter/s, 0.730892s/100 iter), 20.3/21.3ep, loss = 0
I0819 14:27:49.893095 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:49.893110 12180 sgd_solver.cpp:180] [0.0] Iteration 19000, lr = 0.00269993, m = 0.9, lrm = 0.0269993, wd = 0.0005, gs = 1
I0819 14:27:50.287739 12180 solver.cpp:333]     [0.0] Iteration 19100 (253.381 iter/s, 0.394663s/100 iter), 20.4/21.3ep, loss = 0
I0819 14:27:50.287842 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:50.287858 12180 sgd_solver.cpp:180] [0.0] Iteration 19100, lr = 0.00269297, m = 0.9, lrm = 0.0269297, wd = 0.0005, gs = 1
I0819 14:27:50.689630 12180 solver.cpp:333]     [0.0] Iteration 19200 (248.855 iter/s, 0.40184s/100 iter), 20.5/21.3ep, loss = 0
I0819 14:27:50.689730 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:50.689788 12180 sgd_solver.cpp:180] [0.0] Iteration 19200, lr = 0.00268605, m = 0.9, lrm = 0.0268605, wd = 0.0005, gs = 1
I0819 14:27:51.086681 12180 solver.cpp:333]     [0.0] Iteration 19300 (251.881 iter/s, 0.397013s/100 iter), 20.6/21.3ep, loss = 0.000427723
I0819 14:27:51.086788 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000427723 (* 1 = 0.000427723 loss)
I0819 14:27:51.086802 12180 sgd_solver.cpp:180] [0.0] Iteration 19300, lr = 0.00267917, m = 0.9, lrm = 0.0267917, wd = 0.0005, gs = 1
I0819 14:27:51.485642 12180 solver.cpp:333]     [0.0] Iteration 19400 (250.665 iter/s, 0.398938s/100 iter), 20.7/21.3ep, loss = 0
I0819 14:27:51.485769 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:51.485783 12180 sgd_solver.cpp:180] [0.0] Iteration 19400, lr = 0.00267233, m = 0.9, lrm = 0.0267233, wd = 0.0005, gs = 1
I0819 14:27:51.878471 12180 solver.cpp:501] Iteration 19500, Testing net (#0)
I0819 14:27:52.025992 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:52.228219 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.986118
I0819 14:27:52.228269 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0535487 (* 1 = 0.0535487 loss)
I0819 14:27:52.228309 12180 solver.cpp:271] [MultiGPU] Tests completed in 0.742619s
I0819 14:27:52.232753 12180 solver.cpp:333]     [0.0] Iteration 19500 (134.659 iter/s, 0.742619s/100 iter), 20.8/21.3ep, loss = 0.00878143
I0819 14:27:52.232823 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.00878143 (* 1 = 0.00878143 loss)
I0819 14:27:52.232838 12180 sgd_solver.cpp:180] [0.0] Iteration 19500, lr = 0.00266554, m = 0.9, lrm = 0.0266554, wd = 0.0005, gs = 1
I0819 14:27:52.641575 12180 solver.cpp:333]     [0.0] Iteration 19600 (244.628 iter/s, 0.408784s/100 iter), 20.9/21.3ep, loss = 6.10352e-05
I0819 14:27:52.641687 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:52.641707 12180 sgd_solver.cpp:180] [0.0] Iteration 19600, lr = 0.00265878, m = 0.9, lrm = 0.0265878, wd = 0.0005, gs = 1
I0819 14:27:52.885428 12318 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:53.055436 12180 solver.cpp:333]     [0.0] Iteration 19700 (241.658 iter/s, 0.413809s/100 iter), 21/21.3ep, loss = 6.10352e-05
I0819 14:27:53.055524 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 6.10352e-05 (* 1 = 6.10352e-05 loss)
I0819 14:27:53.055538 12180 sgd_solver.cpp:180] [0.0] Iteration 19700, lr = 0.00265206, m = 0.9, lrm = 0.0265206, wd = 0.0005, gs = 1
I0819 14:27:53.459993 12180 solver.cpp:333]     [0.0] Iteration 19800 (247.192 iter/s, 0.404544s/100 iter), 21.1/21.3ep, loss = 0
I0819 14:27:53.460112 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:53.460125 12180 sgd_solver.cpp:180] [0.0] Iteration 19800, lr = 0.00264539, m = 0.9, lrm = 0.0264539, wd = 0.0005, gs = 1
I0819 14:27:53.862141 12180 solver.cpp:333]     [0.0] Iteration 19900 (248.689 iter/s, 0.402108s/100 iter), 21.2/21.3ep, loss = 0.000550747
I0819 14:27:53.862237 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0.000550747 (* 1 = 0.000550747 loss)
I0819 14:27:53.862257 12180 sgd_solver.cpp:180] [0.0] Iteration 19900, lr = 0.00263875, m = 0.9, lrm = 0.0263875, wd = 0.0005, gs = 1
I0819 14:27:54.252153 12180 solver.cpp:333]     [0.0] Iteration 20000 (253.862 iter/s, 0.389976s/99 iter), 21.3/21.3ep, loss = 0
I0819 14:27:54.252243 12180 solver.cpp:361]     [0.0]     Train net output #0: loss = 0 (* 1 = 0 loss)
I0819 14:27:54.256789 12180 solver.cpp:769] Snapshotting to binary proto file _iter_20000.caffemodel
I0819 14:27:54.265588 12180 sgd_solver.cpp:448] Snapshotting solver state to binary proto file _iter_20000.solverstate
I0819 14:27:54.267252 12180 solver.cpp:466] Iteration 20000, loss = 0.0498047
I0819 14:27:54.267278 12180 solver.cpp:501] Iteration 20000, Testing net (#0)
I0819 14:27:54.396373 12177 data_reader.cpp:321] Restarting data pre-fetching
I0819 14:27:54.607734 12180 solver.cpp:588]     (0.0)    Test net output #0: accuracy = 0.992285
I0819 14:27:54.607769 12180 solver.cpp:588]     (0.0)    Test net output #1: loss = 0.0369105 (* 1 = 0.0369105 loss)
I0819 14:27:54.608280 12127 parallel.cpp:118] Root Solver performance on device 0: 218.8 * 8 = 1750 img/sec (20000 itr in 91.39 sec)
I0819 14:27:54.608325 12127 parallel.cpp:128]      Solver performance on device 1: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608333 12127 parallel.cpp:128]      Solver performance on device 2: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608340 12127 parallel.cpp:128]      Solver performance on device 3: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608347 12127 parallel.cpp:128]      Solver performance on device 4: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608355 12127 parallel.cpp:128]      Solver performance on device 5: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608378 12127 parallel.cpp:128]      Solver performance on device 6: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608386 12127 parallel.cpp:128]      Solver performance on device 7: 218.8 * 8 = 1750 img/sec (20000 itr in 91.4 sec)
I0819 14:27:54.608388 12127 parallel.cpp:143] Overall multi-GPU performance: 14003.7 img/sec
I0819 14:27:55.038269 12127 caffe.cpp:269] Optimization Done in 1m 51s
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................
Entering ReduceAndUpdate thread ........................