Advertisement
Guest User

Untitled

a guest
Oct 1st, 2019
143
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.05 KB | None | 0 0
  1. #!/bin/bash
  2. # BASH SCRIPT FOR ADDING CHARACTERS
  3. ################################################
  # ---- Paths and limits used by every step of this script ----

  # Directory with the Tesseract training scripts.
  tesstrain_dir="/usr/local/bin/tesseract-training/"

  # Directory with the old 'best' traineddata that is used as the starting
  # point for fine-tuning.
  tessdata_dir=~/Desktop/tesseract/tessdata

  # System fonts directory. Written as an absolute path: the previous
  # "~/../../Library/Fonts" only resolved to /Library/Fonts when $HOME was
  # exactly two levels below "/".
  fonts_dir="/Library/Fonts"

  # Fonts to use for training - not a huge set, but hopefully enough to add
  # the extra chars. Currently unused: the font lists are passed literally
  # via --fontlist in the tesstrain.sh calls.
  #fonts_for_training="Magic R Regular"

  # Fonts for computing evals of the best-fit model (also currently unused).
  #fonts_for_eval="Arial"

  # IMPORTANT - add the new chars to langdata/eng/eng.training_text with
  # about 15 instances per char before running.
  langdata_dir=~/Desktop/langdata_lstm

  # Output directories for this run.
  train_output_dir=~/Desktop/tesstutorial/trainplusminus
  eval_output_dir=~/Desktop/tesstutorial/evalplusminus

  # Maximum number of pages to create for training.
  maxpages=1000

  # Maximum iterations for fine-tuning.
  maxiterations=3600

  # The output traineddata file to drop into tesseract.
  final_trained_data_file="${train_output_dir}/eng.traineddata.NEW"

  # Fatal bug workaround for pango.
  export PANGOCAIRO_BACKEND=fc
  39. ################################################
  40.  
  41. ################################################################
  # Step switches: a step below runs only when its switch is exactly "yes".
  MakeTraining="no"            # generate training data
  MakeEval="yes"               # generate eval data
  MakeLSTM="yes"               # extract the LSTM model from the old traineddata
  RunTraining="yes"            # fine-tune from the extracted model
  BuildFinalTrainedFile="yes"  # build the final traineddata file
  48. ################################################################
  49.  
  50. ################################################
  # GENERATE TRAINING DATA
  # Runs tesstrain.sh with the "Arial" font into $train_output_dir.
  # Skipped unless MakeTraining is exactly "yes".
  if [[ "${MakeTraining}" == "yes" ]]; then
    echo "###### MAKING TRAINING DATA ######"
    # Clear out files from a previous run. The :? guard aborts the script
    # instead of expanding to "/*" if train_output_dir is empty or unset.
    rm -f -- "${train_output_dir:?}"/*
    # Every continuation needs a space before the backslash; without it the
    # flag fuses with the next line (previously
    # "--noextract_font_properties--exposures").
    /usr/local/bin/tesstrain.sh \
      --fonts_dir "${fonts_dir}" \
      --lang eng \
      --linedata_only \
      --langdata_dir "${langdata_dir}" \
      --tessdata_dir "${tessdata_dir}" \
      --fontlist "Arial" \
      --noextract_font_properties \
      --exposures "0" \
      --maxpages "${maxpages}" \
      --save_box_tiff \
      --output_dir "${train_output_dir}"
  fi
  68. ################################################
  69.  
  70. ################################################
  # GENERATE EVAL DATA
  # Runs tesstrain.sh with the "Times New Roman" font into $eval_output_dir.
  # Skipped unless MakeEval is exactly "yes".
  if [[ "${MakeEval}" == "yes" ]]; then
    echo "###### MAKING EVAL DATA ######"
    # Clear out files from a previous run. The :? guard aborts the script
    # instead of expanding to "/*" if eval_output_dir is empty or unset.
    rm -f -- "${eval_output_dir:?}"/*
    /usr/local/bin/tesstrain.sh \
      --fonts_dir "${fonts_dir}" \
      --lang eng \
      --linedata_only \
      --langdata_dir "${langdata_dir}" \
      --tessdata_dir "${tessdata_dir}" \
      --fontlist "Times New Roman" \
      --noextract_font_properties \
      --exposures "0" \
      --maxpages "${maxpages}" \
      --save_box_tiff \
      --output_dir "${eval_output_dir}"
  fi
  88. ################################################
  89.  
  90. ################################################
  # EXTRACT THE CURRENT MODEL OF THE BEST TRAINING DATA SET
  # Pulls the LSTM component out of $tessdata_dir/eng.traineddata and writes
  # it to $train_output_dir/eng.lstm. Skipped unless MakeLSTM is exactly "yes".
  if [[ "${MakeLSTM}" == "yes" ]]; then
    echo "#### combine_testdata to make lstm from previous trained set ####"
    # Paths are quoted so directories containing spaces stay single arguments.
    /usr/local/bin/combine_tessdata \
      -e "${tessdata_dir}/eng.traineddata" "${train_output_dir}/eng.lstm"
  fi
  97. ################################################
  98.  
  99. ################################################
  # FINETUNE THE CURRENT MODEL VIA THE NEW TRAINING DATA
  # Continues training from $train_output_dir/eng.lstm using the line data
  # listed in eng.training_files.txt, for at most $maxiterations iterations.
  # Model output files use the prefix $train_output_dir/plusminus.
  # Skipped unless RunTraining is exactly "yes".
  if [[ "${RunTraining}" == "yes" ]]; then
    echo "#### training from previous optimum #####"
    /usr/local/bin/lstmtraining \
      --continue_from "${train_output_dir}/eng.lstm" \
      --model_output "${train_output_dir}/plusminus" \
      --traineddata "${train_output_dir}/eng/eng.traineddata" \
      --old_traineddata "${tessdata_dir}/eng.traineddata" \
      --train_listfile "${train_output_dir}/eng.training_files.txt" \
      --max_iterations "${maxiterations}"
  fi
  111. ################################################
  112.  
  113. ################################################
  # COMBINE THE NEW BEST TRAINING DATA
  # Runs lstmtraining with --stop_training on the plusminus checkpoint and
  # writes the result to $final_trained_data_file.
  # Skipped unless BuildFinalTrainedFile is exactly "yes".
  if [[ "${BuildFinalTrainedFile}" == "yes" ]]; then
    echo "#### Building final trained file $final_trained_data_file ####"
    # Each continuation line needs a space before the backslash; previously
    # every flag fused with the following line into one invalid argument.
    # --model_output now targets $final_trained_data_file so the file written
    # matches the message above (it used to be
    # $train_output_dir/eng.traineddata).
    # NOTE(review): --U is kept from the original; confirm the installed
    # lstmtraining version still accepts it.
    /usr/local/bin/lstmtraining --stop_training \
      --continue_from "${train_output_dir}/plusminus_checkpoint" \
      --traineddata "${train_output_dir}/eng/eng.traineddata" \
      --old_traineddata "${tessdata_dir}/eng.traineddata" \
      --U "${train_output_dir}/eng/eng.unicharset" \
      --model_output "${final_trained_data_file}"
  fi
  124. ################################################
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement