# Guest User
#
# Untitled
#
# a guest
# Jun 21st, 2020
# 87
# 0
# Never
# Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
# Bash 4.36 KB | None | 0 0
  1. #!/bin/bash
  2.  
  3. # Convert an image file by Abbyy Cloud OCR SDK using cURL
  4. # Usage: cloud_recognize <input> <output> [-l language] [-f txt|rtf|docx|xlsx|pptx|pdfSearchable|pdfTextAndImages|xml]
  5.  
  6. # before calling this script, set ABBYY_APPID and ABBYY_PWD environment variables
  7. # do not forget to set http_proxy and https_proxy variables if necessary
  8.  
  9. ServerUrl='http://cloud.ocrsdk.com'
  10. # To create an application and obtain a password,
  11. # register at http://cloud.ocrsdk.com/Account/Register
  12. # More info on getting your application id and password at
  13. # http://ocrsdk.com/documentation/faq/#faq3
  14. ApplicationId="..."
  15. Password="..."
  16.  
  17. echo "ABBYY Cloud OCR SDK demo recognition script"
  18. echo
  19.  
  20. if [ -n "$ABBYY_APPID" ]; then
  21.     ApplicationId="$ABBYY_APPID";
  22. elif [ -z "$ApplicationId" ]; then
  23.     echo "No application id specified. Please execute"
  24.     echo "\"export ABBYY_APPID=<your app id>\""
  25.     exit 1
  26. fi;
  27.  
  28. if [ -n "$ABBYY_PWD" ]; then
  29.     Password="$ABBYY_PWD";
  30. elif [ -z $Password ]; then
  31.     echo "No application password specified. Please execute"
  32.     echo "\"export ABBYY_PWD=<your app password>\""
  33.         echo "The password should be sent to you after application was created."
  34.     exit 1
  35. fi;
  36.  
  37. function printUsage {
  38.     echo "Usage:"
  39.     echo "$0 <input> <output> [-f output_format] [-l language]"
  40.     echo "output_format: txt|rtf|docx|xlsx|pptx|pdfSearchable|pdfTextAndImages|xml"
  41.     echo "Some language examples: Russian Russian,English English,ChinesePRC etc. For full list see ocrsdk documentation"
  42. }
  43.  
  44. params=`getopt f:l: "$@"`
  45. if [ $? != 0 ] ; then
  46.     echo "Invalid arguments."
  47.     printUsage >&2
  48.     exit 1;
  49. fi
  50.  
  51. OutFormat="txt"
  52. Language="english"
  53.  
  54. eval set -- "$params"
  55. while true; do
  56.     case "$1" in
  57.         -f) OutFormat="$2"; shift 2;;
  58.         -l) Language="$2"; shift 2;;
  59.         --) shift;;
  60.         *) if [ -z $1 ]; then
  61.             break;
  62.         elif [ -z $SourceFile ]; then
  63.             SourceFile=$1;
  64.         elif [ -z $TargetFile ]; then
  65.             TargetFile=$1;
  66.         else
  67.             echo "Invalid argument: $1" >&2;
  68.             printUsage
  69.             exit 1;
  70.         fi
  71.         shift;;
  72.     esac
  73. done
  74.  
  75. if [ -z $TargetFile ]; then
  76.     echo "Invalid arguments." >&2;
  77.     printUsage >&2;
  78.     exit 1;
  79. fi
  80.  
  81. if [ ! -e "$SourceFile" ]; then
  82.     echo "Source file $SourceFile doesn't exist";
  83.     exit 1;
  84. fi
  85.  
  86. sourceFileName=`basename "$SourceFile"`
  87. echo "Recognizing $sourceFileName with $Language language. Result will be saved in $OutFormat format.."
  88.  
  89. echo "Uploading.."
  90. response=`curl -s -S --user "$ApplicationId:$Password" --form "upload=@$SourceFile" "$ServerUrl/processImage?exportFormat=$OutFormat&language=$Language"`
  91.  
  92.  
  93. #Select guid from response string
  94. taskId=`echo $response | grep -o -E 'task id="[^"]*"' | cut -d '"' -f 2`
  95. if [ -z $taskId ]; then
  96.     echo "Error uploading file" >&2;
  97.     exit 1;
  98. fi
  99.  
  100. taskStatus=`echo $response | grep -o -E 'status="[^"]+"' | cut -d '"' -f 2`
  101. if [ $taskStatus == "NotEnoughCredits" ]; then
  102.         echo "Not enough credits to process the document. Please add more pages to your application's account."
  103.         exit 1
  104. fi
  105.  
  106. echo "Uploaded, task id is '$taskId'"
  107.  
  108. # Wait until image is processed
  109. # Note: it's recommended that your application waits
  110. # at least 2 seconds before making the first getTaskStatus request
  111. # and also between such requests for the same task.
  112. # Making requests more often will not improve your application performance.
  113. # Note: if your application queues several files and waits for them
  114. # it's recommended that you use listFinishedTasks instead (which is described
  115. # at http://ocrsdk.com/documentation/apireference/listFinishedTasks/).
  116. echo -n "Waiting.."
  117. while [ $taskStatus == "Queued" ] || [ $taskStatus == "InProgress" ]
  118. do
  119.     sleep 5
  120.     echo -n "."
  121.     response=`curl -s -S --user "$ApplicationId:$Password" $ServerUrl/getTaskStatus?taskId=$taskId`
  122.     taskStatus=`echo $response | grep -o -E 'status="[^"]+"' | cut -d '"' -f 2`
  123. done
  124.  
  125. if [ $taskStatus != "Completed" ]; then
  126.     echo "Unexpected task status $taskStatus"
  127.     exit 1
  128. fi
  129.  
  130. echo
  131.  
  132. # Get result url by treating the returned XML as text
  133. resultUrl=`echo $response | grep -o -E 'resultUrl="[^"]+"' | cut -d '"' -f 2`
  134. # Now replace all occurences of "&amp;" with "&"
  135. resultUrl="${resultUrl//&amp;/&}"
  136.  
  137. # Get result
  138. response=`curl -s -S -o $TargetFile $resultUrl`
  139. echo "Done."
# Add Comment
# Please, Sign In to add comment