Advertisement
anoncheg

Google translate from command line and some more features

Jul 28th, 2012
1,895
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 29.99 KB | None | 0 0
  1. #!/usr/bin/env bash
  2. # AWESOME GOOGLE TRANSLATE. A tool for accessing translate.google.com from the terminal, with additional features for English.
  3.  
  4. #    Copyright (C) 2012 Vitalij Chepelev.
  5.  
  6. #    This program is free software: you can redistribute it and/or modify
  7. #    it under the terms of the GNU General Public License as published by
  8. #    the Free Software Foundation, either version 3 of the License, or
  9. #    (at your option) any later version.
  10.  
  11. #    This program is distributed in the hope that it will be useful,
  12. #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. #    GNU General Public License for more details.
  15.  
  16. #    You should have received a copy of the GNU General Public License
  17. #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18.  
  19. # You can contact me there:
  20. # http://www.unix.com/shell-programming-scripting/196823-completed-command-line-google-translation-tool.html                       654321 - profile name
  21.  
  22. # main mirror https://github.com/Anoncheg1/Command-line-translator
  23. # mirror http://pastebin.com/kPTYjY6W
  24. # requirements:
  25. #   UTF-8 support for required languages
  26. #   curl >= 7.21.0,
  27. #   Mozilla js shell (spidermonkey) >= 1.8.0,
  28. #   mpg123
  29. #   html2text
  30. #   forvo.com account
  31. # features:
  32. #- translated text, corrected text with highlighting, language detection, dictionary, transliteration
  33. # for English:
  34. #- phrases, idioms, word forms, transcription, audio pronunciation
  35. #- cache for words
  36. #- saving words to a file for learning
  37. #
  38. #:) translate.google.com, thefreedictionary.com, lingvo-online.ru, www.forvo.com
  39.  
  40. help=$(basename "$0")' [-s] [-l] [-h] <text>
  41. if text is LATIN_LANG, then target language is FIRST_LANG
  42. otherwise, target language is LATIN_LANG
  43. -s, --sound Enable sound for one word
  44. -l, --list List of languages
  45. You can force the language with environment variables by command:
  46. export TLSOURCE=en TLTARGET=ru
  47. but better configure "FIRST_LANG" and "LATIN_LANG" in script for auto detection of direction by the first character!
  48. You need UTF-8 support for required languages.
  49. ' #usage text printed by -h/--help, prefixed with the script name; only the options the parser really accepts are listed
  50.  
  51. # user configuration - adjust to taste
  52. declare -r FIRST_LANG=ru            #target language when the request starts with a Latin letter; should be your own, non-Latin-script language
  53. declare -r LATIN_LANG=en            #target language for every request that does not start with a Latin letter
  54. declare -r flogin=121212            #forvo.com login, REQUIRED for downloading pronunciations
  55. declare -r fpass=121212             #forvo.com password
  56. TERMINAL_C="WOB"                #terminal colours - white on black: WOB, black on white: BOW, anything else: generic palette
  57. #httpproxy="127.0.0.1:4444"     #proxy for long strings (handed to curl -x)
  58. #httpsproxy="--socks5 127.0.0.1:9050"   #socks5 (extra curl options, inserted unquoted into the curl command line)
  59. #
  60. declare -r words_buffer=4000        #once the cache holds more files than this, files older than 20 days are deleted
  61. declare -r timeout=6                #seconds; used for curl --connect-timeout and -m
  62. declare -r TRANSLIT_WORDS_MAX=10    #the transliteration is printed only for requests of at most this many words
  63. declare -r SOUND_DOWNLOAD_AWS=1     # 1 - always download the pronunciation. 0 - only on demand (-s).
  64. declare -r useragent="Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) QupZilla/1.3.1 Safari/534.34" #User-Agent sent with the curl requests
  65. #
  66. declare -r PR_DIR="$HOME/.translate" #working directory: cache, cookie and temporary files
  67. [ ! -d "$PR_DIR" ] && mkdir "$PR_DIR"
  68. [ ! -d "$PR_DIR"/cache ] && mkdir "$PR_DIR"/cache
  69.  
  70. declare -r TR_ENG_WORDS="$PR_DIR"/translated_words #list of recently translated English words, kept for learning
  71. declare -r FIXED_STRING="$PR_DIR"/fixed_string #last corrected spelling suggested by Google; reused when the script is started without text
  72.  
  73. declare -A ln_arr #language code -> English language name; printed by -l/--list and next to the detected language
  74. ln_arr["af"]="Afrikaans"
  75. ln_arr["sq"]="Albanian"
  76. ln_arr["am"]="Amharic"
  77. ln_arr["ar"]="Arabic"
  78. ln_arr["hy"]="Armenian"
  79. ln_arr["az"]="Azerbaijani"
  80. ln_arr["eu"]="Basque"
  81. ln_arr["be"]="Belarusian"
  82. ln_arr["bn"]="Bengali"
  83. ln_arr["bg"]="Bulgarian"
  84. ln_arr["ca"]="Catalan"
  85. ln_arr["zh-CN"]="Chinese (Simplified)"
  86. ln_arr["zh"]="Chinese"
  87. ln_arr["hr"]="Croatian"
  88. ln_arr["cs"]="Czech"
  89. ln_arr["da"]="Danish"
  90. ln_arr["nl"]="Dutch"
  91. ln_arr["en"]="English"
  92. ln_arr["eo"]="Esperanto"
  93. ln_arr["et"]="Estonian"
  94. ln_arr["fo"]="Faroese"
  95. ln_arr["tl"]="Filipino"
  96. ln_arr["fi"]="Finnish"
  97. ln_arr["fr"]="French"
  98. ln_arr["gl"]="Galician"
  99. ln_arr["ka"]="Georgian"
  100. ln_arr["de"]="German"
  101. ln_arr["el"]="Greek"
  102. ln_arr["gu"]="Gujarati"
  103. ln_arr["ht"]="Haitian Creole"
  104. ln_arr["iw"]="Hebrew"
  105. ln_arr["hi"]="Hindi"
  106. ln_arr["hu"]="Hungarian"
  107. ln_arr["is"]="Icelandic"
  108. ln_arr["id"]="Indonesian"
  109. ln_arr["ga"]="Irish"
  110. ln_arr["it"]="Italian"
  111. ln_arr["ja"]="Japanese"
  112. ln_arr["kn"]="Kannada"
  113. ln_arr["ko"]="Korean"
  114. ln_arr["lo"]="Laothian"
  115. ln_arr["la"]="Latin"
  116. ln_arr["lv"]="Latvian"
  117. ln_arr["lt"]="Lithuanian"
  118. ln_arr["mk"]="Macedonian"
  119. ln_arr["ms"]="Malay"
  120. ln_arr["mt"]="Maltese"
  121. ln_arr["no"]="Norwegian"
  122. ln_arr["fa"]="Persian"
  123. ln_arr["pl"]="Polish"
  124. ln_arr["pt"]="Portuguese"
  125. ln_arr["ro"]="Romanian"
  126. ln_arr["ru"]="Russian"
  127. ln_arr["sr"]="Serbian"
  128. ln_arr["sk"]="Slovak"
  129. ln_arr["sl"]="Slovenian"
  130. ln_arr["es"]="Spanish"
  131. ln_arr["sw"]="Swahili"
  132. ln_arr["sv"]="Swedish"
  133. ln_arr["ta"]="Tamil"
  134. ln_arr["te"]="Telugu"
  135. ln_arr["th"]="Thai"
  136. ln_arr["tr"]="Turkish"
  137. ln_arr["uk"]="Ukrainian"
  138. ln_arr["ur"]="Urdu"
  139. ln_arr["vi"]="Vietnamese"
  140. ln_arr["cy"]="Welsh"
  141. ln_arr["yi"]="Yiddish"
  142.  
  143. trap bashtrap INT #run the abort handler on Ctrl-C
  144. bashtrap() #Abort handler: removes the (possibly incomplete) cache file of the current request and exits with a failure status; also called directly when a dictionary download fails
  145. {
  146.     echo "Exit signal detected. Deleting cache files."
  147.     [[ -n $cachefile ]] && rm -f -- "$cachefile" #cachefile is only set while a short request is being processed
  148.     exit 1 #an interrupted or failed run must not report success
  149. }
  150.  
  151. if [[ $TERMINAL_C == "WOB" ]];then #palette for a white-on-black terminal
  152.     declare -r C_RED="$(tput bold)$(tput setaf 1)"      #highlight
  153.     declare -r C_YELLOW="$(tput bold)$(tput setaf 3)"   #highlight
  154.     declare -r C_GRAY="$(tput setaf 7)" #language detected
  155.     declare -r C_CYAN_RAW="\033[1;36m"  #forms
  156.     declare -r C_GRAY_RED_RAW="\033[1;35m"  #phrases
  157.     declare -r C_DARK_BLUE_RAW="\033[34m"   #link for dictionary
  158.     declare -r C_BLUE_RAW="\033[1;34m"  #dictionary and vform1
  159.     declare -r C_BRIGHT_RAW="\033[1;37m"    #phrases, examples main part, vform2
  160.     declare -r C_GREEN="\033[1;32m"     #t_result
  161. elif [[ $TERMINAL_C == "BOW" ]];then #palette for a black-on-white terminal
  162.     declare -r C_RED="$(tput bold)$(tput setaf 1)"      #highlight
  163.     declare -r C_YELLOW="$(tput setaf 3)"           #highlight
  164.     declare -r C_GRAY="$(tput bold)$(tput setaf 5)" #language detected
  165.     declare -r C_CYAN_RAW="\033[1;36m"      #forms
  166.     declare -r C_GRAY_RED_RAW="\033[1;35m"      #phrases
  167.     declare -r C_DARK_BLUE_RAW="$(tput setaf 7)"    #link for dictionary
  168.     declare -r C_BLUE_RAW="\033[1;34m"      #dictionary and vform1
  169.     declare -r C_BRIGHT_RAW="$(tput bold)"      #phrases, examples main part, vform2
  170.     declare -r C_GREEN="$(tput bold)"       #t_result
  171. else #universal: only red and bold, everything else uncoloured
  172.     declare -r C_RED="$(tput setaf 1)"      #highlight
  173.     declare -r C_YELLOW="$(tput bold)"      #highlight
  174.     declare -r C_GRAY=""            #language detected
  175.     declare -r C_CYAN_RAW=""        #forms
  176.     declare -r C_GRAY_RED_RAW=""        #phrases
  177.     declare -r C_DARK_BLUE_RAW=""       #link for dictionary
  178.     declare -r C_BLUE_RAW=""        #dictionary and vform1
  179.     declare -r C_BRIGHT_RAW="$(tput bold)"  #phrases, examples main part, vform2
  180.     declare -r C_GREEN="$(tput bold)"   #t_result
  181. fi
  182. declare -r C_NORMAL="$(tput sgr0)" #attribute reset as emitted by tput
  183. declare -r C_NORMAL_RAW="\033[0m" #the reset written as a backslash escape; it only takes effect through echo -e or inside the generated JavaScript string
  184.  
  185. jsscript="var googlearr = eval(JSON.stringify(myJSONObject));
  186. function translated_result(arr){ //print the translation: first field of every sentence, concatenated
  187.     var rsum=\"\"; //translated text
  188.     if(typeof arr[0] !== 'undefined' && arr[0] !== null){ //summing sentences
  189.         for (var i = 0; i < arr[0].length; i++){
  190.             if(typeof arr[0][i][0] !== 'undefined' && arr[0][i][0] !== null){
  191.                 rsum=rsum+arr[0][i][0];
  192.             }
  193.         }
  194.     }
  195.     print(rsum);
  196. }
  197. function translit(arr){ //print the transliteration: fourth field of every sentence, one per line
  198.     var rsum=\"\"; //translit
  199.     if(typeof arr[0] !== 'undefined' && arr[0] !== null){ //summing sentences
  200.         for (var i = 0; i < arr[0].length; i++){
  201.             if(typeof arr[0][i][3] !== 'undefined' && arr[0][i][3] !== null){
  202.                 rsum=rsum+'\n'+arr[0][i][3];
  203.             }
  204.         }
  205.     }
  206.     print(rsum);
  207. }
  208. function dictionary(arr){ //dictionary output
  209.     if(typeof arr[1] !== 'undefined' && arr[1] !== null){
  210.         for (var a = 0; a < arr[1].length; a++){
  211.             if(typeof arr[1][a][0] !== 'undefined' && arr[1][a][0] !== null){
  212.                 print(\"$C_BLUE_RAW\"+arr[1][a][0]+\"$C_NORMAL_RAW\");//part of speech
  213.                 for (var b = 0; b < arr[1][a][2].length; b++){//words
  214.                     var num=parseFloat(arr[1][a][2][b][3]); //frequency
  215.                     var num2=Math.round(num*100000)/10;//round
  216.                     if (num2 >= 10)
  217.                         num2=Math.round(num2/10)*10;
  218.                     print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]+' '+num2);//word + variants + frequency
  219.                     //variant1 print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]+' '+Math.round(num*1000000)/1000000);//word + variant of speach + frequency
  220.                     //old print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]);//word + variant of speach
  221.                 }
  222.             }
  223.         }
  224.     }
  225. }
  226. function language_identification(arr){ //get detected languages
  227.     if(typeof arr[8] !== 'undefined' && arr[8] !== null && typeof arr[8][0] !== 'undefined' && arr[8][0] !== null && typeof arr[8][0][0] !== 'undefined' && arr[8][0][0] !== null){
  228.         print('Detected language: '+arr[8][0][0]); //detected language
  229.     }
  230. }
  231. " #JavaScript helpers appended to the generated js-shell script; each one receives the parsed Google response (googlearr) and prints one part of it
  232.  
  233. sound=0 #becomes 1 when -s/--sound is given
  234. volume=0.8
  235. if [[ $1 = -h || $1 = --help ]]; then #help
  236.     echo "$help"
  237.     exit
  238. fi
  239. if [[ $1 == -s || $1 == --sound ]];then #sound variables
  240.     sound=1
  241.     request="${*:2}" #the text is every argument after the option, joined by blanks
  242.     #the former array round-trip ( request=($*) ) glob-expanded the text and left a leading blank,
  243.     #which made the first-character language detection see a blank instead of a letter
  244. else
  245.     request="$*"
  246. fi
  247. if [[ $1 = -l || $1 = --list ]]; then #list
  248.     for i in "${!ln_arr[@]}" ;do
  249.         echo -e "$i\t${ln_arr[$i]}"
  250.     done | sort -k2
  251.     exit
  252. fi
  253.  
  254. if [[ ${#request} -eq 0 ]];then #no text given: reuse the corrected string saved by a previous run
  255.     if [[ -e $FIXED_STRING ]]; then
  256.         request=$(cat "$FIXED_STRING" 2>/dev/null) #quoted: the path lives under $HOME and may contain blanks
  257.         echo -e "$(tput bold)$(tput setaf 3)$request$(tput sgr0)"
  258.     else
  259.         echo "$help" >&2; exit 1 #nothing to translate: show the usage and fail instead of exiting silently
  260.     fi
  261. fi
  262. #rm $FIXED_STRING 2> /dev/null
  263.  
  264. tchar=${request:0:1} #language detection by the first character
  265. tcharnum=$(printf "%d" "'${tchar}")
  266. if [[ $tcharnum -ge 65 && $tcharnum -le 122 || (( $TLSOURCE == $LATIN_LANG && ! $TLTARGET )) ]]; then #if request is A-Za-z then it is LATIN_LANG... ("en,it" will not be detected here idk how to make it for now)
  267.     # latin to first
  268.     source="$LATIN_LANG" #english or latin alphabet
  269.     target="$FIRST_LANG" #if text is english target language is FIRST_LANG
  270. else    # ANY language to latin
  271.     source="$FIRST_LANG"
  272.     target="$LATIN_LANG"
  273. fi
  274.  
  275. [[ $TLSOURCE ]] && source=$TLSOURCE;
  276. [[ $TLTARGET ]] && target=$TLTARGET; # export TLSOURCE=en TLTARGET=ru; will force the language
  277.  
  278. eng_sound_download(){           # logs in to forvo.com and saves the pronunciation of $request as "$cachefile".mp3; reads the globals request, cachefile, sound, flogin, fpass, PR_DIR, timeout, useragent, httpproxy, httpsproxy
  279.     [[ $(ls "$PR_DIR"/cache | wc -l) -gt $words_buffer ]] && find "$PR_DIR"/cache -mtime +20 -delete #cache cleaning. remove words older than 20 days.
  280.     rm -f "$PR_DIR"/tmpcookie #drop the old session synchronously, so the removal cannot race with the login below that rewrites the file
  281.     curl -s -c "$PR_DIR"/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "login=$flogin&password=$fpass" http://ru.forvo.com/login/ -o/dev/null
  282.     if [[ -e "$PR_DIR"/tmpcookie ]]; then
  283.         slink=$(curl -s -b "$PR_DIR"/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://ru.forvo.com/word/"$(echo $request | tr ' ' _ )"/ | grep -o '[^"]*/download/mp3/'"$(echo $request | tr ' ' _ )"'/en/[^"]*' |head -n 1 )
  284.         if [[ ${#slink} -gt 5 ]];then
  285.              curl -s -b "$PR_DIR"/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://ru.forvo.com"$slink" > "$cachefile".mp3
  286.             return
  287.         fi
  288.     fi
  289.     [ $sound -eq 1 ] && echo fail to get sound from forvo.com
  290. }
  291.  
  292.  
  293. #----------------------------------------------- MAIN PART ----------------------------------------------------------------
  294.  
  295. if [[ ${#request} -gt 300 ]]; then
  296.     if ! result=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "sl=$source" -d "tl=$target" --data-urlencode "text=$request" http://translate.google.com) && [[ $httpproxy != "" ]] #getting google respond for short sentence
  297.     then    # second attempt without https proxy
  298.         httpsproxy=""
  299.         result=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "sl=$source" -d "tl=$target" --data-urlencode "text=$request" http://translate.google.com)
  300.     fi
  301.     #encoding=$(awk '/Content-Type: .* charset=/ {sub(/^.*charset=["'\'']?/,""); sub(/[ "'\''].*$/,""); print}' <<<"$result")
  302.     #iconv -f $encoding <<<"$result" | awk 'BEGIN {RS="<div"};/<span[^>]* id=["'\'']?result_box["'\'']?/ {sub(/^.*id=["'\'']?result_box["'\'']?(>| [^>]*>)([ \n\t]*<[^>]*>)*/,"");sub(/<.*$/,"");print}' | html2text -utf8
  303.     #echo -e "\033[32;1m"$(iconv -f "$encoding" <<<"$result" |  awk 'BEGIN {RS="</div>"};/<span[^>]* id=["'\'']?result_box["'\'']?/' | html2text -utf8)"$C_NORMAL_RAW"
  304.     echo -e "\033[32;1m"$(echo "$result" |  awk 'BEGIN {RS="</div>"};/<span[^>]* id=["'\'']?result_box["'\'']?/' | html2text -utf8)"$C_NORMAL_RAW"
  305.  
  306. else
  307.     r_words_count=$(echo "$request"  |wc -w) #count the words first: the check below used to read the still unset variable and was therefore always true
  308.     if [[ $r_words_count -le 4 ]];then
  309.         httpproxy="" ; httpsproxy=""  #don't wanna proxy for 4 words.
  310.         request=$(echo "$request" | tr '[:upper:]' '[:lower:]') #lower request for short sentences
  311.     fi
  312.     cachefile="$PR_DIR/cache/$source-$target"_"$request" #built after the optional lowercasing, so the cache name matches the text that is sent
  313.     if [[ ! -e "$cachefile" || (( $sound == 1 && ! -e "$cachefile".mp3 )) ]]; then
  314.  
  315.         if ! grespond=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout --data-urlencode "text=$request" "http://translate.google.com/translate_a/t?client=t&hl=$target&sl=$source&tl=$target&ie=UTF-8&oe=UTF-8&multires=1&ssel=0&tsel=0&sc=1") && [[ "$httpproxy" != "" ]] #getting google respond for short sentence
  316.         then    # second attempt without https proxy
  317.             #echo https proxy not working. using http proxy.
  318.             httpsproxy=""
  319.             grespond=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout --data-urlencode "text=$request" "http://translate.google.com/translate_a/t?client=t&hl=$target&sl=$source&tl=$target&ie=UTF-8&oe=UTF-8&multires=1&ssel=0&tsel=0&sc=1") #getting google respond for short sentence
  320.         fi
  321.         [[ ! $(echo "$grespond" | grep -o '\[.*\]') ]] && { echo "can't connect" ; exit; } #small connection check
  322.  
  323.         #echo -n "var myJSONObject = " > "$PR_DIR"/tmpjsobj2
  324.         #echo [$(echo "$grespond" | grep -o '"[^"]*/i[^"]*"')"];" >> "$PR_DIR"/tmpjsobj2
  325.         #echo [$(echo "$grespond" | grep -o '[^"]*/i[^"]*')"];"
  326.         #echo -e "var googlearr = eval(JSON.stringify(myJSONObject));\n print(googlearr);" >> "$PR_DIR"/tmpjsobj2
  327.         #js "$PR_DIR"/tmpjsobj2
  328.         fl_raw=$(echo "$grespond" | grep -o '[^"]*/i[^"]*') #google correction from $grespond
  329.  
  330.         #Highlight
  331.         declare difftest
  332.         [[ $fl_raw ]] &&
  333.             if [[ $r_words_count -le 2 ]];then #for 1-2 words
  334.                 fl=$(echo "$fl_raw" | sed 's/\\u003cb\\u003e\\u003ci\\u003e//g' | sed 's/\\u003c\/i\\u003e\\u003c\/b\\u003e//g' | sed 's/\\u0026//g' | sed "s/\#39;/'/g") #removing shit
  335.                 [[ $(echo "$fl" | tr -d "'") == "$request" ]] && fl="" # skip highlight for ' char
  336.                 difftest="$(cmp -l <(echo -n $request) <(echo -n $fl) 2>/dev/null)" #comparison
  337.                 if [[ $(echo "$difftest" | grep '[0-9]') ]];then
  338.                 echo "$fl" > $FIXED_STRING                 
  339.                     #[[ $source != en ]]&& diffnum=$(($diffnum/2+$diffnum%2))   #MUST BE CHECKED FOR REQUIRED LANGUAGES
  340.                     #[[ ${fl:$diffnum-1:1} == ' ' ]] && let diffnum-- #white space correction
  341.                     for (( i=$(echo "$difftest" | wc -l); i>=1; i--)); do
  342.                         pos=$(($(echo "$difftest" | sed -n $i'p' | sed -E 's/(([^ ]+ )).*/\1/' ) - 1))
  343.                         fl=$(echo "$fl" | sed 's/^\(.\{'$pos'\}\).\(.*\)/\1'$C_RED"${fl:$pos:1}"$C_NORMAL$C_YELLOW'\2/') #highlight difference in one word
  344.                     done
  345.                 fi
  346.                 fl="$C_YELLOW$fl$C_NORMAL"
  347.             else
  348.                 fl=$(echo "$fl_raw" | sed 's/\\u003cb\\u003e\\u003ci\\u003e/'$C_YELLOW'/g' | sed 's/\\u003c\/i\\u003e\\u003c\/b\\u003e/'$C_NORMAL'/g' | sed 's/\\u0026//g' | sed "s/\#39;/'/g" ) #google fixed text
  349.                 echo "$fl" > $FIXED_STRING
  350.             fi
  351. #echo $difftest
  352. #echo $grespond | grep -o '\[.*\]'
  353.  
  354.         echo -n "var myJSONObject = " > "$PR_DIR"/tmpjsobj
  355.         echo -n "$grespond" | grep -o '\[.*\]' >> "$PR_DIR"/tmpjsobj
  356.         echo ";">> "$PR_DIR"/tmpjsobj
  357.         echo -n "$jsscript" >> "$PR_DIR"/tmpjsobj
  358.  
  359.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  360.         echo -n "language_identification(googlearr);" >> "$PR_DIR"/tmpjsobj2
  361.         det_language=$(js "$PR_DIR"/tmpjsobj2 | tail -c3)       # detected language
  362.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  363.         echo -n "translated_result(googlearr);" >> "$PR_DIR"/tmpjsobj2
  364.         t_result=$(js "$PR_DIR"/tmpjsobj2)              # translated text
  365.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  366.         echo -n "translit(googlearr);" >> "$PR_DIR"/tmpjsobj2
  367.         translit=$(js "$PR_DIR"/tmpjsobj2)              # translit
  368.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  369.         echo -n "dictionary(googlearr);" >> "$PR_DIR"/tmpjsobj2
  370.         dictionary=$(js "$PR_DIR"/tmpjsobj2)                # dictionary
  371.  
  372.         #Language detection
  373.         [[ $det_language != $source && (( $TLSOURCE || $det_language != $ENGLISH_TARGET_LANG )) ]] && echo -e "$C_GRAY* $det_language ${ln_arr[$det_language]}$C_NORMAL"
  374.  
  375.         if [[ $source != $det_language && ! $fl_raw && ((
  376.                 $(echo $request | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ==  $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ||
  377.                 $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ==  $(echo $translit | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ||
  378.                 ${request:0:1} == ${t_result:0:1}
  379.                 )) && ! $dictionary ]]; then
  380.             [[ $source != "auto" ]] && echo -e "trying with detected language" # I guess google "auto" is not working then we will do it by yourself mutely
  381.             export TLSOURCE=$det_language #TLTARGET=en #source and target language for second attempt
  382.             if [[ $sound == 1 ]]; then $0 -s "$request" ; else $0 "$request" ;fi #second attempt to translate with detected language  #sound will be quiet...
  383.             exit
  384.         fi
  385.  
  386.         [[ $source == "auto" ]] && { source=$det_language; cachefile="$PR_DIR/cache/$source-$target"_"$request"; } #auto in cachefile fix #not necessary
  387.  
  388.  
  389.  
  390.  
  391.  
  392.         declare trans
  393.         if [[ $r_words_count -le 2 && ${#request} -gt 1 && $source == en && ! $(echo "$difftest" | wc -l) -gt 1 ]];then #special check( expample: advise advice)
  394.         #transcription
  395.             trans=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://lingvo-online.ru/en/Translate/en-"$target"/"$request" | grep -o '"[^"]*/transcription\.gif.Text=[^"]*"' | sed 's/.*=\(.*\)"/\1/'| echo -n -e $(sed 's/+/ /g; s/%/\\x/g')) #getting transcription
  396.             [[ $? != 0 ]] &&  { echo "can't get transcription"; } #bashtrap;
  397.         fi
  398.         #"Dictionary part" for english only. but it can be extended for every required language
  399.         if [[ $r_words_count -eq 1 && ${#request} -gt 1 && $source == en && (( ! $fl_raw || $trans )) ]]; then #dictionary
  400.        
  401.             echo -e "$C_GREEN$t_result$C_NORMAL_RAW" > "$cachefile" #google translated text to cache
  402.             [[ $dictionary ]] && echo -e "$dictionary" >> "$cachefile" #google dictionary to cache
  403.  
  404.             #phrases, forms and a vform
  405.             macmill=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout http://www.macmillandictionary.com/dictionary/british/"$(echo $request | tr '[:upper:]' '[:lower:]' | tr ' ' - )" )
  406.             [[ $? != 0 ]] && { echo "cant get phrases" ; bashtrap; }
  407.             #forms
  408.             forms=$(echo $macmill | grep -o "id=\"wordsformslayer-head\".*End of DIV wordforms" | sed 's/INFLECTION-CONTENT/\n/g' | sed -e 's/.*INFLECTION-ENTRY\">\([^<]*\)<.*I-VARIANT-before\">\([^<]*\).*INFLECTION-ENTRY\">\([^<]*\).*/666\1\2\3,/' -e 's/.*INFLECTION-ENTRY\">\([^<]*\)<.*/666\1,/' | grep 666 | cut -c 4-)
  409.             fwc=$(echo "$forms" | wc -l)
  410.             firstf=$(echo "$forms" | head -n 1) #1
  411.             last1f=$(echo "$forms" | tail -n 2 | head -n 1) #end-1
  412.             lastf=$(echo "$forms" | tail -n 1) #end
  413.             [[ (( fwc -eq 5 && (( ${firstf%?}"ed," != $lastf || ${firstf%?}"ed," != $last1f || $lastf != $last1f )) )) ||
  414.                (( fwc -eq 3 && (( ${firstf%?}"er," != $last1f  || ${firstf%?}"est," != $lastf )) )) ||
  415.                (( fwc -eq 2 && (( ${firstf%?}"s," != $lastf )) )) ||
  416.                (( fwc -eq 4 )) ]] && echo -e "$C_CYAN_RAW"forms:$C_NORMAL ${forms%?} >> "$cachefile"
  417.             #vform
  418.             vform=$(echo "$macmill" | grep -o "GREF-ENTRY-before.*</a>" | sed 's/<\/a>/<\/a>\n/g' | head -n 1 | sed 's/.*<a[^>]*>\([^<]*\).*/\1/')
  419.             vform_desc=$(echo "$macmill" | grep "span class=\"GREF-TYPE\"" | sed 's/.*<span class=\"GREF-TYPE\">\([^<]*\)<.*/\1/')
  420.             [[ $vform ]] && echo -e "$C_BLUE_RAW$vform_desc$C_NORMAL_RAW" "$C_BRIGHT_RAW$vform$C_NORMAL_RAW" >> "$cachefile"
  421.             #phrases
  422.             raw_phras=$(echo "$macmill" | grep -o '<li ID.*End of DIV SENSE--></li>' | sed 's/<.\?span[^>]*>//g')
  423.             #div class="P-HEAD" #| sed 's/End of DIV SENSE--><\/li>/\n/g' \ -e 's/.*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\'$C_BRIGHT_RAW'\1'$C_NORMAL'\. \2<FUCKINGSHIT>\t\3/' \ | nl -s ' ' | sed -e 's/<FUCKINGSHIT>/\n/' -e 's/^ *//'
  424. #-e 's/.*\"EXAMPLE\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1<FUCKINGSHIT>\t\2/' \
  425.             phras=$(echo $raw_phras | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed -e 's/div class=\"P-HEAD\"/\n?????/g' | grep "?????" | grep -n . | sed 's/^[0-9]*:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]*:/{h;d;};G; s/^\(.*\)\n\([0-9]*:\)/\2 \1/' | sed \
  426. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2\3'$C_NORMAL'\. \4/' \
  427. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' \
  428. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"h2\">\([^<]*\).*class=\"SENSE-BODY\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2\3'$C_NORMAL'\. \4/' \
  429. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*class=\"SENSE-BODY\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' \
  430. -e 's/^\([0-9]*\):.*\"EXAMPLE\">\([^<]*\).*/**\1 \2/' | grep -o "^\*\*[0-9].*" | sed 's/^\*\*//' ) # doubleh2+exmaple, h2+exmaple, doubleh2+sentence, h2+sentence, only example
  431.             if [[ $phras ]]; then   #phrases
  432.                 echo -e "$C_GRAY_RED_RAW"PHRASES:$C_NORMAL >> "$cachefile"
  433.                 echo -e "$phras" >> "$cachefile"
  434.                 #echo -e "$C_DARK_BLUE_RAW"http://www.macmillandictionary.com/dictionary/american/"$request"$C_NORMAL >> "$cachefile"
  435.             else            #second attempt examples
  436.                 phrases_2=$(echo "$macmill" | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed 's/<.\?span[^>]*>//g' | grep -o 'div class="SENSE".*End of DIV SENSE--' | sed 's/End of DIV SENSE--/\n/g' | grep -n . | sed 's/^.:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]:/{h;d;};G; s/^\(.*\)\n\([0-9]:\)/\2 \1/' | grep "<strong>" | sed 's/^\([0-9]\).*<strong>\([^<]*\)<.*class=\"EXAMPLE\">\([^<]*\)<.*/\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'. \3/' | sed 's/^\([0-9]\).*<strong>\([^<]*\)<\/strong>\([^<]*\).*/\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'. \3/' | sed 's/\. / /')
  437.                 if [[ $phrases_2 ]];then
  438.                     echo -e "$C_GRAY_RED_RAW"EXAMPLES:$C_NORMAL >> "$cachefile"
  439.                     echo -e "$phrases_2" >> "$cachefile"
  440.                 fi
  441.             fi
  442.            
  443.             #transcription
  444.             [[ $trans ]] && echo "[$trans]" >> "$cachefile"
  445.  
  446.             cat "$cachefile" 2>/dev/null #output
  447.             echo -e "$C_DARK_BLUE_RAW"http://oxforddictionaries.com/definition/english/$(echo "$request" | sed "s/'/%27/" )$C_NORMAL # just another good english dictionary
  448.  
  449.             if [[ ${#request} -gt 2 || $r_words_count -le 2 ]] ; then
  450.  
  451.                 #saving words
  452.                 ted_words_file=$(cat "$TR_ENG_WORDS")
  453.                 ted_words_file=$(echo "$ted_words_file" | tail -n 70)
  454.                 if [[ ! $(echo "$ted_words_file" | grep "$request" 2>/dev/null) ]]; then
  455.                     echo -e "$ted_words_file\n""$request" > "$TR_ENG_WORDS"
  456.                     #echo -e "$request \t\t\t\t\t\t\t\t\t\t [$trans]" >> "$TR_ENG_WORDS"
  457.                 fi
  458.                
  459.                 #getting sound from forvo.com
  460.                     [[ ! -e "$cachefile".mp3 ]] &&
  461.                         if [[ $sound == 1 || $SOUND_DOWNLOAD_AWS == 1 ]];then
  462.                             eng_sound_download &      # in background
  463.                         fi
  464.                        
  465.             fi
  466.         else #not english dictionary
  467.             echo -e "$C_GREEN$t_result$C_NORMAL_RAW"                    #google translated text output
  468.             [[ $source != en && $r_words_count -le $TRANSLIT_WORDS_MAX && $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') !=  $(echo $translit | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ]] && echo -e "$translit"                 #google translit    output
  469.             [[ $dictionary ]] && echo -e "$dictionary"                  #google dictionary  output
  470.  
  471.             if [[ $source == en && ! $fl_raw && $r_words_count -le 4 ]]; then
  472.                 #phrases for 2 words
  473.                 if [[ $r_words_count -le 2 ]];then
  474.  
  475.                     #getting sound from forvo.com
  476.                     [[ ! -e "$cachefile".mp3 ]] &&
  477.                         if [[ $sound == 1 || $SOUND_DOWNLOAD_AWS == 1 ]];then
  478.                             eng_sound_download &      # in background
  479.                         fi
  480.  
  481.                     macmill=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://www.macmillandictionary.com/dictionary/british/"$(echo $request | tr ' ' - )" )
  482.                     [[ $? != 0 ]] && { echo "cant getmacmillandictionary.com phrases for string" ; bashtrap; }
  483.                     macmill=$(echo "$macmill" | grep -o 'div class="SENSE".*End of DIV SENSE--' | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed 's/End of DIV SENSE--/\n/g' | grep -n . | sed 's/^[0-9]*:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]*:/{h;d;};G; s/^\(.*\)\n\([0-9]*:\)/\2 \1/' | sed \
  484. -e's/^\([0-9]*\):.*p id=\"EXAMPLE\" class=\"EXAMPLE\">\([^<]*\)<.*/*\1 \2/' \
  485. -e 's/^\([0-9]\).*span class=\"BASE\">\([^<]*\).*context=\"DEFINITION-before\"> <\/span>\([^<]*\).*/*\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' | grep -o "^\*[0-9].*" | sed 's/^*//')
  486.                 fi
  487.                 examples=$macmill
  488.                 if [[ $examples ]];then
  489.                     echo -e "$C_GRAY_RED_RAW"EXAMPLES:$C_NORMAL
  490.                     echo -e "$examples"
  491.                 fi
  492.  
  493.                 #search thefreedictionary.com for an idiom (for 2-4 words); skipped when $phras2 is already set
  494.                 if [[ ! $phras2 ]]; then
  495.                     list=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://www.thefreedictionary.com/"$(echo $request | tr ' ' + )")
  496.                     [[ $? != 0 ]] && { echo "cant get thefreedictionary.com phrases for string" ; bashtrap; } #curl failed; bashtrap is defined elsewhere in the script
  497.                     list=$(echo "$list" | grep "[fF]ound in:") #keep only the "Found in:" line(s); the links below are extracted from them
  498.                     if [[ $list ]]; then
  499.                         ideomlink=$(echo "$list" | grep -o 'http://idioms[^"]*') #link(s) to the idioms sub-site, if any
  500.                         if [[ $ideomlink ]]; then
  501.                             raw_ideoms=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" "$ideomlink" | grep -o 'div class="ds-single".*</div><div')
  502.                             #strip every <i>/</i> tag (not just the first) so the [^<]* capture below is not cut short at a later tag
  503.                             ideom=$(echo "$raw_ideoms" | sed -e 's/<i>//g' -e 's/<\/i>//g' | sed 's/.*ds-single\">\([^<]*\)<.*/\1/')
  504.                             illustration=$(echo "$raw_ideoms" | sed -n 's/.*class=illustration>\([^<]*\)<.*/\1/p') #-n ... p: stay empty when there is no illustration instead of passing the raw HTML through
  505.                             if [[ $ideom ]]; then
  506.                                 echo -e "$C_GRAY_RED_RAW"Ideom:$C_NORMAL
  507.                                 echo "$ideom" #quoted: scraped text must not be word-split or glob-expanded
  508.                                 [[ $illustration ]] && echo "$illustration"
  509.                             fi
  510.                         else #no idioms link: fall back to the encyclopedia2 link
  511.                             ency_link=$(echo "$list" | grep -o 'http://encyclopedia2[^"]*')
  512.                             if [[ $ency_link ]]; then
  513.                                 raw_ency=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" "$ency_link")
  514.                                 ency=$(echo "$raw_ency" | grep '<div class=hw>' | grep -o 'div>.*<br /' | sed -e 's/"//g' | sed -e 's/<b>/'$(tput bold)'/g' -e 's/<\/b>/'$C_NORMAL'/g' | sed 's/div>\([^<]*\).*/\1/') #headword entry with <b>..</b> turned into terminal bold
  515.                                 contr_ency=$(echo "$raw_ency" | grep "Contrast with" | sed 's/.*\">\([^<]*\)<\/a.*/\1/') #text of the last link on the "Contrast with" line
  516.                                 if [[ $ency ]]; then
  517.                                     echo -e "$C_GRAY_RED_RAW"Ideom:$C_NORMAL
  518.                                     echo "$ency"
  519.                                     [[ $contr_ency ]] && echo -e  Contrast with: "$C_BRIGHT_RAW"$contr_ency"$C_NORMAL"
  520.                                 fi
  521.                             fi
  522.                         fi
  523.                     fi
  524.                 fi
  525.             fi
  526.  
  527.         fi #end of english dictionary
  528.         [[ $fl_raw ]] && echo -e "$fl" #empty line... cant fix it (:'d      #google fixed text  output
  529.     else #cache output
  530.         cat "$cachefile" #output the cached result as is
  531.         [[ $source == en ]] && echo -e "$C_DARK_BLUE_RAW"http://oxforddictionaries.com/definition/english/"$request"$C_NORMAL #for English source also print the Oxford Dictionaries link for the request
  532.     fi
  533.  
  534.     #sound: play the pronunciation file "$cachefile".mp3 when $sound is 1
  535.     if [[ $sound == 1 ]]; then
  536.         wait #wait for background jobs to finish (presumably the .mp3 download started earlier - confirm)
  537.         if [[ -e "$cachefile".mp3 ]]; then
  538.             if [[ $(stat -c%s "$cachefile".mp3) -ge 11000 ]]; then #if .mp3 is correct: file size is at least 11000 bytes
  539.                 stat=$(mpg123 "$cachefile".mp3 2>&1) #run mpg123 on the file and capture its stdout+stderr
  540. #               echo "$stat"
  541.                 [[ ! $(echo "$stat" | grep -o 'Comment') ]] && rm "$cachefile".mp3 #remove the file when mpg123 output has no 'Comment' string (NOTE(review): presumably marks a bad download - confirm)
  542.             else
  543.                 rm "$cachefile".mp3 #below the size threshold: remove it
  544.                 echo ".mp3 file is corrupt. try again."
  545.             fi
  546.         else
  547.             #echo ".mp3 file not found."
  548.             sleep 1 #pause so that Ctrl+C can still be pressed to delete the cache file
  549.         fi
  550.     fi
  551. fi
  552.  
  553. exit
  554.  
  555.  
  556. #remaining interface languages (hl= values) that are not in the list of source languages
  557. #hl=ak          Akan
  558. #hl=bem         Bemba
  559. #hl=bh          Bihari
  560. #hl=xx-bork     Bork, bork, bork!
  561. #hl=bs          Bosnian
  562. #hl=br          Breton
  563. #hl=km          Cambodian
  564. #hl=chr         Cherokee
  565. #hl=ny          Chichewa
  566. #hl=zh-TW       Chinese (Traditional)
  567. #hl=co          Corsican
  568. #hl=xx-elmer    Elmer Fudd
  569. #hl=ee          Ewe
  570. #hl=fy          Frisian
  571. #hl=gaa         Ga
  572. #hl=gn          Guarani
  573. #hl=xx-hacker   Hacker
  574. #hl=ha          Hausa
  575. #hl=haw         Hawaiian
  576. #hl=ig          Igbo
  577. #hl=ia          Interlingua
  578. #hl=jw          Javanese
  579. #hl=kk          Kazakh
  580. #hl=rw          Kinyarwanda
  581. #hl=rn          Kirundi
  582. #hl=xx-klingon  Klingon
  583. #hl=kg          Kongo
  584. #hl=kri         Krio (Sierra Leone)
  585. #hl=ku          Kurdish
  586. #hl=ckb         Kurdish (Soranî)
  587. #hl=ky          Kyrgyz
  588. #hl=ln          Lingala
  589. #hl=loz         Lozi
  590. #hl=lg          Luganda
  591. #hl=ach         Luo
  592. #hl=mg          Malagasy
  593. #hl=ml          Malayalam
  594. #hl=mi          Maori
  595. #hl=mr          Marathi
  596. #hl=mfe         Mauritian Creole
  597. #hl=mo          Moldavian
  598. #hl=mn          Mongolian
  599. #hl=sr-ME       Montenegrin
  600. #hl=ne          Nepali
  601. #hl=pcm         Nigerian Pidgin
  602. #hl=nso         Northern Sotho
  603. #hl=nn          Norwegian (Nynorsk)
  604. #hl=oc          Occitan
  605. #hl=or          Oriya
  606. #hl=om          Oromo
  607. #hl=ps          Pashto
  608. #hl=xx-pirate   Pirate
  609. #hl=pt-BR       Portuguese (Brazil)
  610. #hl=pt-PT       Portuguese (Portugal)
  611. #hl=pa          Punjabi
  612. #hl=qu          Quechua
  613. #hl=rm          Romansh
  614. #hl=nyn         Runyakitara
  615. #hl=gd          Scots Gaelic
  616. #hl=sh          Serbo-Croatian
  617. #hl=st          Sesotho
  618. #hl=tn          Setswana
  619. #hl=crs         Seychellois Creole
  620. #hl=sn          Shona
  621. #hl=sd          Sindhi
  622. #hl=si          Sinhalese
  623. #hl=so          Somali
  624. #hl=es-419      Spanish (Latin American)
  625. #hl=su          Sundanese
  626. #hl=tg          Tajik
  627. #hl=tt          Tatar
  628. #hl=ti          Tigrinya
  629. #hl=to          Tonga
  630. #hl=lua         Tshiluba
  631. #hl=tum         Tumbuka
  632. #hl=tk          Turkmen
  633. #hl=tw          Twi
  634. #hl=ug          Uighur
  635. #hl=uz          Uzbek
  636. #hl=wo          Wolof
  637. #hl=xh          Xhosa
  638. #hl=yo          Yoruba
  639. #hl=zu          Zulu
  640.  
  641. #comment="arr=googlearr;
  642. #for (var c = 0; c < arr.length; c++){ //testing
  643. #   if(typeof arr[c] !== 'undefined' && arr[c] !== null){ //dictionary output
  644. #       for (var i = 0; i < arr[c].length; i++){
  645. #           if(typeof arr[c][i] !== 'undefined' && arr[c][i] !== null){
  646. #               for (var e = 0; e < arr[c][i].length; e++){
  647. #                   print(c);
  648. #                   print(arr[c][i][e]);
  649. #                   //print(arr[8][0][1]);
  650. #               }
  651. #           }
  652. #       }
  653. #   }
  654. #}
  655. #   frequency experiment    whitespace=' ' //for x.x and xxx format
  656. #                   num2=Math.round(num*10000)/10;
  657. #                   if (num2 < 10 && num2%1 == 0){
  658. #                       whitespace='   '; //for x format
  659. #                   }
  660. #                   if (num2 >= 10){
  661. #                       num2=Math.round(num2/10)*10;
  662. #                       if (num2 < 100){
  663. #                           whitespace='  '; //for xx format
  664. #                       }
  665. #                   }
  666. #                   print(num2+whitespace+arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]);//frequency + word + variant of speech
  667. #"
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement