anoncheg

Google translate from command line and some more features

Jul 28th, 2012
1,396
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. #!/usr/bin/env bash
  2. # AWESOME GOOGLE TRANSLATE. A tool for accessing translate.google.com from the terminal, with additional features for English.
  3.  
  4. #    Copyright (C) 2012 Vitalij Chepelev.
  5.  
  6. #    This program is free software: you can redistribute it and/or modify
  7. #    it under the terms of the GNU General Public License as published by
  8. #    the Free Software Foundation, either version 3 of the License, or
  9. #    (at your option) any later version.
  10.  
  11. #    This program is distributed in the hope that it will be useful,
  12. #    but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. #    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14. #    GNU General Public License for more details.
  15.  
  16. #    You should have received a copy of the GNU General Public License
  17. #    along with this program.  If not, see <http://www.gnu.org/licenses/>.
  18.  
  19. # You can contact me there:
  20. # http://www.unix.com/shell-programming-scripting/196823-completed-command-line-google-translation-tool.html                       654321 - profile name
  21.  
  22. # main mirror https://github.com/Anoncheg1/Command-line-translator
  23. # mirror http://pastebin.com/kPTYjY6W
  24. # requirements:
  25. #   UTF-8 support for required languages
  26. #   curl >= 7.21.0,
  27. #   Mozilla js shell (spidermonkey) >= 1.8.0,
  28. #   mpg123
  29. #   html2text
  30. #   forvo.com account
  31. #features:
  32. #- translated text, fixed text with highlight, language detection, dictionary, translit
  33. # for english:
  34. #- phrases, idioms, word forms, transcription, audio pronunciation
  35. #- cache for words
  36. #- saving words to file for learning
  37. #
  38. #:) translate.google.com, thefreedictionary.com, lingvo-online.ru, www.forvo.com
  39.  
  40. help=$(basename "$0")' [-s[2,3]] [-l] [-h] <text>
  41. if text is LATIN_LANG, then target language is FIRST_LANG
  42. otherwise, target language is LATIN_LANG
  43. -s --sound Enable sound for one word
  44. -l, --list List of languages
  45. You can force the language with environment varibles by command:
  46. export TLSOURCE=en TLTARGET=ru
  47. but better configure "FIRST_LANG" and "LATIN_LANG" in script for auto detection of direction by the first character!
  48. You neeed UTF-8 support for required languages.
  49. '
  50.  
  51. # adjust to taste
  52. declare -r FIRST_LANG=ru            #target language for request in LATIN_LANG      NOT in A-z latin alphabet
  53. declare -r LATIN_LANG=en            #target for all not A-z latin requests          A-z latin alphabet will be detected!
  54. declare -r flogin=121212            #forvo.com login and pass REQUIRED!
  55. declare -r fpass=121212
  56. TERMINAL_C="WOB"                #Your terminal - white on black:WOB, black on white:BOW, anything other:O
  57. #httpproxy="127.0.0.1:4444"     #proxy for long strings
  58. #httpsproxy="--socks5 127.0.0.1:9050"   #socks5
  59. #
  60. declare -r words_buffer=4000        #4000 files max. there is removeing all files older than 20 days.
  61. declare -r timeout=6
  62. declare -r TRANSLIT_WORDS_MAX=10
  63. declare -r SOUND_DOWNLOAD_AWS=1     # 1 - always. 0 - on demand.
  64. declare -r useragent="Mozilla/5.0 (X11; Linux i686) AppleWebKit/534.34 (KHTML, like Gecko) QupZilla/1.3.1 Safari/534.34"
  65. #
  66. declare -r PR_DIR="$HOME/.translate"
  67. [ ! -d "$PR_DIR" ] && mkdir "$PR_DIR"
  68. [ ! -d "$PR_DIR"/cache ] && mkdir "$PR_DIR"/cache
  69.  
  70. declare -r TR_ENG_WORDS="$PR_DIR"/translated_words
  71. declare -r FIXED_STRING="$PR_DIR"/fixed_string
  72.  
  73. declare -A ln_arr
  74. ln_arr["af"]="Afrikaans"
  75. ln_arr["sq"]="Albanian"
  76. ln_arr["am"]="Amharic"
  77. ln_arr["ar"]="Arabic"
  78. ln_arr["hy"]="Armenian"
  79. ln_arr["az"]="Azerbaijani"
  80. ln_arr["eu"]="Basque"
  81. ln_arr["be"]="Belarusian"
  82. ln_arr["bn"]="Bengali"
  83. ln_arr["bg"]="Bulgarian"
  84. ln_arr["ca"]="Catalan"
  85. ln_arr["zh-CN"]="Chinese (Simplified)"
  86. ln_arr["zh"]="Chinese"
  87. ln_arr["hr"]="Croatian"
  88. ln_arr["cs"]="Czech"
  89. ln_arr["da"]="Danish"
  90. ln_arr["nl"]="Dutch"
  91. ln_arr["en"]="English"
  92. ln_arr["eo"]="Esperanto"
  93. ln_arr["et"]="Estonian"
  94. ln_arr["fo"]="Faroese"
  95. ln_arr["tl"]="Filipino"
  96. ln_arr["fi"]="Finnish"
  97. ln_arr["fr"]="French"
  98. ln_arr["gl"]="Galician"
  99. ln_arr["ka"]="Georgian"
  100. ln_arr["de"]="German"
  101. ln_arr["el"]="Greek"
  102. ln_arr["gu"]="Gujarati"
  103. ln_arr["ht"]="Haitian Creole"
  104. ln_arr["iw"]="Hebrew"
  105. ln_arr["hi"]="Hindi"
  106. ln_arr["hu"]="Hungarian"
  107. ln_arr["is"]="Icelandic"
  108. ln_arr["id"]="Indonesian"
  109. ln_arr["ga"]="Irish"
  110. ln_arr["it"]="Italian"
  111. ln_arr["ja"]="Japanese"
  112. ln_arr["kn"]="Kannada"
  113. ln_arr["ko"]="Korean"
  114. ln_arr["lo"]="Laothian"
  115. ln_arr["la"]="Latin"
  116. ln_arr["lv"]="Latvian"
  117. ln_arr["lt"]="Lithuanian"
  118. ln_arr["mk"]="Macedonian"
  119. ln_arr["ms"]="Malay"
  120. ln_arr["mt"]="Maltese"
  121. ln_arr["no"]="Norwegian"
  122. ln_arr["fa"]="Persian"
  123. ln_arr["pl"]="Polish"
  124. ln_arr["pt"]="Portuguese"
  125. ln_arr["ro"]="Romanian"
  126. ln_arr["ru"]="Russian"
  127. ln_arr["sr"]="Serbian"
  128. ln_arr["sk"]="Slovak"
  129. ln_arr["sl"]="Slovenian"
  130. ln_arr["es"]="Spanish"
  131. ln_arr["sw"]="Swahili"
  132. ln_arr["sv"]="Swedish"
  133. ln_arr["ta"]="Tamil"
  134. ln_arr["te"]="Telugu"
  135. ln_arr["th"]="Thai"
  136. ln_arr["tr"]="Turkish"
  137. ln_arr["uk"]="Ukrainian"
  138. ln_arr["ur"]="Urdu"
  139. ln_arr["vi"]="Vietnamese"
  140. ln_arr["cy"]="Welsh"
  141. ln_arr["yi"]="Yiddish"
  142.  
  143. trap bashtrap INT
  144. bashtrap()
  145. {
  146.     echo "Exit signal detected. Deleting cache files."
  147.     rm "$cachefile" 2> /dev/null
  148.     exit
  149. }
  150.  
  151. if [[ $TERMINAL_C == "WOB" ]];then
  152.     declare -r C_RED="$(tput bold)$(tput setaf 1)"      #highlight
  153.     declare -r C_YELLOW="$(tput bold)$(tput setaf 3)"   #highlight
  154.     declare -r C_GRAY="$(tput setaf 7)" #language detected
  155.     declare -r C_CYAN_RAW="\033[1;36m"  #forms
  156.     declare -r C_GRAY_RED_RAW="\033[1;35m"  #phrases
  157.     declare -r C_DARK_BLUE_RAW="\033[34m"   #link for dictionary
  158.     declare -r C_BLUE_RAW="\033[1;34m"  #dictionary and vform1
  159.     declare -r C_BRIGHT_RAW="\033[1;37m"    #phrases, examples main part, vform2
  160.     declare -r C_GREEN="\033[1;32m"     #t_result
  161. elif [[ $TERMINAL_C == "BOW" ]];then
  162.     declare -r C_RED="$(tput bold)$(tput setaf 1)"      #highlight
  163.     declare -r C_YELLOW="$(tput setaf 3)"           #highlight
  164.     declare -r C_GRAY="$(tput bold)$(tput setaf 5)" #language detected
  165.     declare -r C_CYAN_RAW="\033[1;36m"      #forms
  166.     declare -r C_GRAY_RED_RAW="\033[1;35m"      #phrases
  167.     declare -r C_DARK_BLUE_RAW="$(tput setaf 7)"    #link for dictionary
  168.     declare -r C_BLUE_RAW="\033[1;34m"      #dictionary and vform1
  169.     declare -r C_BRIGHT_RAW="$(tput bold)"      #phrases, examples main part, vform2
  170.     declare -r C_GREEN="$(tput bold)"       #t_result
  171. else #universal
  172.     declare -r C_RED="$(tput setaf 1)"      #highlight
  173.     declare -r C_YELLOW="$(tput bold)"      #highlight
  174.     declare -r C_GRAY=""            #language detected
  175.     declare -r C_CYAN_RAW=""        #forms
  176.     declare -r C_GRAY_RED_RAW=""        #phrases
  177.     declare -r C_DARK_BLUE_RAW=""       #link for dictionary
  178.     declare -r C_BLUE_RAW=""        #dictionary and vform1
  179.     declare -r C_BRIGHT_RAW="$(tput bold)"  #phrases, examples main part, vform2
  180.     declare -r C_GREEN="$(tput bold)"   #t_result
  181. fi
  182. declare -r C_NORMAL="$(tput sgr0)"
  183. declare -r C_NORMAL_RAW="\033[0m"
  184.  
  185. jsscript="var googlearr = eval(JSON.stringify(myJSONObject));
  186. function translated_result(arr){
  187.     var rsum=\"\"; //translated text
  188.     if(typeof googlearr[0] !== 'undefined' && googlearr[0] !== null){ //summing sentences
  189.         for (var i = 0; i < googlearr[0].length; i++){
  190.             if(typeof googlearr[0][i][0] !== 'undefined' && googlearr[0][i][0] !== null){
  191.                 rsum=rsum+googlearr[0][i][0];
  192.             }
  193.         }
  194.     }
  195.     print(rsum);
  196. }
  197. function translit(arr){
  198.     var rsum=\"\"; //translit
  199.     if(typeof googlearr[0] !== 'undefined' && googlearr[0] !== null){ //summing sentences
  200.         for (var i = 0; i < googlearr[0].length; i++){
  201.             if(typeof googlearr[0][i][3] !== 'undefined' && googlearr[0][i][3] !== null){
  202.                 rsum=rsum+'\n'+googlearr[0][i][3];
  203.             }
  204.         }
  205.     }
  206.     print(rsum);
  207. }
  208. function dictionary(arr){ //dictionary output
  209.     if(typeof arr[1] !== 'undefined' && arr[1] !== null){
  210.         for (var a = 0; a < arr[1].length; a++){
  211.             if(typeof arr[1][a][0] !== 'undefined' && arr[1][a][0] !== null){
  212.                 print(\"$C_BLUE_RAW\"+arr[1][a][0]+\"$C_NORMAL_RAW\");//part of speach
  213.                 for (var b = 0; b < arr[1][a][2].length; b++){//words
  214.                     num=parseFloat(arr[1][a][2][b][3]); //frequency
  215.                     num2=Math.round(num*100000)/10;//round
  216.                     if (num2 >= 10)
  217.                         num2=Math.round(num2/10)*10;
  218.                     print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]+' '+num2);//word + variant of speach + frequency
  219.                     //variant1 print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]+' '+Math.round(num*1000000)/1000000);//word + variant of speach + frequency
  220.                     //old print(arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]);//word + variant of speach
  221.                 }
  222.             }
  223.         }
  224.     }
  225. }
  226. function language_identification(arr){ //get detected languages
  227.     if(typeof arr[8] !== 'undefined' && arr[8] !== null && typeof arr[8][0] !== 'undefined' && arr[8][0] !== null && typeof arr[8][0][0] !== 'undefined' && arr[8][0][0] !== null){
  228.         print('Detected language: '+arr[8][0][0]); //detected language
  229.     }
  230. }
  231. "
  232.  
  233. sound=0
  234. volume=0.8
  235. if [[ $1 = -h || $1 = --help ]]; then #help
  236.     echo "$help"
  237.     exit
  238. fi
  239. if [[ $1 == -s || $1 == --sound ]];then #sound varibles
  240.     sound=1
  241.     request=($*) #get array
  242.     request[0]="" #remove first parameter
  243.     request=${request[*]} #to string
  244. else
  245.     request="$*"
  246. fi
  247. if [[ $1 = -l || $1 = --list ]]; then #list
  248.     for i in "${!ln_arr[@]}" ;do
  249.         echo -e "$i\t${ln_arr[$i]}"
  250.     done | sort -k2
  251.     exit
  252. fi
  253.  
  254. if [[ ${#request} -eq 0 ]];then #using saved fixed string without parameters
  255.     if [[ -e $FIXED_STRING ]]; then
  256.         request=$(cat $FIXED_STRING 2>/dev/null)
  257.         echo -e "$(tput bold)$(tput setaf 3)$request$(tput sgr0)"
  258.     else
  259.         exit
  260.     fi
  261. fi
  262. #rm $FIXED_STRING 2> /dev/null
  263.  
  264. tchar=${request:0:1} #language detection by the first character
  265. tcharnum=$(printf "%d" "'${tchar}")
  266. if [[ $tcharnum -ge 65 && $tcharnum -le 122 || (( $TLSOURCE == $LATIN_LANG && ! $TLTARGET )) ]]; then #if request is A-Za-z then it is LATIN_LANG... ("en,it" will not be detected here idk how to make it for now)
  267.     # latin to first
  268.     source="$LATIN_LANG" #english or latin alphabet
  269.     target="$FIRST_LANG" #if text is english target language is FIRST_LANG
  270. else    # ANY language to latin
  271.     source="$FIRST_LANG"
  272.     target="$LATIN_LANG"
  273. fi
  274.  
  275. [[ $TLSOURCE ]] && source=$TLSOURCE;
  276. [[ $TLTARGET ]] && target=$TLTARGET; # export TLSOURCE=en TLTARGET=ru; will force the language
  277.  
  278. eng_sound_download(){           # $cachefile,
  279.     [[ $(ls "$PR_DIR"/cache | wc -l) -gt $words_buffer ]] && find "$PR_DIR"/cache -mtime +20 -delete #cache cleaning. remove words older than 20 days.
  280.     rm "$PR_DIR"/tmpcookie 2>/dev/null &
  281.     curl -s -c "$PR_DIR"/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "login=$flogin&password=$fpass" http://ru.forvo.com/login/ -o/dev/null
  282.     if [[ -e "$PR_DIR"/tmpcookie ]]; then
  283.         slink=$(curl -s -b $PR_DIR/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://ru.forvo.com/word/"$(echo $request | tr ' ' _ )"/ | grep -o '[^"]*/download/mp3/'"$(echo $request | tr ' ' _ )"'/en/[^"]*' |head -n 1 )
  284.         if [[ ${#slink} -gt 5 ]];then
  285.              curl -s -b "$PR_DIR"/tmpcookie --connect-timeout $timeout -m $timeout --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://ru.forvo.com"$slink" > "$cachefile".mp3
  286.             return
  287.         fi
  288.     fi
  289.     [ $sound -eq 1 ] && echo fail to get sound from forvo.com
  290. }
  291.  
  292.  
  293. #----------------------------------------------- MAIN PART ----------------------------------------------------------------
  294.  
  295. if [[ ${#request} -gt 300 ]]; then
  296.     if ! result=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "sl=$source" -d "tl=$target" --data-urlencode "text=$request" http://translate.google.com) && [[ $httpproxy != "" ]] #getting google respond for short sentence
  297.     then    # second attempt without https proxy
  298.         httpsproxy=""
  299.         result=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -d "sl=$source" -d "tl=$target" --data-urlencode "text=$request" http://translate.google.com)
  300.     fi
  301.     #encoding=$(awk '/Content-Type: .* charset=/ {sub(/^.*charset=["'\'']?/,""); sub(/[ "'\''].*$/,""); print}' <<<"$result")
  302.     #iconv -f $encoding <<<"$result" | awk 'BEGIN {RS="<div"};/<span[^>]* id=["'\'']?result_box["'\'']?/ {sub(/^.*id=["'\'']?result_box["'\'']?(>| [^>]*>)([ \n\t]*<[^>]*>)*/,"");sub(/<.*$/,"");print}' | html2text -utf8
  303.     #echo -e "\033[32;1m"$(iconv -f "$encoding" <<<"$result" |  awk 'BEGIN {RS="</div>"};/<span[^>]* id=["'\'']?result_box["'\'']?/' | html2text -utf8)"$C_NORMAL_RAW"
  304.     echo -e "\033[32;1m"$(echo "$result" |  awk 'BEGIN {RS="</div>"};/<span[^>]* id=["'\'']?result_box["'\'']?/' | html2text -utf8)"$C_NORMAL_RAW"
  305.  
  306. else
  307.     if [[ $r_words_count -le 4 ]];then
  308.         httpproxy="" ; httpsproxy=""  #don't wanna proxy for 4 words.
  309.         request=$(echo "$request" | tr '[:upper:]' '[:lower:]') #lower request for short sentences
  310.     fi
  311.     cachefile="$PR_DIR/cache/$source-$target"_"$request"
  312.     r_words_count=$(echo "$request"  |wc -w)
  313.     if [[ ! -e "$cachefile" || (( $sound == 1 && ! -e "$cachefile".mp3 )) ]]; then
  314.  
  315.         if ! grespond=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout --data-urlencode "text=$request" "http://translate.google.com/translate_a/t?client=t&hl=$target&sl=$source&tl=$target&ie=UTF-8&oe=UTF-8&multires=1&ssel=0&tsel=0&sc=1") && [[ "$httpproxy" != "" ]] #getting google respond for short sentence
  316.         then    # second attempt without https proxy
  317.             #echo https proxy not working. using http proxy.
  318.             httpsproxy=""
  319.             grespond=$(curl -s -i --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout --data-urlencode "text=$request" "http://translate.google.com/translate_a/t?client=t&hl=$target&sl=$source&tl=$target&ie=UTF-8&oe=UTF-8&multires=1&ssel=0&tsel=0&sc=1") #getting google respond for short sentence
  320.         fi
  321.         [[ ! $(echo "$grespond" | grep -o '\[.*\]') ]] && { echo "can't connect" ; exit; } #small connection check
  322.  
  323.         #echo -n "var myJSONObject = " > "$PR_DIR"/tmpjsobj2
  324.         #echo [$(echo "$grespond" | grep -o '"[^"]*/i[^"]*"')"];" >> "$PR_DIR"/tmpjsobj2
  325.         #echo [$(echo "$grespond" | grep -o '[^"]*/i[^"]*')"];"
  326.         #echo -e "var googlearr = eval(JSON.stringify(myJSONObject));\n print(googlearr);" >> "$PR_DIR"/tmpjsobj2
  327.         #js "$PR_DIR"/tmpjsobj2
  328.         fl_raw=$(echo "$grespond" | grep -o '[^"]*/i[^"]*') #google correction from $grespond
  329.  
  330.         #Highlight
  331.         declare difftest
  332.         [[ $fl_raw ]] &&
  333.             if [[ $r_words_count -le 2 ]];then #for 1-2 words
  334.                 fl=$(echo "$fl_raw" | sed 's/\\u003cb\\u003e\\u003ci\\u003e//g' | sed 's/\\u003c\/i\\u003e\\u003c\/b\\u003e//g' | sed 's/\\u0026//g' | sed "s/\#39;/'/g") #removing shit
  335.                 [[ $(echo "$fl" | tr -d "'") == "$request" ]] && fl="" # skip highlight for ' char
  336.                 difftest="$(cmp -l <(echo -n $request) <(echo -n $fl) 2>/dev/null)" #comparison
  337.                 if [[ $(echo "$difftest" | grep '[0-9]') ]];then
  338.                 echo "$fl" > $FIXED_STRING                 
  339.                     #[[ $source != en ]]&& diffnum=$(($diffnum/2+$diffnum%2))   #MUST BE CHECKED FOR REQUIRED LANGUAGES
  340.                     #[[ ${fl:$diffnum-1:1} == ' ' ]] && let diffnum-- #white space correction
  341.                     for (( i=$(echo "$difftest" | wc -l); i>=1; i--)); do
  342.                         pos=$(($(echo "$difftest" | sed -n $i'p' | sed -E 's/(([^ ]+ )).*/\1/' ) - 1))
  343.                         fl=$(echo "$fl" | sed 's/^\(.\{'$pos'\}\).\(.*\)/\1'$C_RED"${fl:$pos:1}"$C_NORMAL$C_YELLOW'\2/') #highlight difference in one word
  344.                     done
  345.                 fi
  346.                 fl="$C_YELLOW$fl$C_NORMAL"
  347.             else
  348.                 fl=$(echo "$fl_raw" | sed 's/\\u003cb\\u003e\\u003ci\\u003e/'$C_YELLOW'/g' | sed 's/\\u003c\/i\\u003e\\u003c\/b\\u003e/'$C_NORMAL'/g' | sed 's/\\u0026//g' | sed "s/\#39;/'/g" ) #google fixed text
  349.                 echo "$fl" > $FIXED_STRING
  350.             fi
  351. #echo $difftest
  352. #echo $grespond | grep -o '\[.*\]'
  353.  
  354.         echo -n "var myJSONObject = " > "$PR_DIR"/tmpjsobj
  355.         echo -n "$grespond" | grep -o '\[.*\]' >> "$PR_DIR"/tmpjsobj
  356.         echo ";">> "$PR_DIR"/tmpjsobj
  357.         echo -n "$jsscript" >> "$PR_DIR"/tmpjsobj
  358.  
  359.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  360.         echo -n "language_identification(googlearr);" >> "$PR_DIR"/tmpjsobj2
  361.         det_language=$(js "$PR_DIR"/tmpjsobj2 | tail -c3)       # detected language
  362.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  363.         echo -n "translated_result(googlearr);" >> "$PR_DIR"/tmpjsobj2
  364.         t_result=$(js "$PR_DIR"/tmpjsobj2)              # translated text
  365.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  366.         echo -n "translit(googlearr);" >> "$PR_DIR"/tmpjsobj2
  367.         translit=$(js "$PR_DIR"/tmpjsobj2)              # translit
  368.         cp -f "$PR_DIR"/tmpjsobj "$PR_DIR"/tmpjsobj2
  369.         echo -n "dictionary(googlearr);" >> "$PR_DIR"/tmpjsobj2
  370.         dictionary=$(js "$PR_DIR"/tmpjsobj2)                # dictionary
  371.  
  372.         #Language detection
  373.         [[ $det_language != $source && (( $TLSOURCE || $det_language != $ENGLISH_TARGET_LANG )) ]] && echo -e "$C_GRAY* $det_language ${ln_arr[$det_language]}$C_NORMAL"
  374.  
  375.         if [[ $source != $det_language && ! $fl_raw && ((
  376.                 $(echo $request | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ==  $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ||
  377.                 $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ==  $(echo $translit | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ||
  378.                 ${request:0:1} == ${t_result:0:1}
  379.                 )) && ! $dictionary ]]; then
  380.             [[ $source != "auto" ]] && echo -e "trying with detected language" # I guess google "auto" is not working then we will do it by yourself mutely
  381.             export TLSOURCE=$det_language #TLTARGET=en #source and target language for second attempt
  382.             if [[ $sound == 1 ]]; then $0 -s "$request" ; else $0 "$request" ;fi #second attempt to translate with detected language  #sound will be quiet...
  383.             exit
  384.         fi
  385.  
  386.         [[ $source == "auto" ]] && { source=$det_language; cachefile="$PR_DIR/cache/$source-$target"_"$request"; } #auto in cachefile fix #not necessary
  387.  
  388.  
  389.  
  390.  
  391.  
  392.         declare trans
  393.         if [[ $r_words_count -le 2 && ${#request} -gt 1 && $source == en && ! $(echo "$difftest" | wc -l) -gt 1 ]];then #special check( expample: advise advice)
  394.         #transcription
  395.             trans=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://lingvo-online.ru/en/Translate/en-"$target"/"$request" | grep -o '"[^"]*/transcription\.gif.Text=[^"]*"' | sed 's/.*=\(.*\)"/\1/'| echo -n -e $(sed 's/+/ /g; s/%/\\x/g')) #getting transcription
  396.             [[ $? != 0 ]] &&  { echo "can't get transcription"; } #bashtrap;
  397.         fi
  398.         #"Dictionary part" for english only. but it can be extended for every required language
  399.         if [[ $r_words_count -eq 1 && ${#request} -gt 1 && $source == en && (( ! $fl_raw || $trans )) ]]; then #dictionary
  400.        
  401.             echo -e "$C_GREEN$t_result$C_NORMAL_RAW" > "$cachefile" #google translated text to cache
  402.             [[ $dictionary ]] && echo -e "$dictionary" >> "$cachefile" #google dictionary to cache
  403.  
  404.             #phrases, forms and a vform
  405.             macmill=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" -m $timeout http://www.macmillandictionary.com/dictionary/british/"$(echo $request | tr '[:upper:]' '[:lower:]' | tr ' ' - )" )
  406.             [[ $? != 0 ]] && { echo "cant get phrases" ; bashtrap; }
  407.             #forms
  408.             forms=$(echo $macmill | grep -o "id=\"wordsformslayer-head\".*End of DIV wordforms" | sed 's/INFLECTION-CONTENT/\n/g' | sed -e 's/.*INFLECTION-ENTRY\">\([^<]*\)<.*I-VARIANT-before\">\([^<]*\).*INFLECTION-ENTRY\">\([^<]*\).*/666\1\2\3,/' -e 's/.*INFLECTION-ENTRY\">\([^<]*\)<.*/666\1,/' | grep 666 | cut -c 4-)
  409.             fwc=$(echo "$forms" | wc -l)
  410.             firstf=$(echo "$forms" | head -n 1) #1
  411.             last1f=$(echo "$forms" | tail -n 2 | head -n 1) #end-1
  412.             lastf=$(echo "$forms" | tail -n 1) #end
  413.             [[ (( fwc -eq 5 && (( ${firstf%?}"ed," != $lastf || ${firstf%?}"ed," != $last1f || $lastf != $last1f )) )) ||
  414.                (( fwc -eq 3 && (( ${firstf%?}"er," != $last1f  || ${firstf%?}"est," != $lastf )) )) ||
  415.                (( fwc -eq 2 && (( ${firstf%?}"s," != $lastf )) )) ||
  416.                (( fwc -eq 4 )) ]] && echo -e "$C_CYAN_RAW"forms:$C_NORMAL ${forms%?} >> "$cachefile"
  417.             #vform
  418.             vform=$(echo "$macmill" | grep -o "GREF-ENTRY-before.*</a>" | sed 's/<\/a>/<\/a>\n/g' | head -n 1 | sed 's/.*<a[^>]*>\([^<]*\).*/\1/')
  419.             vform_desc=$(echo "$macmill" | grep "span class=\"GREF-TYPE\"" | sed 's/.*<span class=\"GREF-TYPE\">\([^<]*\)<.*/\1/')
  420.             [[ $vform ]] && echo -e "$C_BLUE_RAW$vform_desc$C_NORMAL_RAW" "$C_BRIGHT_RAW$vform$C_NORMAL_RAW" >> "$cachefile"
  421.             #phrases
  422.             raw_phras=$(echo "$macmill" | grep -o '<li ID.*End of DIV SENSE--></li>' | sed 's/<.\?span[^>]*>//g')
  423.             #div class="P-HEAD" #| sed 's/End of DIV SENSE--><\/li>/\n/g' \ -e 's/.*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\'$C_BRIGHT_RAW'\1'$C_NORMAL'\. \2<FUCKINGSHIT>\t\3/' \ | nl -s ' ' | sed -e 's/<FUCKINGSHIT>/\n/' -e 's/^ *//'
  424. #-e 's/.*\"EXAMPLE\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1<FUCKINGSHIT>\t\2/' \
  425.             phras=$(echo $raw_phras | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed -e 's/div class=\"P-HEAD\"/\n?????/g' | grep "?????" | grep -n . | sed 's/^[0-9]*:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]*:/{h;d;};G; s/^\(.*\)\n\([0-9]*:\)/\2 \1/' | sed \
  426. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2\3'$C_NORMAL'\. \4/' \
  427. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"EXAMPLE\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' \
  428. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*\"h2\">\([^<]*\).*class=\"SENSE-BODY\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2\3'$C_NORMAL'\. \4/' \
  429. -e 's/^\([0-9]*\):.*\"h2\">\([^<]*\).*class=\"SENSE-BODY\">\([^<]*\).*/**\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' \
  430. -e 's/^\([0-9]*\):.*\"EXAMPLE\">\([^<]*\).*/**\1 \2/' | grep -o "^\*\*[0-9].*" | sed 's/^\*\*//' ) # doubleh2+exmaple, h2+exmaple, doubleh2+sentence, h2+sentence, only example
  431.             if [[ $phras ]]; then   #phrases
  432.                 echo -e "$C_GRAY_RED_RAW"PHRASES:$C_NORMAL >> "$cachefile"
  433.                 echo -e "$phras" >> "$cachefile"
  434.                 #echo -e "$C_DARK_BLUE_RAW"http://www.macmillandictionary.com/dictionary/american/"$request"$C_NORMAL >> "$cachefile"
  435.             else            #second attempt examples
  436.                 phrases_2=$(echo "$macmill" | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed 's/<.\?span[^>]*>//g' | grep -o 'div class="SENSE".*End of DIV SENSE--' | sed 's/End of DIV SENSE--/\n/g' | grep -n . | sed 's/^.:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]:/{h;d;};G; s/^\(.*\)\n\([0-9]:\)/\2 \1/' | grep "<strong>" | sed 's/^\([0-9]\).*<strong>\([^<]*\)<.*class=\"EXAMPLE\">\([^<]*\)<.*/\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'. \3/' | sed 's/^\([0-9]\).*<strong>\([^<]*\)<\/strong>\([^<]*\).*/\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'. \3/' | sed 's/\. / /')
  437.                 if [[ $phrases_2 ]];then
  438.                     echo -e "$C_GRAY_RED_RAW"EXAMPLES:$C_NORMAL >> "$cachefile"
  439.                     echo -e "$phrases_2" >> "$cachefile"
  440.                 fi
  441.             fi
  442.            
  443.             #transcription
  444.             [[ $trans ]] && echo "[$trans]" >> "$cachefile"
  445.  
  446.             cat "$cachefile" 2>/dev/null #output
  447.             echo -e "$C_DARK_BLUE_RAW"http://oxforddictionaries.com/definition/english/$(echo "$request" | sed "s/'/%27/" )$C_NORMAL # just another good english dictionary
  448.  
  449.             if [[ ${#request} -gt 2 || $r_words_count -le 2 ]] ; then
  450.  
  451.                 #saving words
  452.                 ted_words_file=$(cat "$TR_ENG_WORDS")
  453.                 ted_words_file=$(echo "$ted_words_file" | tail -n 70)
  454.                 if [[ ! $(echo "$ted_words_file" | grep "$request" 2>/dev/null) ]]; then
  455.                     echo -e "$ted_words_file\n""$request" > "$TR_ENG_WORDS"
  456.                     #echo -e "$request \t\t\t\t\t\t\t\t\t\t [$trans]" >> "$TR_ENG_WORDS"
  457.                 fi
  458.                
  459.                 #getting sound from forvo.com
  460.                     [[ ! -e "$cachefile".mp3 ]] &&
  461.                         if [[ $sound == 1 || $SOUND_DOWNLOAD_AWS == 1 ]];then
  462.                             eng_sound_download &      # in background
  463.                         fi
  464.                        
  465.             fi
  466.         else #not english dictionary
  467.             echo -e "$C_GREEN$t_result$C_NORMAL_RAW"                    #google translated text output
  468.             [[ $source != en && $r_words_count -le $TRANSLIT_WORDS_MAX && $(echo $t_result | sed 's/ //g' | tr '[:upper:]' '[:lower:]') !=  $(echo $translit | sed 's/ //g' | tr '[:upper:]' '[:lower:]') ]] && echo -e "$translit"                 #google translit    output
  469.             [[ $dictionary ]] && echo -e "$dictionary"                  #google dictionary  output
  470.  
  471.             if [[ $source == en && ! $fl_raw && $r_words_count -le 4 ]]; then
  472.                 #phrases for 2 words
  473.                 if [[ $r_words_count -le 2 ]];then
  474.  
  475.                     #getting sound from forvo.com
  476.                     [[ ! -e "$cachefile".mp3 ]] &&
  477.                         if [[ $sound == 1 || $SOUND_DOWNLOAD_AWS == 1 ]];then
  478.                             eng_sound_download &      # in background
  479.                         fi
  480.  
  481.                     macmill=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://www.macmillandictionary.com/dictionary/british/"$(echo $request | tr ' ' - )" )
  482.                     [[ $? != 0 ]] && { echo "cant getmacmillandictionary.com phrases for string" ; bashtrap; }
  483.                     macmill=$(echo "$macmill" | grep -o 'div class="SENSE".*End of DIV SENSE--' | sed -e 's/<a[^>]*>//g' -e 's/<\/a>//g' | sed 's/End of DIV SENSE--/\n/g' | grep -n . | sed 's/^[0-9]*:/&\n/' | sed 's/End of DIV EXAMPLES/\n/g' | sed '/^[0-9]*:/{h;d;};G; s/^\(.*\)\n\([0-9]*:\)/\2 \1/' | sed \
  484. -e's/^\([0-9]*\):.*p id=\"EXAMPLE\" class=\"EXAMPLE\">\([^<]*\)<.*/*\1 \2/' \
  485. -e 's/^\([0-9]\).*span class=\"BASE\">\([^<]*\).*context=\"DEFINITION-before\"> <\/span>\([^<]*\).*/*\1 \'$C_BRIGHT_RAW'\2'$C_NORMAL'\. \3/' | grep -o "^\*[0-9].*" | sed 's/^*//')
  486.                 fi
  487.                 examples=$macmill
  488.                 if [[ $examples ]];then
  489.                     echo -e "$C_GRAY_RED_RAW"EXAMPLES:$C_NORMAL
  490.                     echo -e "$examples"
  491.                 fi
  492.  
  493.                 #search thefreedictionary.com for an idiom (2-4 words); runs only when $phras2 is empty
  494.                 if [[ ! $phras2 ]]; then
  495.                     list=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" http://www.thefreedictionary.com/"$(echo $request | tr ' ' + )")
  496.                     [[ $? != 0 ]] && { echo "cant get thefreedictionary.com phrases for string" ; bashtrap; }
  497.                     list=$(echo "$list" | grep "[fF]ound in:") #keep only the "Found in:" line; the links below are taken from it
  498.                     if [[ $list ]]; then
  499.                         ideomlink=$(echo "$list" | grep -o 'http://idioms[^"]*')
  500.                         if [[ $ideomlink ]]; then
  501.                             raw_ideoms=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" "$ideomlink" | grep -o 'div class="ds-single".*</div><div')
  502.                             #sed -n ... p prints only lines where the pattern matched, so an entry without that field gives nothing instead of raw HTML
  503.                             ideom=$(echo "$raw_ideoms" | sed -e 's/<i>//g' -e 's/<\/i>//g' | sed -n 's/.*ds-single\">\([^<]*\)<.*/\1/p')
  504.                             illustration=$(echo "$raw_ideoms" | sed -n 's/.*class=illustration>\([^<]*\)<.*/\1/p')
  505.                             if [[ $ideom ]]; then
  506.                                 echo -e "$C_GRAY_RED_RAW"Ideom:$C_NORMAL
  507.                                 echo "$ideom" #quoted: no glob expansion, one entry per line (same as echo "$ency" below)
  508.                                 [[ $illustration ]] && echo "$illustration"
  509.                             fi
  510.                         else #no idioms link on the "Found in:" line: try the encyclopedia link instead
  511.                             ency_link=$(echo "$list" | grep -o 'http://encyclopedia2[^"]*')
  512.                             if [[ $ency_link ]]; then
  513.                                 raw_ency=$(curl -s --user-agent "$useragent" $httpsproxy -x "$httpproxy" "$ency_link")
  514.                                 ency=$(echo "$raw_ency" | grep '<div class=hw>' | grep -o 'div>.*<br /' | sed -e 's/"//g' | sed -e 's/<b>/'$(tput bold)'/g' -e 's/<\/b>/'$C_NORMAL'/g' | sed 's/div>\([^<]*\).*/\1/')
  515.                                 contr_ency=$(echo "$raw_ency" | grep "Contrast with" | sed -n 's/.*\">\([^<]*\)<\/a.*/\1/p')
  516.                                 if [[ $ency ]]; then
  517.                                     echo -e "$C_GRAY_RED_RAW"Ideom:$C_NORMAL
  518.                                     echo "$ency"
  519.                                     [[ $contr_ency ]] && echo -e  Contrast with: "$C_BRIGHT_RAW"$contr_ency"$C_NORMAL"
  520.                                 fi
  521.                             fi
  522.                         fi
  523.                     fi
  524.                 fi
  525.             fi
  526.  
  527.         fi #end of english dictionary
  528.         [[ $fl_raw ]] && echo -e "$fl" #empty line... cant fix it (:'d      #google fixed text  output
  529.     else #cache output
  530.         cat "$cachefile" #output
  531.         [[ $source == en ]] && echo -e "$C_DARK_BLUE_RAW"http://oxforddictionaries.com/definition/english/"$request"$C_NORMAL
  532.     fi
  533.  
  534.     #sound: play the cached pronunciation and drop the .mp3 when it looks broken
  535.     if [[ $sound == 1 ]]; then
  536.         wait
  537.         mp3_path="$cachefile".mp3
  538.         if [[ ! -e $mp3_path ]]; then
  539.             #echo ".mp3 file not found."
  540.             sleep 1 #to be able Ctrl+C to delete cache file
  541.         elif [[ $(stat -c%s "$mp3_path") -lt 11000 ]]; then #smaller than 11000 bytes: not a correct .mp3
  542.             rm "$mp3_path"
  543.             echo ".mp3 file is corrupt. try again."
  544.         else
  545.             mpg_out=$(mpg123 "$mp3_path" 2>&1)
  546.             #echo "$mpg_out"
  547.             #player output without the word 'Comment' also counts as a bad file
  548.             grep -q 'Comment' <<<"$mpg_out" || rm "$mp3_path"
  549.         fi
  550.     fi
  551. fi
  552.  
  553. exit
  554.  
  555.  
  556. #remaining interface languages (hl= codes) that are not in the list of source languages
  557. #hl=ak          Akan
  558. #hl=bem         Bemba
  559. #hl=bh          Bihari
  560. #hl=xx-bork     Bork, bork, bork!
  561. #hl=bs          Bosnian
  562. #hl=br          Breton
  563. #hl=km          Cambodian
  564. #hl=chr         Cherokee
  565. #hl=ny          Chichewa
  566. #hl=zh-TW       Chinese (Traditional)
  567. #hl=co          Corsican
  568. #hl=xx-elmer    Elmer Fudd
  569. #hl=ee          Ewe
  570. #hl=fy          Frisian
  571. #hl=gaa         Ga
  572. #hl=gn          Guarani
  573. #hl=xx-hacker   Hacker
  574. #hl=ha          Hausa
  575. #hl=haw         Hawaiian
  576. #hl=ig          Igbo
  577. #hl=ia          Interlingua
  578. #hl=jw          Javanese
  579. #hl=kk          Kazakh
  580. #hl=rw          Kinyarwanda
  581. #hl=rn          Kirundi
  582. #hl=xx-klingon  Klingon
  583. #hl=kg          Kongo
  584. #hl=kri         Krio (Sierra Leone)
  585. #hl=ku          Kurdish
  586. #hl=ckb         Kurdish (Soranî)
  587. #hl=ky          Kyrgyz
  588. #hl=ln          Lingala
  589. #hl=loz         Lozi
  590. #hl=lg          Luganda
  591. #hl=ach         Luo
  592. #hl=mg          Malagasy
  593. #hl=ml          Malayalam
  594. #hl=mi          Maori
  595. #hl=mr          Marathi
  596. #hl=mfe         Mauritian Creole
  597. #hl=mo          Moldavian
  598. #hl=mn          Mongolian
  599. #hl=sr-ME       Montenegrin
  600. #hl=ne          Nepali
  601. #hl=pcm         Nigerian Pidgin
  602. #hl=nso         Northern Sotho
  603. #hl=nn          Norwegian (Nynorsk)
  604. #hl=oc          Occitan
  605. #hl=or          Oriya
  606. #hl=om          Oromo
  607. #hl=ps          Pashto
  608. #hl=xx-pirate   Pirate
  609. #hl=pt-BR       Portuguese (Brazil)
  610. #hl=pt-PT       Portuguese (Portugal)
  611. #hl=pa          Punjabi
  612. #hl=qu          Quechua
  613. #hl=rm          Romansh
  614. #hl=nyn         Runyakitara
  615. #hl=gd          Scots Gaelic
  616. #hl=sh          Serbo-Croatian
  617. #hl=st          Sesotho
  618. #hl=tn          Setswana
  619. #hl=crs         Seychellois Creole
  620. #hl=sn          Shona
  621. #hl=sd          Sindhi
  622. #hl=si          Sinhalese
  623. #hl=so          Somali
  624. #hl=es-419      Spanish (Latin American)
  625. #hl=su          Sundanese
  626. #hl=tg          Tajik
  627. #hl=tt          Tatar
  628. #hl=ti          Tigrinya
  629. #hl=to          Tonga
  630. #hl=lua         Tshiluba
  631. #hl=tum         Tumbuka
  632. #hl=tk          Turkmen
  633. #hl=tw          Twi
  634. #hl=ug          Uighur
  635. #hl=uz          Uzbek
  636. #hl=wo          Wolof
  637. #hl=xh          Xhosa
  638. #hl=yo          Yoruba
  639. #hl=zu          Zulu
  640.  
  641. #comment="arr=googlearr;
  642. #for (var c = 0; c < arr.length; c++){ //testing
  643. #   if(typeof arr[c] !== 'undefined' && arr[c] !== null){ //dictionary output
  644. #       for (var i = 0; i < arr[c].length; i++){
  645. #           if(typeof arr[c][i] !== 'undefined' && arr[c][i] !== null){
  646. #               for (var e = 0; e < arr[c][i].length; e++){
  647. #                   print(c);
  648. #                   print(arr[c][i][e]);
  649. #                   //print(arr[8][0][1]);
  650. #               }
  651. #           }
  652. #       }
  653. #   }
  654. #}
  655. #   frequency experiment    whitespace=' ' //for x.x and xxx format
  656. #                   num2=Math.round(num*10000)/10;
  657. #                   if (num2 < 10 && num2%1 == 0){
  658. #                       whitespace='   '; //for x format
  659. #                   }
  660. #                   if (num2 >= 10){
  661. #                       num2=Math.round(num2/10)*10;
  662. #                       if (num2 < 100){
  663. #                           whitespace='  '; //for xx format
  664. #                       }
  665. #                   }
  666. #                   print(num2+whitespace+arr[1][a][2][b][0]+' '+arr[1][a][2][b][1]);//frequency + word + variant of speach
  667. #"
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×