Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/bash
- #
- # Common set of functions used by modules
- # Copyright (c) 2010-2011 Plowshare team
- #
- # This file is part of Plowshare.
- #
- # Plowshare is free software: you can redistribute it and/or modify
- # it under the terms of the GNU General Public License as published by
- # the Free Software Foundation, either version 3 of the License, or
- # (at your option) any later version.
- #
- # Plowshare is distributed in the hope that it will be useful,
- # but WITHOUT ANY WARRANTY; without even the implied warranty of
- # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- # GNU General Public License for more details.
- #
- # You should have received a copy of the GNU General Public License
- # along with Plowshare. If not, see <http://www.gnu.org/licenses/>.
# A pipeline fails as soon as one of its stages fails.
set -o pipefail

# Global error codes returned by core and module functions.
# 0 means success or link alive.
ERR_FATAL=1                     # Unexpected result (upstream site updated, etc)
ERR_NOMODULE=2                  # No module found for processing request
ERR_NETWORK=3                   # Specific network error (socket reset, curl, etc)
ERR_LOGIN_FAILED=4              # Correct login/password argument is required
ERR_MAX_WAIT_REACHED=5          # Refer to plowdown wait timeout (see -t/--timeout command line option)
ERR_MAX_TRIES_REACHED=6         # Refer to plowdown max tries reached (see --max-retries command line option)
ERR_CAPTCHA=7                   # Captcha solving failure
ERR_SYSTEM=8                    # System failure (missing executable, local filesystem, wrong behavior, etc)
ERR_LINK_TEMP_UNAVAILABLE=10    # Link alive but temporarily unavailable
                                # (also refer to plowdown --no-arbitrary-wait command line option)
ERR_LINK_PASSWORD_REQUIRED=11   # Link alive but requires a password
ERR_LINK_NEED_PERMISSIONS=12    # Link alive but requires some authentication (premium link)
                                # or operation not allowed for anonymous user
ERR_LINK_DEAD=13                # Link is dead
ERR_FATAL_MULTIPLE=100          # 100 + (n) with n = first error code (when multiple arguments)
- # Global variables used (defined in other .sh)
- # - VERBOSE Verbose log level (0=none, 1, 2, 3, 4)
- # - INTERFACE Network interface (used by curl)
- # - LIMIT_RATE Network speed (used by curl)
- # - GLOBAL_COOKIES User provided cookie
- # - LIBDIR Absolute path to plowshare's libdir
- #
- # Global variables defined here:
- # - PS_TIMEOUT Timeout (in seconds) for one URL download
- # - PS_RETRY_LIMIT Number of tries for loops (mainly for captchas)
- # - RECAPTCHA_SERVER Server URL
- #
- # Logs are sent to stderr stream.
- # Policies:
- # - error: module errors (when return 1), latest plowdown curl call
- # - notice: core messages (ocr, wait, timeout, retries), latest plowdown curl call
- # - debug: modules messages, curl (intermediate) calls
- # - report: debug plus curl content (html pages, cookies)
# Report-level (verbosity >= 4) dump of a file to stderr.
# Each line is trimmed and prefixed with "rep:". Empty or missing files
# produce nothing.
#
# $1: filename
# $?: always zero
logcat_report() {
    local CONTENT

    [ -s "$1" ] || return 0

    CONTENT=$(sed -e 's/^[[:space:]]*//; s/[[:space:]]*$//' \
        -e 's/^/rep:/' "$1")
    if [ "$(verbose_level)" -ge 4 ]; then
        stderr "$CONTENT"
    fi
    return 0
}
# Print a report message ("rep: " prefix) to stderr when verbosity >= 4.
# This should not be called within modules.
#
# $1..$n: message
# $?: always zero
log_report() {
    if [ "$(verbose_level)" -ge 4 ]; then
        stderr "rep: $@"
    fi
    return 0
}
# Print a debug message ("dbg: " prefix) to stderr when verbosity >= 3.
#
# $1..$n: message
# $?: always zero
log_debug() {
    if [ "$(verbose_level)" -ge 3 ]; then
        stderr "dbg: $@"
    fi
    return 0
}
# Print a notice to stderr when verbosity >= 2.
# Arguments are handed to stderr() untouched, so leading echo options
# (such as -n or -e) given by the caller are honoured.
# This should not be called within modules.
#
# $1..$n: message
# $?: always zero
log_notice() {
    if [ "$(verbose_level)" -ge 2 ]; then
        stderr "$@"
    fi
    return 0
}
# Print an error message to stderr when verbosity >= 1.
#
# $1..$n: message
# $?: always zero
log_error() {
    if [ "$(verbose_level)" -ge 1 ]; then
        stderr "$@"
    fi
    return 0
}
- ## ----------------------------------------------------------------------------
- ##
- ## All helper functions below can be called by modules
- ## (see documentation...)
- ##
# Wrapper for curl: debug and infinite loop control.
# Adds default options (timeouts, User-Agent, interface, rate limit and
# user cookies) in front of the caller arguments, then maps the curl exit
# status to a Plowshare error code.
#
# Globals read: INTERFACE, LIMIT_RATE, GLOBAL_COOKIES, VERBOSE
# $1..$n: curl arguments
# $?: 0 on success, $ERR_NETWORK or $ERR_SYSTEM otherwise
curl() {
    local -a OPTIONS=(--insecure --speed-time 600 --connect-timeout 300)
    local CURL_PRG ARG TEMPCURL FILESIZE
    local HAS_UA=0
    local DRETVAL=0

    CURL_PRG=$(type -P curl)
    if [ -z "$CURL_PRG" ]; then
        log_error "curl binary not found"
        return $ERR_SYSTEM
    fi

    # Check if caller has specified a User-Agent, if so don't put one
    for ARG in "$@"; do
        if [ "$ARG" = '-A' ] || [ "$ARG" = '--user-agent' ]; then
            HAS_UA=1
            break
        fi
    done
    if [ "$HAS_UA" -eq 0 ]; then
        OPTIONS=("${OPTIONS[@]}" '--user-agent' \
            'Mozilla/5.0 (X11; Linux x86_64; rv:6.0) Gecko/20100101 Firefox/6.0')
    fi

    # no verbose unless debug level; don't show progress meter for report level too
    test "$(verbose_level)" -ne 3 && OPTIONS=("${OPTIONS[@]}" "--silent")
    test -n "$INTERFACE" && OPTIONS=("${OPTIONS[@]}" "--interface" "$INTERFACE")
    test -n "$LIMIT_RATE" && OPTIONS=("${OPTIONS[@]}" "--limit-rate" "$LIMIT_RATE")

    # The cookie file is a separate word: "-b file" given as a single
    # argument would make curl see a value starting with a space.
    if test -z "$GLOBAL_COOKIES"; then
        set -- "$CURL_PRG" "${OPTIONS[@]}" "$@"
    else
        set -- "$CURL_PRG" -b "$GLOBAL_COOKIES" "${OPTIONS[@]}" "$@"
    fi

    if test "$(verbose_level)" -lt 4; then
        "$@" || DRETVAL=$?
    else
        # Report level: keep a copy of the transfer to log it afterwards
        TEMPCURL=$(create_tempfile)
        log_report "$@"
        "$@" --show-error 2>&1 | tee "$TEMPCURL" || DRETVAL=$?
        FILESIZE=$(get_filesize "$TEMPCURL")
        log_report "Received $FILESIZE bytes"
        log_report "=== CURL BEGIN ==="
        logcat_report "$TEMPCURL"
        log_report "=== CURL END ==="
        rm -rf "$TEMPCURL"
    fi

    case "$DRETVAL" in
        0)
            ;;
        # Partial file / HTTP retrieve error / Operation timeout
        18 | 22 | 28)
            log_error "curl retrieve error"
            return $ERR_NETWORK
            ;;
        # Write error
        23)
            log_error "write failed, disk full?"
            return $ERR_SYSTEM
            ;;
        *)
            log_error "curl failed ($DRETVAL)"
            return $ERR_NETWORK
            ;;
    esac
    return 0
}
# Call curl() with verbosity forced to debug level (3).
# Level 0 (-v0/-q) is kept, and levels already >= 3 are left as they are.
#
# $1..$n: curl arguments
# $?: curl() return value
curl_with_log() {
    local LEVEL
    LEVEL=$(verbose_level)

    # Only levels strictly between quiet and debug are raised
    if [ "$LEVEL" -ne 0 ] && [ "$LEVEL" -lt 3 ]; then
        LEVEL=3
    fi

    VERBOSE=$LEVEL curl "$@"
}
# Substring replacement (replace all matches)
#
# Both strings are taken literally: glob characters (*, ?, [) in $1 and
# '&' in $2 have no special meaning.
#
# stdin: input string
# $1: substring to find (this is not a regexp)
# $2: replacement string (this is not a regexp)
# stdout: result string
replace() {
    local S FROM=$1 TO=$2 RESULT

    S=$(cat)
    # Quoting the pattern disables globbing, quoting the replacement keeps
    # '&' literal (bash >= 5.2). The expansion is deliberately not wrapped
    # in double quotes: old bash versions would keep the inner quotes.
    RESULT=${S//"$FROM"/"$TO"}
    printf '%s\n' "$RESULT"
}
# Trim every line: remove leading and trailing blanks (spaces, tabs, \r, ...)
#
# stdin: input string (can be multiline)
# stdout: result string
strip() {
    sed -e 's/^[[:space:]]*//' -e 's/[[:space:]]*$//'
}
# Convert ASCII letters to uppercase.
# Character classes (tr '[:lower:]' '[:upper:]') are avoided on purpose:
# Busybox "tr" may lack them (CONFIG_FEATURE_TR_CLASSES).
#
# stdin: input string
# stdout: result string
uppercase() {
    tr 'abcdefghijklmnopqrstuvwxyz' 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
}
# Convert ASCII letters to lowercase.
# Same portability note as uppercase(): no tr character classes.
#
# stdin: input string
# stdout: result string
lowercase() {
    tr 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 'abcdefghijklmnopqrstuvwxyz'
}
# Keep only the first line of a text
#
# stdin: input string (multiline)
# stdout: first line
first_line() {
    # equivalent to `head -n1`
    sed -e 1q
}
# Keep only the last line of a text
#
# stdin: input string (multiline)
# stdout: last line
last_line() {
    # equivalent to `tail -n1`
    sed -ne '$p'
}
# Keep only the nth line of a text
#
# stdin: input string (multiline)
# $1: line number (start at index 1)
# stdout: requested line (nothing if the text is shorter)
nth_line() {
    local ADDRESS=$1
    # The whole input is read (no early quit)
    sed -n -e "${ADDRESS}p"
}
# Check if a string ($2) matches a regexp ($1)
# This is case sensitive.
#
# $1: POSIX basic regexp (may start with a '-' character)
# $2: string to test
# $? is zero on success
match() {
    # -e protects a regexp beginning with '-' from option parsing
    grep -q -e "$1" <<< "$2"
}
# Check if a string ($2) matches a regexp ($1)
# This is not case sensitive.
#
# $1: POSIX basic regexp (may start with a '-' character)
# $2: string to test
# $? is zero on success
matchi() {
    # -e protects a regexp beginning with '-' from option parsing
    grep -i -q -e "$1" <<< "$2"
}
# Get lines that match filter+match regular expressions and extract string from it.
#
# stdin: text data
# $1: POSIX-regexp to filter (every matching line is processed).
# $2: POSIX-regexp to match (use parenthesis) on the matched line.
# stdout: content of the first parenthesis group, one result per line
# $?: zero on success, $ERR_FATAL when nothing could be extracted
parse_all() {
    local STRING

    STRING=$(sed -n "/$1/s/^.*$2.*$/\1/p")
    if [ -z "$STRING" ]; then
        log_error "parse failed: sed -n \"/$1/$2\""
        return $ERR_FATAL
    fi

    # printf (not echo): a result such as "-n" must be printed verbatim
    printf '%s\n' "$STRING"
}
# Like parse_all, but get only first match
#
# stdin: text data
# $1, $2: see parse_all
# $?: parse_all return value
parse() {
    # sed reads its whole input: an early exit (head -n1) could kill
    # parse_all with SIGPIPE and make the pipeline fail under pipefail.
    parse_all "$@" | sed -n '1p'
}
# Like parse_all, but keep only the last match
#
# stdin: text data
# $1, $2: see parse_all
parse_last() {
    parse_all "$@" | sed -n '$p'
}
# Like parse, but everything written to stderr is discarded
#
# stdin: text data
# $1, $2: see parse_all
# $?: parse return value
parse_quiet() {
    { parse "$@"; } 2>/dev/null
}
# Find lines matching the filter regexp, then apply the match regexp on
# the line that follows each of them.
#
# stdin: text data
# $1: POSIX-regexp to filter (every matching line is processed).
# $2: POSIX-regexp to match (use parenthesis) on the line after.
# stdout: content of the first parenthesis group, one result per line
# $?: zero on success, $ERR_FATAL when nothing could be extracted
parse_line_after_all() {
    local STRING

    STRING=$(sed -n "/$1/{n;s/^.*$2.*$/\1/p}")
    if [ -z "$STRING" ]; then
        log_error "parse failed: sed -n \"/$1/$2\""
        return $ERR_FATAL
    fi

    # printf (not echo): a result such as "-n" must be printed verbatim
    printf '%s\n' "$STRING"
}
# Like parse_line_after_all, but get only first match
#
# stdin: text data
# $1, $2: see parse_line_after_all
# $?: parse_line_after_all return value
parse_line_after() {
    # sed reads its whole input: an early exit (head -n1) could kill the
    # producer with SIGPIPE and make the pipeline fail under pipefail.
    parse_line_after_all "$@" | sed -n '1p'
}
# Extract the value of "Location" HTTP header(s).
# Every matching header line is printed, carriage returns are removed.
#
# stdin: result of curl request (with -i/--include, -D/--dump-header
#        or -I/--head flag)
# stdout: header value(s), one per line
grep_http_header_location() {
    sed -n -e 's/^[Ll]ocation:[[:space:]]\+\([^ ]*\)/\1/p' 2>/dev/null |
        tr -d '\r'
}
# Extract the value of "Content-Location" HTTP header(s).
#
# stdin: same as grep_http_header_location()
# stdout: header value(s), one per line, carriage returns removed
grep_http_header_content_location() {
    sed -n -e 's/^[Cc]ontent-[Ll]ocation:[[:space:]]\+\([^ ]*\)/\1/p' 2>/dev/null |
        tr -d '\r'
}
# Extract the value of "Content-Type" HTTP header(s).
#
# stdin: same as grep_http_header_location()
# stdout: header value(s), one per line, carriage returns removed
grep_http_header_content_type() {
    sed -n -e 's/^[Cc]ontent-[Tt]ype:[[:space:]]\+\([^ ]*\)/\1/p' 2>/dev/null |
        tr -d '\r'
}
# Grep first "Content-Disposition" (of http header)
#
# stdin: same as grep_http_header_location()
# stdout: attachment filename (text between the double quotes)
grep_http_header_content_disposition() {
    local FILTER='[Cc]ontent-[Dd]isposition:'
    local CAPTURE='filename="\(.*\)"'

    parse "$FILTER" "$CAPTURE" 2>/dev/null
}
# Extract a specific form from a HTML content.
# We assume here that start marker <form> and end marker </form> are on
# separate lines. HTML comments are just ignored. But it's enough for our
# needs.
#
# $1: (X)HTML data
# $2: (optional) Nth <form> (default is 1)
# stdout: result (the requested form only)
grep_form_by_order() {
    local DATA=$1
    local N=${2:-1}

    # Drop the N-1 first forms: cut everything before the first </form>
    # and neutralize that end marker.
    while [ "$N" -gt 1 ]; do
        N=$((N - 1))
        DATA=$(sed -ne '/<\/form>/,$p' <<< "$DATA" | \
            sed -e '1s/<\/form>/<_form>/1')
    done

    # Quit at the first end marker so that following forms are not
    # returned. Here-string input: no writer can be killed by SIGPIPE.
    sed -ne '/<form /,/<\/form>/{p;/<\/form>/q;}' <<< "$DATA"
}
# Extract a named form from a HTML content.
# If several forms have the same name, take first one.
#
# $1: (X)HTML data
# $2: "name" attribute of <form> marker
# stdout: result (nothing when $2 is empty)
grep_form_by_name() {
    local DATA=$1
    local NAME=$2

    if [ -n "$NAME" ]; then
        # Quit at the first end marker: later forms are not returned
        sed -ne "/<[Ff][Oo][Rr][Mm][[:space:]].*name=\"\?$NAME\"\?/,/<\/[Ff][Oo][Rr][Mm]>/{p;/<\/[Ff][Oo][Rr][Mm]>/q;}" <<< "$DATA"
    fi
}
# Extract an id-specified form from a HTML content.
# If several forms have the same id, take first one.
#
# $1: (X)HTML data
# $2: "id" attribute of <form> marker
# stdout: result (nothing when $2 is empty)
grep_form_by_id() {
    local DATA=$1
    local ID=$2

    if [ -n "$ID" ]; then
        # Quit at the first end marker: later forms are not returned
        sed -ne "/<[Ff][Oo][Rr][Mm][[:space:]].*id=\"\?$ID\"\?/,/<\/[Ff][Oo][Rr][Mm]>/{p;/<\/[Ff][Oo][Rr][Mm]>/q;}" <<< "$DATA"
    fi
}
# Split html markers into several lines.
# A new line is inserted after each ending marker (</...>).
#
# stdin: (X)HTML data
# stdout: result
break_html_lines() {
    sed -e 's/<\/[^>]*>/&\n/g'
}
# Split html markers into several lines.
# A new line is inserted after each (beginning or ending) marker.
#
# stdin: (X)HTML data
# stdout: result
break_html_lines_alt() {
    sed -e 's/<[^>]*>/&\n/g'
}
# Return value of html attribute (first matching line only)
#
# stdin: (X)HTML data
# $1: POSIX-regexp to filter lines
# $2: attribute name
# stdout: attribute value (optional surrounding quotes are dropped)
parse_attr() {
    local ATTR_REGEXP="$2=[\"']\?\([^\"'>]*\)"

    parse "$1" "$ATTR_REGEXP"
}
# Like parse_attr, but everything written to stderr is discarded
#
# stdin: (X)HTML data
# $1, $2: see parse_attr
parse_attr_quiet() {
    { parse_attr "$@"; } 2>/dev/null
}
# Return value of html attribute (all matching lines)
#
# stdin: (X)HTML data
# $1: POSIX-regexp to filter lines
# $2: attribute name
# stdout: attribute values, one per line
parse_all_attr() {
    local ATTR_REGEXP="$2=[\"']\?\([^\"'>]*\)"

    parse_all "$1" "$ATTR_REGEXP"
}
# Retrieve "action" attribute (URL) from a <form> marker
#
# stdin: (X)HTML data (ideally, call grep_form_by_xxx before)
# stdout: result
parse_form_action() {
    local FORM_REGEXP='<[Ff][Oo][Rr][Mm]'
    local ACTION_REGEXP='action="\([^"]*\)"'

    parse "$FORM_REGEXP" "$ACTION_REGEXP"
}
# Retrieve "value" attribute from a named <input> marker
#
# $1: name attribute of <input> marker
# stdin: (X)HTML data
# stdout: result (can be null string if <input> has no value attribute)
parse_form_input_by_name() {
    local INPUT_REGEXP="<input\([[:space:]]*[^ ]*\)*name=[\"']\?$1[\"']\?"
    local VALUE_REGEXP="value=[\"']\?\([^'\">]*\)"

    parse_quiet "$INPUT_REGEXP" "$VALUE_REGEXP"
}
# Retrieve "value" attribute from a typed <input> marker
#
# $1: type attribute of <input> marker (for example: "submit")
# stdin: (X)HTML data
# stdout: result (can be null string if <input> has no value attribute)
parse_form_input_by_type() {
    local INPUT_REGEXP="<input\([[:space:]]*[^ ]*\)*type=[\"']\?$1[\"']\?"
    local VALUE_REGEXP="value=[\"']\?\([^'\">]*\)"

    parse_quiet "$INPUT_REGEXP" "$VALUE_REGEXP"
}
# Retrieve "id" attributes from typed <input> marker(s)
#
# $1: type attribute of <input> marker
# stdin: (X)HTML data
# stdout: id values, one per line (errors are not reported)
parse_all_form_input_by_type_with_id() {
    local INPUT_REGEXP="<input\([[:space:]]*[^ ]*\)*type=[\"']\?$1[\"']\?"
    local ID_REGEXP="id=[\"']\?\([^'\">]*\)"

    parse_all "$INPUT_REGEXP" "$ID_REGEXP" 2>/dev/null
}
# Get accessor for cookies (tab separated cookie file, as written by curl)
# Example: LANG=$(parse_cookie "lang" < "$COOKIES")
#
# $1: cookie name
# stdin: cookie file content
# stdout: cookie value
parse_cookie() {
    local NAME=$1

    parse_quiet "\t$NAME\t[^\t]*\$" "\t$NAME\t\(.*\)"
}
# Return base of URL (scheme and host, path is dropped)
# Example: http://www.host.com/a/b/c/d => http://www.host.com
# Note: Avoid using Bash regexp or `expr` for portability purposes
#
# $1: URL
# stdout: base URL
basename_url() {
    sed -e 's|\(https\?://[^/]*\).*|\1|' <<< "$1"
}
# Return basename of file path
# Example: /usr/bin/foo.bar => foo.bar
#
# $1: filename
# stdout: last path component
basename_file() {
    # `basename -- "$1"` may be screwed on some BusyBox versions.
    # printf (not echo) so that a name like "-n" is printed verbatim.
    printf '%s\n' "${1##*/}"
}
# Translate HTML entities to UTF-8.
# Uses `recode` when available, else perl (HTML::Entities); when neither
# is found, data is passed through untouched.
#
# stdin: data
# stdout: data (converted)
html_to_utf8() {
    if check_exec 'recode'; then
        log_report "html_to_utf8: use recode"
        recode html..utf8
        return
    fi

    if check_exec 'perl'; then
        log_report "html_to_utf8: use perl"
        perl -n -mHTML::Entities \
            -e 'BEGIN { eval{binmode(STDOUT,q[:utf8]);}; }; print HTML::Entities::decode_entities($_);'
        return
    fi

    log_notice "recode binary not found, pass-through"
    cat
}
# Encode a text to include into an url.
# - Reserved Characters (18): !*'();:@&=+$,/?#[]
# - Check for percent (%) & space character
#
# - Unreserved Characters: ALPHA / DIGIT / "-" / "." / "_" / "~"
# - Unsafe characters (RFC2396) should not be percent-encoded anymore: <>{}|\^`
#
# stdin: data (example: relative URL)
# stdout: data (should comply with RFC3986)
uri_encode_strict() {
    # Percent sign goes first, otherwise the escapes produced by the
    # other expressions would be encoded twice.
    sed -e 's/%/%25/g' -e 's/ /%20/g' \
        -e 's/!/%21/g' -e 's/\*/%2A/g' -e "s/'/%27/g" \
        -e 's/(/%28/g' -e 's/)/%29/g' -e 's/;/%3B/g' \
        -e 's/:/%3A/g' -e 's/@/%40/g' -e 's/&/%26/g' \
        -e 's/=/%3D/g' -e 's/+/%2B/g' -e 's/\$/%24/g' \
        -e 's/,/%2C/g' -e 's|/|%2F|g' -e 's/?/%3F/g' \
        -e 's/#/%23/g' -e 's/\[/%5B/g' -e 's/\]/%5D/g'
}
# Encode a complete url.
# - check for space character and square brackets
# - do not check for "reserved characters" (use "uri_encode_strict" for that)
#
# A badly encoded URL request can lead to HTTP error 400.
# curl doesn't do any checks, whereas wget converts the provided url.
#
# stdin: data (example: absolute URL)
# stdout: data (nearly complies with RFC3986)
uri_encode() {
    sed -e 's/ /%20/g; s/\[/%5B/g; s/\]/%5D/g'
}
# Decode a complete url.
# - check for space character and round/square brackets
# - reserved characters: only comma and plus are checked
# Only uppercase hexadecimal escapes are recognized.
#
# stdin: data (example: absolute URL)
# stdout: data (nearly complies with RFC3986)
uri_decode() {
    sed -e 's/%20/ /g' -e 's/%5B/[/g' -e 's/%5D/]/g' \
        -e 's/%2C/,/g' -e 's/%28/(/g' -e 's/%29/)/g' -e 's/%2B/+/g'
}
# Retrieves size of file
#
# $1: filename
# stdout: file length (in bytes), or -1 if it cannot be determined
get_filesize() {
    local SIZE

    SIZE=$(stat -c %s "$1" 2>/dev/null)

    # `stat -c` is GNU specific: fall back to wc for readable files
    if [ -z "$SIZE" ] && [ -f "$1" ] && [ -r "$1" ]; then
        SIZE=$(wc -c < "$1")
        # Some wc implementations pad their output with blanks
        SIZE=${SIZE//[^0-9]/}
    fi

    if [ -z "$SIZE" ]; then
        log_error "cannot get size of file: $1"
        echo "-1"
    else
        echo "$SIZE"
    fi
}
# Create a tempfile and return path.
# The name is built from the script name, the shell PID and a random
# number: remove_tempfiles() relies on this naming.
#
# $1: Suffix (optional)
# stdout: path of the (empty) created file
# $?: zero on success, $ERR_SYSTEM if the file cannot be created
create_tempfile() {
    local SUFFIX=$1
    local FILE="${TMPDIR:-/tmp}/${0##*/}.$$.$RANDOM$SUFFIX"

    : > "$FILE" || return $ERR_SYSTEM
    echo "$FILE"
}
# User password entry (typed characters are not echoed)
#
# stdout: entered password (can be null string)
# $? is non zero if no password
prompt_for_password() {
    local PASSWORD

    log_notice "No password specified, enter it now"

    # -s: silent input, no `stty` needed (terminal echo cannot stay disabled
    #     if the script gets interrupted, and a non-tty stdin is accepted)
    # -r / IFS=: keep backslashes and surrounding blanks of the password
    IFS= read -r -s -p "Enter password: " PASSWORD

    # The newline typed by the user was not echoed
    if [ -t 0 ]; then
        echo >&2
    fi

    printf '%s\n' "$PASSWORD"
    test -n "$PASSWORD" || return $ERR_LINK_PASSWORD_REQUIRED
}
# Login and return cookie.
# A non empty cookie file does not mean that login is successful.
#
# $1: String 'username:password' (password can contain colons)
# $2: Cookie filename (see create_tempfile() modules)
# $3: Postdata string (ex: 'user=\$USER&password=\$PASSWORD')
# $4: URL to post
# $5: Additional curl arguments (optional)
# stdout: html result (can be null string)
# $? is zero on success
post_login() {
    local AUTH=$1
    local COOKIE=$2
    local POSTDATA=$3
    local LOGINURL=$4
    local CURL_ARGS=$5
    # Kept local so that caller environment (like $USER) is not clobbered.
    # They are still visible from the eval below (dynamic scoping).
    local REGEXP USER PASSWORD DATA RESULT

    if test "$GLOBAL_COOKIES"; then
        REGEXP=$(echo "$LOGINURL" | grep -o "://[^/]*" | grep -o "[^.]*\.[^.]*$")
        if grep -q "^\.\?$REGEXP" "$GLOBAL_COOKIES" 2>/dev/null; then
            log_debug "cookies for site ($REGEXP) found in cookies file, login skipped"
            return 0
        fi
        log_debug "cookies not found for site ($REGEXP), continue login process"
    fi

    USER=$(printf '%s\n' "${AUTH%%:*}" | uri_encode_strict)

    # Decide on the raw string whether a password was given: comparing
    # with the encoded form fails for logins such as "me@host.org".
    case "$AUTH" in
        *:*) PASSWORD=${AUTH#*:} ;;
        *)   PASSWORD='' ;;
    esac
    if [ -z "$PASSWORD" ]; then
        PASSWORD=$(prompt_for_password) || true
    fi
    # A typed password has to be encoded too
    PASSWORD=$(printf '%s\n' "$PASSWORD" | uri_encode_strict)

    log_notice "Starting login process: $USER/$(sed 's/./*/g' <<< "$PASSWORD")"

    # NOTE(review): eval expands $USER/$PASSWORD references found in the
    # module supplied template. POSTDATA must never come from user input.
    DATA=$(eval echo $(echo "$POSTDATA" | sed "s/&/\\\\&/g"))

    # Yes, no quote around $CURL_ARGS
    RESULT=$(curl --cookie-jar "$COOKIE" --data "$DATA" $CURL_ARGS "$LOGINURL")

    # For now "-z" test is kept.
    # There is no known case of a null $RESULT on successful login.
    if [ -z "$RESULT" -o ! -s "${GLOBAL_COOKIES:-$COOKIE}" ]; then
        log_error "login request failed"
        return $ERR_LOGIN_FAILED
    fi

    log_report "=== COOKIE BEGIN ==="
    logcat_report "$COOKIE"
    log_report "=== COOKIE END ==="

    echo "$RESULT"
    return 0
}
# Execute javascript code
#
# stdin: js script
# stdout: script results
# $?: non zero if no interpreter is found or if the temporary script
#     cannot be created; zero otherwise (interpreter status is not
#     propagated)
javascript() {
    local JS_PRG TEMPSCRIPT

    JS_PRG=$(detect_javascript) || return
    TEMPSCRIPT=$(create_tempfile) || return

    # Paths are quoted: TMPDIR may contain blanks
    cat > "$TEMPSCRIPT"

    log_report "interpreter:$JS_PRG"
    log_report "=== JAVASCRIPT BEGIN ==="
    logcat_report "$TEMPSCRIPT"
    log_report "=== JAVASCRIPT END ==="

    "$JS_PRG" "$TEMPSCRIPT"
    rm -rf "$TEMPSCRIPT"
    return 0
}
# Detect if a JavaScript interpreter ('js' executable) is installed
#
# stdout: path of executable
# $?: boolean (0 means found, $ERR_SYSTEM otherwise)
detect_javascript() {
    if check_exec 'js'; then
        type -P 'js'
    else
        log_notice "Javascript interpreter not found"
        return $ERR_SYSTEM
    fi
}
# Detect if a Perl interpreter is installed
#
# stdout: path of executable
# $?: boolean (0 means found, $ERR_SYSTEM otherwise)
detect_perl() {
    if check_exec 'perl'; then
        type -P 'perl'
    else
        log_notice "Perl interpreter not found"
        return $ERR_SYSTEM
    fi
}
# Wait some time
# Related to --timeout plowdown command line option
#
# NOTE: this function shadows the `wait` shell builtin for every script
# sourcing this file (use `builtin wait` to reach the original one).
#
# $1: Sleep duration
# $2: Unit (seconds | minutes), default is seconds
# $?: zero on success, timeout_update() error if duration is over the
#     remaining timeout
wait() {
    local VALUE=$1
    local UNIT=$2
    local UNIT_SECS UNIT_STR TOTAL_SECS REMAINING MSG
    local CLEAR=" \b\b\b\b\b"

    if test "$VALUE" = '0'; then
        log_debug "wait called with null duration"
        return
    fi

    if [ "$UNIT" = "minutes" ]; then
        UNIT_SECS=60
        UNIT_STR=minutes
    else
        UNIT_SECS=1
        UNIT_STR=seconds
    fi

    TOTAL_SECS=$((VALUE * UNIT_SECS))
    timeout_update $TOTAL_SECS || return

    REMAINING=$TOTAL_SECS
    MSG="Waiting $VALUE $UNIT_STR..."

    if test -t 2; then
        # Interactive terminal: refresh a countdown on the same line
        while [ "$REMAINING" -gt 0 ]; do
            log_notice -ne "\r$MSG $(splitseconds $REMAINING) left${CLEAR}"
            sleep 1
            (( REMAINING-- ))
        done
        log_notice -e "\r$MSG done${CLEAR}"
    else
        log_notice "$MSG"
        sleep $TOTAL_SECS
    fi
}
# Consume one try of the retry budget.
# Related to --max-retries plowdown command line option
#
# Globals: PS_RETRY_LIMIT (decremented; no limit when empty)
# $?: zero while tries remain, $ERR_MAX_TRIES_REACHED otherwise
retry_limit_not_reached() {
    if [ -z "$PS_RETRY_LIMIT" ]; then
        return 0
    fi

    log_notice "Tries left: $PS_RETRY_LIMIT"
    PS_RETRY_LIMIT=$((PS_RETRY_LIMIT - 1))

    if [ "$PS_RETRY_LIMIT" -lt 0 ]; then
        return $ERR_MAX_TRIES_REACHED
    fi
    return 0
}
# Tell whether an arbitrary wait is allowed.
# Related to --no-arbitrary-wait plowdown command line option
#
# Globals: NOARBITRARYWAIT (non empty means waits are refused)
# $?: zero if waiting is allowed, $ERR_LINK_TEMP_UNAVAILABLE otherwise
no_arbitrary_wait() {
    if [ -z "$NOARBITRARYWAIT" ]; then
        log_debug "Arbitrary wait"
        return 0
    fi

    log_debug "File temporarily unavailable"
    return $ERR_LINK_TEMP_UNAVAILABLE
}
# OCR of an image.
#
# Globals read: LIBDIR (location of tesseract configuration files)
# $1: optional varfile
# stdin: image (binary)
# stdout: result OCRed text
# $?: zero on success, $ERR_SYSTEM on failure
ocr() {
    local OPT_CONFIGFILE="$LIBDIR/tesseract/plowshare_nobatch"
    local OPT_VARFILE="$LIBDIR/tesseract/$1"
    local TIFF TEXT LOG
    local -a TESS_ARGS

    test -f "$OPT_VARFILE" || OPT_VARFILE=''

    # Tesseract somewhat "peculiar" arguments requirement makes impossible
    # to use pipes or process substitution. Create temporary files
    # instead (*sigh*).
    TIFF=$(create_tempfile ".tif") || return $ERR_SYSTEM
    TEXT=$(create_tempfile ".txt") || { rm -f "$TIFF"; return $ERR_SYSTEM; }

    convert - tif:- > "$TIFF"

    # Second argument is the output base name (tesseract appends .txt)
    TESS_ARGS=("$TIFF" "${TEXT%.txt}" "$OPT_CONFIGFILE")
    if [ -n "$OPT_VARFILE" ]; then
        TESS_ARGS=("${TESS_ARGS[@]}" "$OPT_VARFILE")
    fi

    if ! LOG=$(tesseract "${TESS_ARGS[@]}" 2>&1); then
        rm -f "$TIFF" "$TEXT"
        log_error "$LOG"
        return $ERR_SYSTEM
    fi

    cat "$TEXT"
    rm -f "$TIFF" "$TEXT"
}
# Display a captcha image and ask the user for the answer.
#
# $1: local image filename (with full path). No specific image format expected.
# $2 (optional): view method
#     NOTE(review): $2 is documented but not read by this function, the
#     view method is always auto-detected. Confirm intent.
# stdout: captcha answer
#
# Note: reCAPTCHA image are 300x57.
captcha_process() {
    local FILENAME=$1
    local METHOD_VIEW=
    local METHOD_SOLVE=
    local TEXT1='Leave this field blank and hit enter to get another captcha image'
    local TEXT2='Enter captcha response (drop punctuation marks, case insensitive): '
    local MAX_OUTPUT_WIDTH MAX_OUTPUT_HEIGHT DIMENSION W H IMG_PNM RESPONSE
    local PRGPID=

    if [ -z "$METHOD_VIEW" ]; then
        # X11 server installed ?
        if [ -n "$DISPLAY" ]; then
            if check_exec 'display'; then
                METHOD_VIEW=Xdisplay
            else
                log_notice "no X11 image viewer found, to display captcha image"
            fi
        fi
        if [ -z "$METHOD_VIEW" ]; then
            log_debug "no X server available, try ascii display"
            # libcaca
            if check_exec img2txt; then
                METHOD_VIEW=img2txt
            # terminal image view (perl script using Image::Magick)
            elif check_exec tiv; then
                METHOD_VIEW=tiv
            # libaa
            elif check_exec aview; then
                METHOD_VIEW=aview
            else
                log_notice "no ascii viewer found to display captcha image"
                METHOD_VIEW=none
            fi
        fi
    fi

    # Try to maximize the image size on terminal
    if [ "${METHOD_VIEW:0:1}" != "X" ]; then
        if check_exec tput; then
            MAX_OUTPUT_WIDTH=$(tput cols 2>/dev/null)
            MAX_OUTPUT_HEIGHT=$(tput lines 2>/dev/null)
            # tput gives nothing without a usable terminal
            MAX_OUTPUT_WIDTH=${MAX_OUTPUT_WIDTH:-150}
            MAX_OUTPUT_HEIGHT=${MAX_OUTPUT_HEIGHT:-57}
            if check_exec identify; then
                DIMENSION=$(identify -quiet "$FILENAME" | cut -d' ' -f3)
                W=${DIMENSION%x*}
                H=${DIMENSION#*x}
                # W and H are empty or not numeric when identify failed
                if [ "$W" -lt "$MAX_OUTPUT_WIDTH" ] 2>/dev/null; then
                    MAX_OUTPUT_WIDTH=$W
                fi
                if [ "$H" -lt "$MAX_OUTPUT_HEIGHT" ] 2>/dev/null; then
                    MAX_OUTPUT_HEIGHT=$H
                fi
            fi
        else
            MAX_OUTPUT_WIDTH=150
            MAX_OUTPUT_HEIGHT=57
        fi
    fi

    # How to display image
    case "$METHOD_VIEW" in
        none)
            log_debug "image: $FILENAME"
            ;;
        aview)
            IMG_PNM=$(create_tempfile)
            convert "$FILENAME" -negate -depth 8 "pnm:$IMG_PNM"
            aview -width "$MAX_OUTPUT_WIDTH" -height "$MAX_OUTPUT_HEIGHT" \
                -kbddriver stdin -driver stdout "$IMG_PNM" 2>/dev/null <<< "q" | \
                sed -e '1d;/\x0C/,/\x0C/d' | grep -v "^[[:space:]]*$" 1>&2
            rm -f "$IMG_PNM"
            ;;
        tiv)
            tiv -a -w "$MAX_OUTPUT_WIDTH" -h "$MAX_OUTPUT_HEIGHT" "$FILENAME" 1>&2
            ;;
        img2txt)
            img2txt -W "$MAX_OUTPUT_WIDTH" -H "$MAX_OUTPUT_HEIGHT" "$FILENAME" 1>&2
            ;;
        Xdisplay)
            display "$FILENAME" &
            PRGPID=$!
            ;;
        *)
            log_error "unknown method: $METHOD_VIEW"
            ;;
    esac

    [ -z "$METHOD_SOLVE" ] && METHOD_SOLVE=prompt

    # How to solve captcha
    case "$METHOD_SOLVE" in
        none)
            ;;
        prompt)
            log_notice "$TEXT1"
            read -r -p "$TEXT2" RESPONSE
            if [ -n "$PRGPID" ]; then
                # Close the viewer; disown prevents the shell from
                # reporting the killed job. Nothing may reach stdout here.
                kill -9 "$PRGPID" >/dev/null 2>&1
                disown >/dev/null 2>&1
            fi
            echo "$RESPONSE"
            ;;
        *)
            log_error "unknown method: $METHOD_SOLVE"
            ;;
    esac
}
- ##
- ## reCAPTCHA functions (can be called from modules)
- ## Main engine: http://api.recaptcha.net/js/recaptcha.js
- ##
RECAPTCHA_SERVER="http://www.google.com/recaptcha/api/"

# Request a new reCAPTCHA challenge and download its image.
# The challenge string is part of the returned file name
# (see recaptcha_get_challenge_from_image).
#
# $1: reCAPTCHA site public key
# stdout: image path (nothing if the server answer is empty)
# $?: zero on success, curl() error code if a request fails
recaptcha_load_image() {
    local URL="${RECAPTCHA_SERVER}challenge?k=${1}&ajax=1"
    local VARS server challenge FILENAME CAPTCHA_URL

    log_debug "reCaptcha URL: $URL"
    VARS=$(curl -L "$URL") || return

    if [ -n "$VARS" ]; then
        server=$(echo "$VARS" | parse_quiet 'server' "server[[:space:]]\?:[[:space:]]\?'\([^']*\)'")
        challenge=$(echo "$VARS" | parse_quiet 'challenge' "challenge[[:space:]]\?:[[:space:]]\?'\([^']*\)'")

        log_debug "reCaptcha server: $server"
        log_debug "reCaptcha challenge: $challenge"

        # Image dimension: 300x57
        FILENAME="${TMPDIR:-/tmp}/recaptcha.${challenge}.jpg"
        CAPTCHA_URL="${server}image?c=${challenge}"

        log_debug "reCaptcha image URL: $CAPTCHA_URL"
        # No image, no filename: caller gets the curl error instead
        curl "$CAPTCHA_URL" -o "$FILENAME" || return

        log_debug "reCaptcha image: $FILENAME"
        echo "$FILENAME"
    fi
}
# Get the challenge back from an image file name
# (recaptcha.<challenge>.jpg): second dot separated field of the basename.
#
# $1: reCAPTCHA image filename
# stdout: challenge (string)
recaptcha_get_challenge_from_image() {
    local BASENAME=${1##*/}

    cut -d'.' -f2 <<< "$BASENAME"
}
# Ask reCAPTCHA server for another image (same site key).
#
# $1: reCAPTCHA site public key
# $2: reCAPTCHA image filename
# stdout: new image path (nothing if $2 is empty)
# $?: zero on success, curl() error code if a request fails
recaptcha_reload_image() {
    local FILENAME=$2
    local challenge server STATUS CAPTCHA_URL

    if [ -n "$FILENAME" ]; then
        challenge=$(recaptcha_get_challenge_from_image "$FILENAME")
        server="$RECAPTCHA_SERVER"

        STATUS=$(curl "${server}reload?k=$1&c=${challenge}&reason=r&type=image&lang=en") || return

        # The answer carries the new challenge string
        challenge=$(echo "$STATUS" | parse_quiet 'finish_reload' "('\([^']*\)")
        FILENAME="${TMPDIR:-/tmp}/recaptcha.${challenge}.jpg"
        CAPTCHA_URL="${server}image?c=${challenge}"

        log_debug "reCaptcha image URL: $CAPTCHA_URL"
        # No image, no filename: caller gets the curl error instead
        curl "$CAPTCHA_URL" -o "$FILENAME" || return

        log_debug "reCaptcha new image: $FILENAME"
        echo "$FILENAME"
    fi
}
- ## ----------------------------------------------------------------------------
- ##
- ## Miscellaneous functions that can be called from core:
- ## download.sh, upload.sh, delete.sh, list.sh
- ##
# Remove all temporary files created by the script
# (with create_tempfile)
remove_tempfiles() {
    # The wildcard must stay outside of the quotes to be expanded
    rm -f "${TMPDIR:-/tmp}/${0##*/}.$$".*
}
# Install the exit callback: temporary files are cleaned
# (remove_tempfiles) when the shell exits.
set_exit_trap() {
    trap 'remove_tempfiles' EXIT
}
# Check existence of executable in path
# Better than "which" (external) executable
#
# $1: Executable to check
# $?: zero means found, 1 means not found (or empty name)
check_exec() {
    # Quoted: with an empty $1, a bare `type -P` would report success
    type -P "$1" >/dev/null || return 1
    return 0
}
# Set the download timeout budget (consumed by timeout_update).
# Related to --timeout plowdown command line option
#
# $1: timeout in seconds (empty means no timeout)
timeout_init() {
    PS_TIMEOUT="$1"
}
# Set the retry budget (consumed by retry_limit_not_reached).
# Related to --max-retries plowdown command line option
#
# $1: number of tries (empty means no limit)
retry_limit_init() {
    PS_RETRY_LIMIT="$1"
}
# Show help info for options
#
# Each option line is "VAR,SHORT,LONG,VALUE,HELP" (a trailing ':' on SHORT
# or LONG means the option takes an argument). HELP may contain commas.
#
# $1: options
# $2: indent string
# stdout: one help line per option
print_options() {
    local OPTIONS=$1
    local INDENT=$2
    local OPTION VAR SHORT LONG VALUE HELP STRING

    while read -r OPTION; do
        test "$OPTION" || continue
        IFS="," read -r VAR SHORT LONG VALUE HELP <<< "$OPTION"

        STRING=$INDENT
        if [ -n "$SHORT" ]; then
            STRING="$STRING-${SHORT%:}"
            test "$VALUE" && STRING="$STRING $VALUE"
        fi
        if [ -n "$LONG" ] && [ -n "$SHORT" ]; then
            STRING="$STRING, "
        fi
        if [ -n "$LONG" ]; then
            STRING="$STRING--${LONG%:}"
            test "$VALUE" && STRING="$STRING=$VALUE"
        fi

        echo "$STRING: $HELP"
    done <<< "$OPTIONS"
}
# Show usage info for modules
# Modules without options of the requested family are skipped.
#
# $1: module name list (one per line)
# $2: option family name (string, example:UPLOAD)
# stdout: help text
print_module_options() {
    local MODULE OPTIONS

    while read -r MODULE; do
        OPTIONS=$(get_module_options "$MODULE" "$2")
        if test "$OPTIONS"; then
            echo
            echo "Options for module <$MODULE>:"
            echo
            print_options "$OPTIONS" ' '
        fi
    done <<< "$1"
}
# Get all modules options with specified family name.
# Note: All lines are prefixed with "!" character.
#
# $1: module name list (one per line)
# $2: option family name (string, example:UPLOAD)
# stdout: options list (one per line, empty lines are dropped)
get_all_modules_options() {
    local MODULE OPTION

    while read -r MODULE; do
        get_module_options "$MODULE" "$2" | while read -r OPTION; do
            if test "$OPTION"; then
                echo "!$OPTION"
            fi
        done
    done <<< "$1"
}
# Get module name from URL link
# The URL is tested against the MODULE_<NAME>_REGEXP_URL variable of each
# module, first match wins.
#
# $1: url
# $2: module name list (one per line)
# stdout: module name (nothing if no module matches)
# $?: always zero
get_module() {
    local MODULE NAME VAR

    while read -r MODULE; do
        test "$MODULE" || continue

        NAME=$(uppercase <<< "$MODULE")
        VAR="MODULE_${NAME}_REGEXP_URL"

        # An empty regexp would match any URL: skip such modules
        test "${!VAR}" || continue

        if match "${!VAR}" "$1"; then
            echo "$MODULE"
            break
        fi
    done <<< "$2"
    return 0
}
# Straightforward options and arguments processing using getopt style
# $1: program name (used for error message printing)
# $2: options list (one per line: "VAR,SHORT,LONG,VALUE,HELP"); a leading
#     "!" marks an option that is accepted but not assigned: it is stored
#     in the UNUSED_OPTIONS array instead.
# $3..$n: command-line arguments list
# stdout: shell code to be evaluated by caller (variable assignments,
#         UNUSED_OPTIONS declaration, and a "set --" with the remaining
#         arguments)
#
# Example:
# $ set -- -a user:password -q arg1 arg2
# $ eval "$(process_options module "
#   AUTH,a:,auth:,USER:PASSWORD,Help for auth
#   QUIET,q,quiet,,Help for quiet" "$@")"
# $ echo "$AUTH / $QUIET / $1 / $2"
# user:password / 1 / arg1 / arg2
process_options() {
    local NAME=$1
    local OPTIONS=$2
    local VAR SHORT LONG VALUE HELP OPTION UNUSED
    local SHORT_OPTS LONG_OPTS ARGUMENTS
    shift 2

    # Strip spaces in options
    OPTIONS=$(echo "$OPTIONS" | strip | drop_empty_lines)

    # Function is called from a module which has no option
    test -z "$OPTIONS" && return 0

    while read -r VAR; do
        if test "${VAR:0:1}" = "!"; then
            VAR=${VAR:1}
        fi
        # faster than `cut -d',' -f1`
        unset "${VAR%%,*}"
    done <<< "$OPTIONS"

    SHORT_OPTS=$(echo "$OPTIONS" | cut -d',' -f2)
    LONG_OPTS=$(echo "$OPTIONS" | cut -d',' -f3)
    ARGUMENTS="$(getopt -o "$SHORT_OPTS" --long "$LONG_OPTS" -n "$NAME" -- "$@")"

    # To correctly process whitespace and quotes.
    eval set -- "$ARGUMENTS"

    local -a UNUSED_OPTIONS=()

    # Stop when arguments are exhausted too: getopt output always ends
    # with "--", but nothing is printed when getopt itself is missing.
    while [ $# -gt 0 ]; do
        if [ "$1" = "--" ]; then
            shift
            break
        fi

        while read -r OPTION; do
            IFS="," read -r VAR SHORT LONG VALUE HELP <<< "$OPTION"
            UNUSED=0
            if test "${VAR:0:1}" = "!"; then
                UNUSED=1
                VAR=${VAR:1}
            fi

            if [[ "$1" = "-${SHORT%:}" || "$1" = "--${LONG%:}" ]]; then
                if [[ "$SHORT" = *: || "$LONG" = *: ]]; then
                    # Option with argument
                    test -z "$VALUE" && \
                        stderr "process_options ($VAR): VALUE should not be empty!"

                    if test "$UNUSED" = 0; then
                        echo "$VAR=$(quote "$2")"
                    elif test "${1:0:2}" = "--"; then
                        UNUSED_OPTIONS=("${UNUSED_OPTIONS[@]}" "$1=$2")
                    else
                        UNUSED_OPTIONS=("${UNUSED_OPTIONS[@]}" "$1" "$2")
                    fi
                    shift
                else
                    # Flag
                    if test "$UNUSED" = 0; then
                        echo "$VAR=1"
                    else
                        UNUSED_OPTIONS=("${UNUSED_OPTIONS[@]}" "$1")
                    fi
                fi
                break
            fi
        done <<< "$OPTIONS"
        shift
    done

    echo "$(declare -p UNUSED_OPTIONS)"
    echo "set -- $(quote "$@")"
}
# Get module list according to capability
# Note1: use global variable LIBDIR
# Note2: VERBOSE (log_debug) not initialised yet
#
# $1: keyword to grep (must not contain '|' char)
# stdout: return module list (one name per line)
# $?: $ERR_SYSTEM if the modules config file is missing
grep_list_modules() {
    local CONFIG="$LIBDIR/modules/config"

    if [ ! -f "$CONFIG" ]; then
        stderr "can't find config file"
        return $ERR_SYSTEM
    fi

    # Keep non-comment lines having the keyword as a '|' delimited field,
    # then print their first field (module name).
    sed -ne "/^[^#].*|[[:space:]]*$1[[:space:]]*|/p" "$CONFIG" | \
        cut -d'|' -f1 | strip
}
# Set option variables from the configuration file
# ($HOME/.config/plowshare/plowshare.conf, else /etc/plowshare.conf).
# Lines are "long_name = value"; value may be surrounded by double quotes.
# Matching variables are assigned in the caller scope (global).
#
# $1: section name in ini-style file ("General" will be considered too)
# $2: options list (one per line: "VAR,SHORT,LONG,VALUE,HELP")
# Note: VERBOSE (log_debug) not initialised yet
process_configfile_options() {
    local CONFIG OPTIONS SECTION LINE NAME VALUE OPTION VAR

    CONFIG="$HOME/.config/plowshare/plowshare.conf"
    test ! -f "$CONFIG" && CONFIG="/etc/plowshare.conf"
    test -f "$CONFIG" || return 0

    # Strip spaces in options
    OPTIONS=$(echo "$2" | strip | drop_empty_lines)

    # Content of requested and [General] sections, without comments,
    # section headers and blank lines
    SECTION=$(sed -ne "/\[$1\]/,/^\[/p" -ne "/\[General\]/,/^\[/p" "$CONFIG" | \
        sed -e '/^\(#\|\[\|[[:space:]]*$\)/d')

    if [ -n "$SECTION" -a -n "$OPTIONS" ]; then
        # -r: backslashes of values are kept
        while read -r LINE; do
            NAME=$(echo "${LINE%%=*}" | strip)
            VALUE=$(echo "${LINE#*=}" | strip)

            # Look for optional double quote (protect leading/trailing spaces)
            if [ "${VALUE:0:1}" = '"' -a "${VALUE:(-1):1}" = '"' ]; then
                VALUE="${VALUE%?}"
                VALUE="${VALUE:1}"
            fi

            # Look for 'long_name' in options list
            OPTION=$(echo "$OPTIONS" | grep ",${NAME}:\?," | sed '1q') || true
            if [ -n "$OPTION" ]; then
                VAR="${OPTION%%,*}"
                # Plain assignment by name, no eval needed
                printf -v "$VAR" '%s' "$VALUE"
            fi
        done <<< "$SECTION"
    fi
}
# Set module option variables from the configuration file
# ($HOME/.config/plowshare/plowshare.conf, else /etc/plowshare.conf).
# Lines are "module/option = value" where option is the short or long
# name; if several lines match, last one wins. Matching variables are
# assigned in the caller scope (global).
#
# $1: section name in ini-style file ("General" will be considered too)
# $2: module name
# $3: option family name (string, example:DOWNLOAD)
process_configfile_module_options() {
    local CONFIG OPTIONS SECTION OPTION LINE VALUE
    local M VAR SHORT LONG VALUE_HELP

    CONFIG="$HOME/.config/plowshare/plowshare.conf"
    test ! -f "$CONFIG" && CONFIG="/etc/plowshare.conf"
    test -f "$CONFIG" || return 0

    log_report "use $CONFIG"

    # Strip spaces in options
    OPTIONS=$(get_module_options "$2" "$3" | strip | drop_empty_lines)

    # Content of requested and [General] sections, without comments,
    # section headers and blank lines
    SECTION=$(sed -ne "/\[$1\]/,/^\[/p" -ne "/\[General\]/,/^\[/p" "$CONFIG" | \
        sed -e '/^\(#\|\[\|[[:space:]]*$\)/d')

    if [ -n "$SECTION" -a -n "$OPTIONS" ]; then
        M=$(echo "$2" | lowercase)

        # For example:
        # AUTH,a:,auth:,USER:PASSWORD,Free or Premium account"
        while read -r OPTION; do
            IFS="," read -r VAR SHORT LONG VALUE_HELP <<< "$OPTION"
            SHORT=${SHORT%:}
            LONG=${LONG%:}

            # Look for 'module/option_name' (short or long) in section list
            LINE=$(echo "$SECTION" | grep "^$M/\($SHORT\|$LONG\)[[:space:]]*=" | sed -n '$p') || true
            if [ -n "$LINE" ]; then
                VALUE=$(echo "${LINE#*=}" | strip)

                # Look for optional double quote (protect leading/trailing spaces)
                if [ "${VALUE:0:1}" = '"' -a "${VALUE:(-1):1}" = '"' ]; then
                    VALUE="${VALUE%?}"
                    VALUE="${VALUE:1}"
                fi

                # Plain assignment by name, no eval needed
                printf -v "$VAR" '%s' "$VALUE"
                log_debug "$M: take --$LONG option from configuration file"
            fi
        done <<< "$OPTIONS"
    fi
}
# Report system information (machine, bash and curl versions).
# Nothing is done below report verbosity (level 4).
log_report_info() {
    [ "$(verbose_level)" -ge 4 ] || return 0

    log_report '=== SYSTEM INFO BEGIN ==='
    log_report "[mach] $(uname -a)"
    log_report "[bash] $(echo $BASH_VERSION)"
    if check_exec 'curl'; then
        # Real binary, not the curl() wrapper of this file
        log_report "[curl] $($(type -P curl) --version | sed 1q)"
    else
        log_report '[curl] not found!'
    fi
    log_report '=== SYSTEM INFO END ==='
}
- ## ----------------------------------------------------------------------------
- ##
- ## Private ('static') functions
- ## Can be called from this script only.
- ##
# Current verbose level
#
# Globals read: VERBOSE
# stdout: level (0 if VERBOSE is unset or empty)
verbose_level() {
    # Quoted: the value must not be word-split or globbed
    echo "${VERBOSE:-0}"
}
# Print arguments to stderr stream.
# Arguments go to `echo` untouched: leading options (-n, -e) given by
# the caller are interpreted by echo.
stderr() {
    echo "$@" 1>&2
}
# Quote arguments so that they can be safely evaluated by the shell
# (eval "set -- $(quote "$@")" gives the same arguments back).
#
# $1..$n: strings to quote
# stdout: quoted strings on one line, space separated, no trailing
#         newline (nothing without argument)
quote() {
    local ARG ITEM
    local OUT=''

    for ARG in "$@"; do
        # %q output is always a single line, even for multiline values
        printf -v ITEM '%q' "$ARG"
        OUT="$OUT$ITEM "
    done

    printf '%s' "${OUT% }"
}
# Delete blank lines (empty, or made of space characters only)
#
# stdin: input (multiline) string
# stdout: result string
drop_empty_lines() {
    sed -e '/^ *$/d'
}
# Look for a configuration module variable
# Example: MODULE_ZSHARE_DOWNLOAD_OPTIONS (result can be multiline)
#
# $1: module name (case insensitive)
# $2: option family name (string, example:UPLOAD)
# stdout: options list (one per line)
get_module_options() {
    local NAME VAR

    NAME=$(uppercase <<< "$1")
    VAR="MODULE_${NAME}_${2}_OPTIONS"

    # Indirect expansion: content of the variable named by $VAR
    echo "${!VAR}"
}
# Format a duration as hours, minutes and seconds (null parts are omitted)
# Example: 12345 => "3h25m45s"
#
# $1: duration (integer)
# stdout: formatted duration (empty line for 0)
splitseconds() {
    local HOURS=$(( $1 / 3600 ))
    local MINUTES=$(( ($1 % 3600) / 60 ))
    local SECONDS_LEFT=$(( $1 % 60 ))
    local OUT=''

    if [ "$HOURS" -ne 0 ]; then
        OUT="${HOURS}h"
    fi
    if [ "$MINUTES" -ne 0 ]; then
        OUT="${OUT}${MINUTES}m"
    fi
    if [ "$SECONDS_LEFT" -ne 0 ]; then
        OUT="${OUT}${SECONDS_LEFT}s"
    fi

    echo "$OUT"
}
# Consume some time of the timeout budget (called by wait)
#
# Globals: PS_TIMEOUT (remaining seconds, decremented; no timeout if empty)
# $1: duration to wait (in seconds)
# $?: zero if the wait fits in the remaining time,
#     $ERR_MAX_WAIT_REACHED otherwise
timeout_update() {
    local WAIT=$1

    test -z "$PS_TIMEOUT" && return

    log_notice "Time left to timeout: $PS_TIMEOUT secs"
    if [[ "$PS_TIMEOUT" -lt "$WAIT" ]]; then
        log_debug "timeout reached (asked $WAIT secs to wait, but remaining time is $PS_TIMEOUT)"
        return $ERR_MAX_WAIT_REACHED
    fi

    # Not (( PS_TIMEOUT -= WAIT )): it returns 1 when the result is zero,
    # which would be taken as a failure by the caller.
    PS_TIMEOUT=$((PS_TIMEOUT - WAIT))
    return 0
}
# Check if a string ($2) matches a regexp ($1)
# and return the content inside the first parentheses.
# This is case sensitive.
#
# $1: extended regexp (Bash =~ operator)
# $2: string to test
# stdout: first captured group, or $2 itself if there is no match
match_capture() {
    # Results are quoted: blanks and glob characters are kept as is
    if [[ $2 =~ $1 ]]; then
        printf '%s\n' "${BASH_REMATCH[1]}"
    else
        printf '%s\n' "$2"
    fi
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement