mansz81

backlinkchecker.sh

Mar 8th, 2022 (edited)
243
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 9.97 KB | None | 0 0
  1. #!/bin/bash
  2. # Script is used to check if a specified link exists in a list of web pages.
  3. # This is useful for SEO experts when you need to validate that your backlinks
  4. # still exist on the web pages where you set them or bought them.
  5. # Author: Ramil Valitov [email protected]
  6. # First release: 13.08.2018
  7. # Git: https://github.com/rvalitov/backlink-checker
  8. # We use some code from:
  9. #https://natelandau.com/bash-scripting-utilities/
  10. #https://github.com/tlatsas/bash-spinner
  11.  
  12. #OPTIONS
  13.  
  # Upper bound, in seconds, handed to wget as its --timeout value
  TOTAL_TIMEOUT=5

  # How many attempts wget makes per URL (its --tries value)
  WEB_RETRIES=3

  #SCRIPT VARS
  # Full paths of the external tools; left empty when a tool is not on PATH
  E_WGET="$(type -P wget)"
  E_GREP="$(type -P grep)"
  # Per-log line counters maintained by SaveToLog
  SUCCESS_COUNT=0 FAIL_COUNT=0 LOG_COUNT=0
  # Switched to 1 by the -v and -append options respectively
  VERBOSE_LOG=0
  APPEND_LOG=0
  # grep matcher flag; fixed-string matching unless -mode overrides it
  GREP_MODE='-F'
  29.  
  30. #HELPER FUNCTIONS
  31. #Library with UI functions
  32. # We use some code from:
  33. #https://natelandau.com/bash-scripting-utilities/
  34. #https://github.com/tlatsas/bash-spinner
  35.  
  36. #
  37. #Set Colors
  38. #
  39.  
  # Run tput without letting it complain: when TERM is unset/unknown, or the
  # terminal lacks the requested capability, tput reports an error on stderr.
  # In that case the caller simply receives an empty string, i.e. no styling.
  _tput_safe() {
    tput "$@" 2>/dev/null || true
  }

  # Text attributes
  bold=$(_tput_safe bold)
  underline=$(_tput_safe sgr 0 1)
  reset=$(_tput_safe sgr0)

  # Foreground colors
  purple=$(_tput_safe setaf 171)
  red=$(_tput_safe setaf 1)
  green=$(_tput_safe setaf 76)
  tan=$(_tput_safe setaf 3)
  blue=$(_tput_safe setaf 38)
  49.  
  50. #
  51. # Headers and  Logging
  52. #
  53.  
  # Print every argument as a bold purple "==========  text  ==========" banner,
  # each one preceded by an empty line.
  e_header() {
    local banner_fmt="\n${bold}${purple}==========  %s  ==========${reset}\n"
    printf "$banner_fmt" "$@"
  }
  # Print each argument on its own line, prefixed with an arrow.
  # The text is passed as printf data (never as the format string), so
  # messages containing '%' or backslashes - common in URLs - print verbatim.
  e_arrow() {
    printf '➜ %s\n' "$@"
  }
  # Print each argument in green behind a check mark, one per line.
  e_success() {
    local ok_fmt="${green}✔ %s${reset}\n"
    printf "$ok_fmt" "$@"
  }
  # Print each argument in red behind a cross, one per line.
  e_error() {
    local err_fmt="${red}✖ %s${reset}\n"
    printf "$err_fmt" "$@"
  }
  # Print each argument in tan behind an arrow, one per line.
  e_warning() {
    local warn_fmt="${tan}➜ %s${reset}\n"
    printf "$warn_fmt" "$@"
  }
  # Print each argument underlined and bold, one per line.
  e_underline() {
    local ul_fmt="${underline}${bold}%s${reset}\n"
    printf "$ul_fmt" "$@"
  }
  # Print each argument in bold, one per line.
  e_bold() {
    local bold_fmt="${bold}%s${reset}\n"
    printf "$bold_fmt" "$@"
  }
  # Print each argument in blue after an underlined bold "Note:" label.
  e_note() {
    local label="${underline}${bold}${blue}Note:${reset}"
    local note_fmt="${label}  ${blue}%s${reset}\n"
    printf "$note_fmt" "$@"
  }
  78.  
  79. #####################
  80. # Example:
  81. #seek_confirmation "Do you want to print a success message?"
  82. #if is_confirmed; then
  83. #  e_success "Here is a success message"
  84. #else
  85. #  e_error "You did not ask for a success message"
  86. #fi
  87.  
  # Ask a yes/no question and read a single key press.
  # Arguments: the question text (several arguments are joined with spaces).
  # Result:    the key is left in REPLY (read is called without a variable
  #            name); test it with is_confirmed.
  seek_confirmation() {
    # The question is printed as data, not as the printf format, so a '%' or
    # backslash in it is shown literally instead of corrupting the output.
    printf '\n%s%s%s' "$bold" "$*" "$reset"
    read -r -p " (y/n) " -n 1
    printf "\n"
  }
  93.  
  # Ask a yes/no question whose default answer is "yes".
  # Arguments: the question text (several arguments are joined with spaces).
  # Result:    the key is left in REPLY; an empty answer (e.g. just Enter)
  #            is turned into "y".
  seek_confirmation_yes() {
    # The question is printed as data, not as the printf format, so a '%' or
    # backslash in it is shown literally instead of corrupting the output.
    printf '\n%s%s%s' "$bold" "$*" "$reset"
    read -r -p " (y/n)[Y] " -n 1
    printf "\n"
    if [[ -z "$REPLY" ]]; then
      REPLY="y"
    fi
  }
  102.  
  103. # Test whether the result of an 'ask' is a confirmation
  # Succeeds (status 0) only when REPLY is exactly "y" or "Y"; status 1 otherwise.
  is_confirmed() {
    case "$REPLY" in
    [Yy])
      return 0
      ;;
    esac
    return 1
  }
  110.  
  111. ####################
  112. # Example:
  113. #Check for Git
  114. #if type_exists 'git'; then
  115. #  e_success "Git good to go"
  116. #else
  117. #  e_error "Git should be installed. It isn't. Aborting."
  118. #  exit 1
  119. #fi
  120. #
  121. #if is_os "darwin"; then
  122. #  e_success "You are on a mac"
  123. #else
  124. #  e_error "You are not on a mac"
  125. #  exit 1
  126. #fi
  127.  
  # Tell whether an executable with the given name can be found on PATH.
  # Arguments: $1 - command name
  # Returns:   0 when `type -P` resolves it to a file, 1 otherwise
  type_exists() {
    # The lookup result is quoted inside [[ ]], so an install path that
    # contains spaces no longer breaks the test.
    if [[ -n "$(type -P "$1" 2>/dev/null)" ]]; then
      return 0
    fi
    return 1
  }
  134.  
  # Succeeds when OSTYPE begins with $1. The argument is left unquoted on the
  # right-hand side, so it is matched as a pattern followed by "*".
  is_os() {
    local wanted=$1
    [[ "${OSTYPE}" == ${wanted}* ]]
  }
  141.  
  142. # spinner.sh
  143. #
  144. # Display an awesome 'spinner' while running your long shell commands
  145. #
  146. # Do *NOT* call _spinner function directly.
  147. # Use {start,stop}_spinner wrapper functions
  148.  
  149. # usage:
  150. #   1. source this script in yours
  151. #   2. start the spinner:
  152. #       start_spinner [display-message-here]
  153. #   3. run your command
  154. #   4. stop the spinner:
  155. #       stop_spinner [your command's exit status]
  156. #
  157. # Also see: test.sh
  158.  
  function _spinner() {
    # $1: "start" or "stop"
    #
    # start: $2 is the message to display
    # stop : $2 is the exit status of the finished command
    #        $3 is the pid of the spinner job (handed over by stop_spinner)

    local ok_label="DONE" ko_label="FAIL"
    local ok_color="\e[1;32m" ko_color="\e[1;31m" no_color="\e[0m"

    if [[ $1 == start ]]; then
      # width of the gap between the message and the spinner character
      local pad
      let "pad=$(tput cols)-${#2}-8"
      # show the message, then pad out to the spinner position
      echo -ne "${2}"
      printf "%${pad}s"

      # animate until this (background) job gets killed
      local frames='\|/-' tick=1
      local pause=${SPINNER_DELAY:-0.15}

      while true; do
        printf "\b${frames:tick++%${#frames}:1}"
        sleep "$pause"
      done
    elif [[ $1 == stop ]]; then
      if [[ -z ${3} ]]; then
        echo "spinner is not running.."
        exit 1
      fi

      kill "$3" >/dev/null 2>&1

      # overwrite the last spinner frame with a colored [DONE] / [FAIL] tag
      local badge
      if [[ $2 -eq 0 ]]; then
        badge="${ok_color}${ok_label}${no_color}"
      else
        badge="${ko_color}${ko_label}${no_color}"
      fi
      echo -e "\b[${badge}]"
    else
      echo "invalid argument, try {start/stop}"
      exit 1
    fi
  }
  213.  
  function start_spinner() {
    # $1 : message shown next to the spinner
    local message=${1}
    # run the animation as a background job ...
    _spinner "start" "$message" &
    # ... remember its pid globally for stop_spinner ...
    _sp_pid=$!
    # ... and drop it from the shell's job table
    disown
  }
  221.  
  function stop_spinner() {
    # $1 : exit status of the command the spinner was covering
    local exit_status=$1
    # the pid is deliberately passed unquoted: when it is empty, _spinner
    # receives no third argument at all
    _spinner "stop" "$exit_status" $_sp_pid
    # forget the pid now that the spinner has been stopped
    unset _sp_pid
  }
  227.  
  228. #MAIN CODE
  #######################################
  # Parse the command-line options into the script's global settings.
  # Globals written:
  #   VERBOSE_LOG, APPEND_LOG, URLS_FILE, SUCCESS_FILE, FAILURE_FILE,
  #   OUTPUT_FILE, SEARCH_LINK, GREP_MODE, USER_AGENT
  # Arguments:
  #   the script's arguments ("$@")
  # Exits with status 1 on an unknown option, or when an option that takes
  # a value is the last argument (previously such an option was silently
  # accepted with an empty value).
  #######################################
  parse_args() {
    # idx is local so the parser no longer clobbers a global loop counter
    local idx opt
    for ((idx = 1; idx <= $#; idx++)); do
      opt=${!idx}
      case $opt in
      -v)
        VERBOSE_LOG=1
        ;;
      -append)
        APPEND_LOG=1
        ;;
      -input | -found-log | -missing-log | -log | -link | -mode | -user-agent)
        # these options consume the following argument as their value
        if ((idx == $#)); then
          e_error "Option $opt requires a value."
          exit 1
        fi
        ((idx++))
        case $opt in
        -input) URLS_FILE=${!idx} ;;
        -found-log) SUCCESS_FILE=${!idx} ;;
        -missing-log) FAILURE_FILE=${!idx} ;;
        -log) OUTPUT_FILE=${!idx} ;;
        -link) SEARCH_LINK=${!idx} ;;
        -mode) GREP_MODE=${!idx} ;;
        -user-agent) USER_AGENT=${!idx} ;;
        esac
        ;;
      *)
        e_warning "Unknown argument $opt"
        exit 1
        ;;
      esac
    done
  }

  parse_args "$@"
  271.  
  # Both the URL list and the link to look for are mandatory; when either is
  # missing, print the manual-style help text and stop with status 1.
  if [[ -z $URLS_FILE || -z $SEARCH_LINK ]]; then
    e_error "Required arguments missing."

    # headings are printed verbatim (%s); body lines go through %b so that
    # their \t escapes are expanded
    printf '%s\n' "${bold}SYNOPSIS${reset}"
    printf '%b\n\n' "\t${bold}$0${reset} ${bold}-input${reset} ${underline}FILE${reset} ${bold}-link${reset} ${underline}LINK${reset} [OPTIONS]"

    printf '%s\n' "${bold}DESCRIPTION${reset}"
    printf '%b\n\n' "\tScript is used to check if a specified ${underline}LINK${reset} exists in a list of web pages specified in a ${underline}FILE${reset}, one URL per line. The script is useful for SEO experts when you need to validate that your backlinks still exist on the web pages where you set them or bought them."

    printf '%s\n' "${bold}OPTIONS${reset}"
    printf '%b\n' \
      "\t${bold}-v${reset}" \
      "\t\tActivates verbose mode" \
      "\t${bold}-mode${reset} ${underline}LETTER${reset}" \
      "\t\tThe ${underline}LETTER${reset} defines how ${underline}LINK${reset} is interpreted." \
      "\t\tWe use grep for search, for complete info refer to Matcher Selection of the grep manual." \
      "\t\tUsually grep supports the following modes:" \
      "\t\t${bold}-E${reset}" \
      "\t\tInterpret ${underline}LINK${reset} as an extended regular expression (ERE)." \
      "\t\t${bold}-F${reset}" \
      "\t\tInterpret ${underline}LINK${reset} as a fixed string (instead of regular" \
      "\t\texpression). This is the default." \
      "\t\t${bold}-G${reset}" \
      "\t\tInterpret ${underline}LINK${reset} as a basic regular expression (BRE)." \
      "\t\t${bold}-P${reset}" \
      "\t\tInterpret ${underline}LINK${reset} as a Perl-compatible regular expression (PCRE)." \
      "\t${bold}-log${reset} ${underline}LOG${reset}" \
      "\t\tSaves the log to file ${underline}LOG${reset}." \
      "\t${bold}-found-log${reset} ${underline}LOG${reset}" \
      "\t\tSaves URLs where the ${underline}LINK${reset} was found to file ${underline}LOG${reset}." \
      "\t${bold}-missing-log${reset} ${underline}LOG${reset}" \
      "\t\tSaves URLs where the ${underline}LINK${reset} was not found to file ${underline}LOG${reset}." \
      "\t${bold}-append${reset}" \
      "\t\tAll log files will be appended, otherwise they will be overwritten." \
      "\t${bold}-user-agent${reset} ${underline}AGENT${reset}" \
      "\t\tSets user-agent string to ${underline}AGENT${reset}."

    exit 1
  fi
  317.  
  # Stop early (status 1) unless both external tools were located and the
  # URL list is an existing regular file.
  [[ -f $E_WGET ]] || {
    e_error "Failed to find wget. Please, install the related package."
    exit 1
  }
  [[ -f $E_GREP ]] || {
    e_error "Failed to find grep. Please, install the related package."
    exit 1
  }
  [[ -f $URLS_FILE ]] || {
    e_error "The specified file $URLS_FILE not found"
    exit 1
  }
  330.  
  #######################################
  # Write one line to one of the three log files and bump its line counter.
  # Globals:
  #   SUCCESS_FILE, FAILURE_FILE, OUTPUT_FILE (read)  - target file per type
  #   SUCCESS_COUNT, FAIL_COUNT, LOG_COUNT (written)  - incremented per call
  #   APPEND_LOG (read) - 0: the first line of a run truncates the file
  # Arguments:
  #   $1 - log type: SUCCESS, FAIL or LOG
  #   $2 - text to store as a single line
  # Returns:
  #   0 on success, also when no file is configured for that log type;
  #   1 on missing arguments or an unknown log type
  #######################################
  function SaveToLog() {
    if [[ -z $1 || -z $2 ]]; then
      echo "Internal error. Invalid parameters in save log function."
      return 1
    fi

    # kept local so the helper does not leak scratch variables
    local filename entry_no

    case $1 in
    "SUCCESS")
      filename=$SUCCESS_FILE
      ((SUCCESS_COUNT++))
      entry_no=$SUCCESS_COUNT
      ;;
    "FAIL")
      filename=$FAILURE_FILE
      ((FAIL_COUNT++))
      entry_no=$FAIL_COUNT
      ;;
    "LOG")
      filename=$OUTPUT_FILE
      ((LOG_COUNT++))
      entry_no=$LOG_COUNT
      ;;
    *)
      echo "Internal error. Invalid log type."
      return 1
      ;;
    esac

    # no file requested for this log type: counting was all there is to do
    if [[ -z $filename ]]; then
      return 0
    fi

    # printf rather than echo: an entry such as "-n" or "-e" would be taken
    # by echo as an option and never reach the file.
    if [[ $APPEND_LOG == 0 && $entry_no == 1 ]]; then
      # first entry of this run and no -append: start the file afresh
      printf '%s\n' "$2" >"$filename"
    else
      printf '%s\n' "$2" >>"$filename"
    fi

    return 0
  }
  373.  
  #######################################
  # Download one page and test whether it contains SEARCH_LINK.
  # Globals (read):
  #   E_WGET, E_GREP            - downloader and matcher commands
  #   TOTAL_TIMEOUT, WEB_RETRIES, USER_AGENT - wget settings
  #   GREP_MODE, SEARCH_LINK    - matcher flag and the text/pattern to find
  #   VERBOSE_LOG               - >0 shows a spinner while downloading
  # Arguments:
  #   $1 - URL of the page to inspect
  # Outputs:
  #   one line per call to the general log via SaveToLog "LOG"
  # Returns:
  #   0 when the link was found; 1 when the download failed, the link is
  #   absent, or the search itself failed (e.g. invalid pattern)
  #######################################
  function CheckWebsiteLink() {
    if [[ -z $1 ]]; then
      echo "Internal error. No server specified."
      return 1
    fi

    local response status grep_errors
    # Build the wget command line once; the user agent is added only if set.
    local -a wget_args=(-O- -nv -q "--timeout=$TOTAL_TIMEOUT" "--tries=$WEB_RETRIES")
    if [[ -n $USER_AGENT ]]; then
      wget_args+=("--user-agent=$USER_AGENT")
    fi

    if [[ $VERBOSE_LOG -gt 0 ]]; then
      start_spinner "Checking $1"
      sleep 1
    fi

    # "--" keeps a URL that starts with "-" from being parsed as an option.
    response=$("$E_WGET" "${wget_args[@]}" -- "$1" 2>&1)
    status=$?
    if [[ $status != 0 ]]; then
      if [[ $VERBOSE_LOG -gt 0 ]]; then
        stop_spinner 1
      fi
      SaveToLog "LOG" "$1 failed to donwload link. Error code $status. Response: $response"
      return 1
    fi
    if [[ $VERBOSE_LOG -gt 0 ]]; then
      stop_spinner 0
    fi

    # Decide by grep's exit status (0 match, 1 no match, >1 error) instead of
    # by "did it print anything": with the output test, a grep error message
    # (bad pattern, link beginning with "-") counted as a successful match.
    # -e marks SEARCH_LINK as the pattern even when it starts with "-".
    grep_errors=$("$E_GREP" -q "$GREP_MODE" -e "$SEARCH_LINK" <<<"$response" 2>&1)
    case $? in
    0)
      SaveToLog "LOG" "$1 OK"
      return 0
      ;;
    1)
      SaveToLog "LOG" "$1 NOT FOUND"
      return 1
      ;;
    *)
      SaveToLog "LOG" "$1 search failed. grep error: $grep_errors"
      return 1
      ;;
    esac
  }
  407.  
  #######################################
  # Check every URL listed in URLS_FILE (one per line) and report the result.
  # Globals (read): URLS_FILE
  # Calls: CheckWebsiteLink, SaveToLog, e_success, e_warning
  #######################################
  check_all_urls() {
    local line
    # "|| [[ -n $line ]]" also processes a last line that lacks a newline.
    # The file name is quoted, so a path containing spaces works.
    while IFS= read -r line || [[ -n $line ]]; do
      # tolerate CRLF line endings: drop one trailing carriage return
      line=${line%$'\r'}
      if [[ -z $line ]]; then
        continue
      fi

      if CheckWebsiteLink "$line"; then
        SaveToLog "SUCCESS" "$line"
        e_success "$line link found"
      else
        SaveToLog "FAIL" "$line"
        e_warning "$line link NOT found"
      fi
    done <"$URLS_FILE"
  }

  check_all_urls
  422.  
  # In verbose mode announce completion; the script always ends with status 0.
  [[ $VERBOSE_LOG -gt 0 ]] && echo "All operations complete"
  exit 0
  427.  
Add Comment
Please, Sign In to add comment