Advertisement
yojimbos_pastebin

search.sh

Jul 20th, 2018
305
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Bash 6.19 KB | None | 0 0
  1. #!/usr/bin/env bash
  2.  
  3. # search.sh
  4.  
  5. # INTRODUCTION
  6.  
  7. # Lines that are comments are prefaced with the symbol '#'.  This script uses
  8. # 'surfraw' to search through a list of terms that are of interest.  A file of
  9. # search terms may be given as a command line argument; otherwise the script
  10. # will look for the file named 'search_terms.txt' that must be located in the
  11. # local directory.
  12.  
  13. # The results are written to a date- and time-stamped directory, one HTML file
  14. # per search term and search engine.  Each file can be opened as a local file in
  15. # any HTML-compliant graphical web browser, and the links it contains can be
  16. # followed from there.
  17.  
  18. # The procedure 'surfraw' was first written by Julian Assange and is today
  19. # maintained by others.  It contains (has built-in) 100+ engines called 'elvi'.
  20. # New 'elvi' may be added; all that is needed is the search url of a web site.
  21. # For example, for Zerohedge.com a search for 'Clinton 2020' generates the url:
  22.  
  23. # https://www.zerohedge.com/search-content?search_api_fulltext=Clinton+2020&sort_by=search_api_relevance
  24.  
  25. # This may be used to create a new 'elvi'.  Thus any web site or sites of interest
  26. # may be interrogated with this script if the search url can be obtained.  For more
  27. # information, see:
  28.  
  29. # https://www.techrepublic.com/blog/linux-and-open-source/how-to-write-your-own-elvi-for-surfraw/
  30.  
  31. # DEPENDENCIES
  32.  
  33. # bash, surfraw, lynx, sed and a graphical web browser (like firefox). Each of these
  34. # should be easily installed on any linux system or Mac OS (this has not been tested).
  35.  
  36. # FILE FORMATS
  37.  
  38. # Search terms are placed in a file, one term per line.  No empty lines are permitted.
  39. # For example the file 'search_terms.txt' may contain:
  40.  
  41. # Trump Putin Summit
  42. # Clinton 2020 campaign
  43. # Thailand Cave Rescue
  44. # Prime Minister May Brexit
  45. # Nigel Farage UKIP
  46.  
  47. # And, for example, the file 'search_engines.txt' may contain:
  48.  
  49. # dmoz
  50. # duckduckgo
  51. # google
  52. # bing
  53. # cnn
  54. # bbcnews
  55. # yandex
  56.  
  57. # The engine names contained by the file 'search_engines.txt' must have a corresponding
  58. # 'elvi' in 'surfraw'.
  59.  
  60. # LICENSE AND WARRANTY
  61.  
  62. # This is free for use or modification under the MIT License terms.  No warranty is
  63. # expressed or implied. No copyright is claimed at this time.
  64.  
  65. # INSTALLATION, USAGE AND CAUTIONS
  66.  
  67. # This file should be saved as 'search.sh'.  To make it executable, do 'chmod +x search.sh'.
  68. # Otherwise it may be run via the command 'bash search.sh'.  To use the executable form, do:
  69.  
  70. #   ./search.sh
  71. #
  72. # Or, when using a custom search term file name, do:
  73. #
  74. #   ./search.sh my_search_terms.txt
  75.  
  76. # The program will create a date- and time-stamped directory containing one html page
  77. # per search term/search engine pair, based upon the contents of the file
  78. # 'search_terms.txt' and the file 'search_engines.txt'.  Open these pages in a browser to view the search results.
  79.  
  80. # A word of caution on usage.  Certain engines will block access if this procedure is used
  81. # too frequently.  Google in particular is on this list.  So if you find yourself locked out,
  82. # you have been warned.
  83.  
  84. # ENVIRONMENT AND GLOBALS
  85.  
  86. export SURFRAW_graphical='no'
  87. export SURFRAW_text_browser='lynx -source'
  88. export SURFRAW_results=100 # this may not work on all engines.
  89.  
  90. # PROCEDURES
  91.  
  92. main(){
  93.  
  94.     local destination_html_file=
  95.     local search_engines_file='search_engines.txt'
  96.     local search_terms_file='search_terms.txt'
  97.     local destination_folder="$(date +%Y-%m-%d\ %T)"
  98.  
  99.     # If a file name is supplied then use it.  Otherwise default to the file name
  100.     # 'search_terms.txt' that must be in the local directory.  If no file is found
  101.     # then exit.
  102.  
  103.     # First set the search terms file.
  104.     if [[ -n $1 ]]
  105.     then
  106.         search_terms_file="$1"
  107.     fi
  108.  
  109.     # Next make sure the search terms file exists.
  110.     if [[ ! -f $search_terms_file ]]
  111.     then
  112.         echo "Oops!  Could not find the file of search terms, exiting ..."
  113.         exit 1
  114.     fi
  115.  
  116.     # Next make sure the search engines file exists.
  117.     if [[ ! -f $search_engines_file ]]
  118.     then
  119.         echo "Oops!  Could not find the file of search engines, exiting ..."
  120.         exit 1
  121.     fi
  122.  
  123.     # Next create the search results output directory.
  124.     if [[ ! -d $destination_folder ]]
  125.     then
  126.         if mkdir "$destination_folder" &> /dev/null
  127.         then
  128.             echo "Creating directory $destination_folder ..."
  129.         else
  130.             echo "Oops!  Could not create search directory, exiting ..."
  131.             exit 1
  132.         fi
  133.     fi
  134.  
  135.     # Finally read the search terms/engines files into an array.
  136.     IFS=$'\n' read -d '' -r -a search_terms < "$search_terms_file"
  137.     IFS=$'\n' read -d '' -r -a search_engines < "$search_engines_file"
  138.  
  139.     # Now do the search and send the results to a time-stamped html file
  140.     # ordered by search term.
  141.     for st in "${search_terms[@]}"
  142.     do
  143.         for se in "${search_engines[@]}"
  144.         do
  145.  
  146.             destination_html_file="$destination_folder"'/'"$st"' ('"$se"').html'
  147.             surfraw $se $st > "$destination_html_file"
  148.  
  149.             # Duckduckgo has some strange prefixes on links; these must be
  150.             # removed.
  151.             if [[ $se == duckduckgo ]]
  152.             then
  153.                 echo "Postprocessing $se html output ..."
  154.                 sed -e 's/\/l\/?kh=-1&amp;uddg=//g' "$destination_html_file" > "$destination_html_file".tmp &&
  155.                 mv "$destination_html_file".tmp "$destination_html_file" &> /dev/null &&
  156.                     echo "Postprocessing $se has succeeded ..." ||
  157.                     echo "Postprocessing $se has failed ..."
  158.             fi
  159.  
  160.             # Links must have certain URL codes changed to ASCII or
  161.             # clicking on them may fail.
  162.             echo "Making essential URL ASCII conversions ..."
  163.             sed -e 's/%3A/:/g' -e 's/%2F/\//g' "$destination_html_file" > "$destination_html_file".tmp &&
  164.             mv "$destination_html_file".tmp "$destination_html_file" &> /dev/null &&
  165.                 echo "Conversion has succeeded ..." ||
  166.                 echo "Conversion has failed ..."
  167.  
  168.         done  
  169.     done
  170.  
  171. }
  172.  
  173. # INVOKE MAIN PROCEDURE
  174.  
  175. main $@ && echo "Our work is done, exiting ..." || echo "Exiting, but there was some problem ..."
  176.  
  177. # end of file
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement