Advertisement
Guest User

Untitled

a guest
Aug 23rd, 2019
810
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.19 KB | None | 0 0
  1. Simple bash-script for searching raw HTML sources
  2. Uses common Unix tools: awk, grep, head, tail
  3.  
  4. Also runs under Cygwin (Windows), but make sure
  5. the path to your archive (arcDir) is set correctly
  6.  
  7.  
  8. [code]
  #!/bin/bash
  #
  # searchArchive -- search a local archive of raw HTML files for an expression.
  #
  # Usage: ./searchArchive "puzzle ever"
  #
  # Every *.html file directly inside ${arcDir} is searched case-insensitively.
  # For each file with at least one match, the number of matches is printed,
  # followed by an excerpt of up to ${excerptBytes} bytes starting at every match.
  #
  # Needs grep with the -a/-o/-b options (e.g. GNU grep), awk, sed, tail, head.

  # Local archive folder
  # --> Edit here for your own local path
  arcDir="/pathto/archives/8chan/qresearch/.zfs/snapshot/grab-n-snap-20190713-0227/qresearch/res"
  # For CygWin, path could be for example:
  # arcDir="/cygdrive/D/archive/pol/res"

  # Number of bytes printed for each match
  excerptBytes=640

  #######################################
  # Turn the user's search term into the pattern handed to grep.
  # Only "." is escaped, so that it matches a literal dot. Spaces are left
  # alone: the pattern is always passed to grep as one quoted argument, and a
  # backslash before a space is not a defined regex escape.
  # Arguments: $1 - search term as typed by the user
  # Outputs:   the grep pattern on stdout
  #######################################
  escape_search_term() {
    printf '%s\n' "$1" | sed 's/\./\\./g'
  }

  #######################################
  # Report all matches of a pattern in one file.
  # Prints nothing when the file has no match; otherwise prints a header line
  # with the match count and then one numbered excerpt per match.
  # Globals:   excerptBytes (read)
  # Arguments: $1 - grep pattern, $2 - file to search
  # Outputs:   report on stdout
  #######################################
  search_file() {
    local pattern=$1 file=$2
    local -a offsets=()
    local off

    # "grep -b -o" prints "<byte offset>:<matched text>" for every match;
    # awk keeps just the offset. -a forces text mode so that grep never
    # replaces its output with a "Binary file ... matches" notice.
    while IFS= read -r off; do
      offsets+=("${off}")
    done < <(grep -aiob -- "${pattern}" "${file}" | awk -F":" '{print $1}')

    local n=${#offsets[@]}
    if [ "${n}" -gt 0 ]; then
      echo ">${n} occurrences in \"${file}\":"
      local j=0
      for off in "${offsets[@]}"; do
        j=$((j + 1))
        printf ' (%s) ' "${j}"
        # grep offsets are 0-based, "tail -c +N" counts bytes from 1.
        tail -c "+$((off + 1))" -- "${file}" | head -c "${excerptBytes}"
        echo ""
      done
    fi
  }

  #######################################
  # Validate the search term, collect the HTML files and search each of them.
  # Globals:   arcDir (read)
  # Arguments: $1 - search term (optional; usage is printed when missing)
  # Returns:   0 after a search or after printing usage,
  #            1 when the term is refused or the archive holds no HTML files
  #######################################
  main() {
    local srcTerm=${1:-}

    # SearchTerm provided by user -- make sure it's there and not too short
    if [ -z "${srcTerm}" ]; then
      echo "Script searches for expressions in a HTML-archive"
      echo "No search term given -- script will exit. Next time"
      echo "try e.g.: ./searchArchive \"puzzle ever\""
      return 0
    fi
    if [ "${#srcTerm}" -eq 1 ]; then
      echo "# Search term is 1 character long -- that's not smart..."
      echo "# Script refuses to search for \"${srcTerm}\""
      return 1
    elif [ "${#srcTerm}" -le 3 ]; then
      echo "# Warning! Length of search term is <= 3, likely giving a large number of search results."
    fi

    # Collect the HTML files with a shell glob instead of parsing "ls".
    # An unmatched glob stays literal, hence the existence check.
    local -a files=()
    local f
    for f in "${arcDir}"/*.html; do
      [ -e "${f}" ] && files+=("${f}")
    done

    local nf=${#files[@]}
    if [ "${nf}" -eq 0 ]; then
      echo "# Error! No HTML-files found in \"${arcDir}\""
      echo "# Please check if archivePath (\"arcDir=...\") is set correct."
      return 1
    fi
    echo "# Searching for \"${srcTerm}\" in \"${arcDir}\" (${nf} files)"

    local srcstr
    srcstr=$(escape_search_term "${srcTerm}")

    for f in "${files[@]}"; do
      search_file "${srcstr}" "${f}"
    done
    return 0
  }

  main "$@"
  90. [/code]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement