Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- Simple Bash script for searching raw HTML sources
- Uses common Unix tools: awk, grep, head, tail
- Also runs under Cygwin (Windows), but make sure
- you've got the path to your archive (arcDir) right
- [code]
#!/usr/bin/env bash
#
# searchArchive -- search a local archive of raw HTML files for a term.
#
# Usage:   ./searchArchive SEARCH_TERM [ARCHIVE_DIR]
# Example: ./searchArchive "puzzle ever"
#
# For every *.html file directly inside the archive directory, the script
# prints the number of case-insensitive matches and, for each match, up to
# 640 bytes of the file starting at the first byte of the match.
#
# SEARCH_TERM is handed to grep as a basic regular expression, except that
# every "." is escaped so it only matches a literal dot.
#
# Exit status: 0 when the search ran, 1 for a refused term or an archive
# directory without HTML files, 2 when no search term was given.

# Unset variables are treated as errors. "-e" is deliberately not enabled:
# grep exits with status 1 when a file has no match, which is a normal
# outcome here and must not abort the run.
set -u

# Local archive folder
# --> Edit here for your own local path (or pass ARCHIVE_DIR as 2nd argument)
arcDir="/pathto/archives/8chan/qresearch/.zfs/snapshot/grab-n-snap-20190713-0227/qresearch/res"
# For CygWin, path could be for example:
# arcDir="/cygdrive/D/archive/pol/res"

# Number of bytes printed for each occurrence, counted from the match start.
contextBytes=640

#######################################
# Print the 1-based byte offset of every match, one offset per line.
# Arguments: $1 - grep pattern (basic regular expression)
#            $2 - file to search
# Outputs:   offsets on stdout; nothing when the file has no match
#######################################
match_offsets() {
  local pattern=$1
  local file=$2

  # -b/-o emit "<0-based offset>:<matched text>" per match; awk keeps the
  # offset and adds 1 because "tail -c +N" counts bytes from 1.
  # -a makes grep report offsets even if the file contains non-text bytes.
  # -e keeps a pattern that starts with "-" from being read as an option.
  grep -a -i -o -b -e "${pattern}" -- "${file}" | awk -F: '{ print $1 + 1 }'
}

#######################################
# Report all occurrences of a pattern inside one file.
# Globals:   contextBytes (read)
# Arguments: $1 - grep pattern (basic regular expression)
#            $2 - file to search
# Outputs:   a ">N occurrences" header followed by one numbered snippet per
#            match; prints nothing when the file has no match
#######################################
search_file() {
  local pattern=$1
  local file=$2
  local -a offsets=()
  local off
  local j=0

  # grep runs once per file; the number of offsets is the match count.
  while IFS= read -r off; do
    offsets+=("${off}")
  done < <(match_offsets "${pattern}" "${file}")

  if (( ${#offsets[@]} == 0 )); then
    return 0
  fi

  printf '>%d occurrences in "%s":\n' "${#offsets[@]}" "${file}"
  for off in "${offsets[@]}"; do
    j=$(( j + 1 ))
    printf ' (%d) ' "${j}"
    tail -c "+${off}" -- "${file}" | head -c "${contextBytes}"
    printf '\n'
  done
}

#######################################
# Validate the arguments and search every HTML file of the archive.
# Globals:   arcDir (read; default archive directory)
# Arguments: $1 - search term
#            $2 - archive directory (optional, defaults to arcDir)
# Outputs:   results on stdout, usage/errors/warnings on stderr
# Returns:   0 when the search ran, 1 or 2 on the errors listed at the top
#######################################
main() {
  local srcTerm=${1:-}
  local dir=${2:-${arcDir}}
  local pattern
  local file
  local -a files

  # SearchTerm provided by user -- make sure it's there and not too short
  if [[ -z "${srcTerm}" ]]; then
    {
      echo "Script searches for expressions in a HTML-archive"
      echo "No search term given -- script will exit. Next time"
      echo "try e.g.: ./searchArchive \"puzzle ever\""
    } >&2
    return 2
  fi
  if (( ${#srcTerm} == 1 )); then
    {
      echo "# Search term is 1 character long -- that's not smart..."
      echo "# Script refuses to search for \"${srcTerm}\""
    } >&2
    return 1
  fi
  if (( ${#srcTerm} <= 3 )); then
    echo "# Warning! Length of search term is <= 3, likely giving a large number of search results." >&2
  fi

  # Collect the HTML files with a glob instead of parsing "ls". When nothing
  # matches, the array holds the literal pattern, which does not exist.
  files=("${dir}"/*.html)
  if [[ ! -e "${files[0]}" ]]; then
    {
      echo "# Error! No HTML-files found in \"${dir}\""
      echo "# Please check if archivePath (\"arcDir=...\") is set correct."
    } >&2
    return 1
  fi
  echo "# Searching for \"${srcTerm}\" in \"${dir}\" (${#files[@]} files)"

  # Escape "." so it only matches a literal dot. Spaces need no escaping
  # because the pattern is always passed to grep as one quoted argument.
  pattern=${srcTerm//./\\.}

  for file in "${files[@]}"; do
    search_file "${pattern}" "${file}"
  done
}

main "$@"
- [/code]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement