Advertisement
Guest User

Simple grepper (text searcher)

a guest
Feb 21st, 2021
113
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 5.69 KB | None | 0 0
  1. #!/usr/bin/env python
  2.  
  3. #######################################################################
  4. #
  5. #   The following license supersedes all notices in the source code.
  6. #
  7. #   Copyright (c) 2021 Kurt Dekker/PLBM Games All rights reserved.
  8. #
  9. #   @kurtdekker - http://www.plbm.com
  10. #  
  11. #   Redistribution and use in source and binary forms, with or without
  12. #   modification, are permitted provided that the following conditions are
  13. #   met:
  14. #  
  15. #   Redistributions of source code must retain the above copyright notice,
  16. #   this list of conditions and the following disclaimer.
  17. #  
  18. #   Redistributions in binary form must reproduce the above copyright
  19. #   notice, this list of conditions and the following disclaimer in the
  20. #   documentation and/or other materials provided with the distribution.
  21. #  
  22. #   Neither the name of the Kurt Dekker/PLBM Games nor the names of its
  23. #   contributors may be used to endorse or promote products derived from
  24. #   this software without specific prior written permission.
  25. #  
  26. #   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  27. #   IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
  28. #   TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
  29. #   PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
  30. #   HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  31. #   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
  32. #   TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  33. #   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
  34. #   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  35. #   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  36. #   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  37. #
  38. #####################################################################
  39. #
  40. # I cannot believe how irritating it is to search all *.TXT files in
  41. # a posix environment, with huge clumsy find/for/grep combo chains.
  42. #
  43. # This is intended to make a nice simple way of doing things.
  44. #
  45. #####################################################################
  46. #
  47. import os, sys, fnmatch
  48. from string import maketrans
  49. import re
  50.  
  51. def main():
  52.     argcopy = list(sys.argv[1:])
  53.  
  54.     option_showfile = 0         # put the file ON the same line
  55.     option_fileonly = 0         # only display the filename
  56.     option_matchcase = 0
  57.     option_count = 0
  58.  
  59.     filepatterns = "*"
  60.  
  61.     if len(argcopy) < 2:
  62.         print "Usage: %s [-f] [!]searchtext filepattern[,pattern2,pattern3]" % sys.argv[0]
  63.         print " where ! will search for lines NOT containing searchtext"
  64.         print " options: -f display filename"
  65.         print " options: -l display filename only"
  66.         print " options: -c count occurrences only"
  67.         print " options: -m enforce correct case"
  68.         print
  69.         print "Example: (always escape wildcards on Posix systems)"
  70.         print "$ grp kurt \*.txt"
  71.         print "$ grp MyFunctionName \*.cs"
  72.         exit()
  73.  
  74.     while len(argcopy[0]) > 1 and argcopy[0][0] == '-':
  75.         if len(argcopy[0]) > 1:
  76.             if argcopy[0][1] == 'f':
  77.                 option_showfile = 1
  78.                 print >> sys.stderr, "option_showfile enabled"
  79.  
  80.             if argcopy[0][1] == 'l':
  81.                 option_fileonly = 1
  82.                 print >> sys.stderr, "option_fileonly enabled"
  83.  
  84.             if argcopy[0][1] == 'c':
  85.                 option_count = 1
  86.                 option_fileonly = 1
  87.                 print >> sys.stderr, "option_count enabled"
  88.  
  89.             if argcopy[0][1] == 'm':
  90.                 option_matchcase = 1
  91.                 print >> sys.stderr, "option_matchcase enabled"
  92.  
  93.         argcopy = argcopy[1:]
  94.  
  95.     searchtext = argcopy[0]
  96.  
  97.     argcopy = argcopy[1:]
  98.  
  99.     invert = 0
  100.     if len(searchtext) > 0:
  101.         if searchtext[0] == '!':
  102.             searchtext = searchtext[1:]
  103.             invert = 1
  104.  
  105.     filepatterns = argcopy[0].lower()
  106.  
  107.     if (not option_matchcase):
  108.         searchtext = searchtext.lower()
  109.  
  110.     matches = []
  111.     for root, dirnames, filenames in os.walk('.'):
  112.         fnpairs = [(a.lower(),a) for a in filenames]
  113.         for filepattern in filepatterns.split( ","):
  114.             files = [fo for (fl,fo) in fnpairs if fnmatch.fnmatch(fl, filepattern)]
  115.             for n in xrange( len( files)):
  116.                 oneline = os.path.join(root, files[n])
  117.                 if ((oneline.find( ".hg") < 0) and
  118.                     (oneline.find( ".git") < 0) and
  119.                     (oneline.find( ".DS_Store") < 0) and
  120.                     (oneline.find( ".DSStore") < 0) and
  121.                     True):
  122.                     matches.append(oneline)
  123.  
  124.     tabtospacett = maketrans( '\t', ' ')
  125.  
  126.     anyfoundatall = 0
  127.  
  128.     output = []
  129.  
  130.     reignorecaseflags = re.IGNORECASE
  131.     if option_matchcase:
  132.         reignorecaseflags = 0
  133.     reflags = reignorecaseflags
  134.     rematcher = re.compile( ".*" + searchtext + ".*", reflags)
  135.  
  136.     for fn in matches:
  137.         didfilename = 0
  138.         foundany = 0
  139.         count = 0
  140.         try:
  141.             with open( fn) as fp:
  142.                 for oneline in [a[:-1] for a in fp.readlines()]:
  143.                     foundmatch = oneline.lower().find( searchtext) >= 0
  144.  
  145.                     rematched = rematcher.match( oneline)
  146.                     if rematched:
  147.                         foundmatch = 1
  148.  
  149.                     if option_matchcase:
  150.                         foundmatch = oneline.find( searchtext) >= 0
  151.  
  152.                     if invert:
  153.                         foundmatch = not foundmatch
  154.  
  155.                     if foundmatch:
  156.                         count += 1
  157.                         foundany = 1
  158.                         anyfoundatall = 1
  159.  
  160.                         if option_showfile or not didfilename:
  161.                             didfilename = 1
  162.                             output.append( "File: %s: " % fn)
  163.  
  164.                         if not option_fileonly:
  165.                             while len(oneline) and (oneline[-1] in ('\n', '\r',)):
  166.                                 oneline = oneline[:-1]
  167.                             output.append( oneline.translate(tabtospacett))
  168.         except:
  169.             print "Unable to open " + fn + " for reading..."
  170.  
  171.         if invert:
  172.             if not foundany:
  173.                 output.append( "File: %s:" % fn)
  174.  
  175.         if option_count:
  176.             if count > 0:
  177.                 output.append( "%u" % count)
  178.  
  179.     if not anyfoundatall:
  180.         print >> sys.stderr, "Don't forget to quote glob chars on Posix shells."
  181.     else:
  182.         if option_fileonly:
  183.             outout = sorted(output)
  184.  
  185.         for o in output:
  186.             print o
  187.  
# Run the search only when this file is executed as a script, so that
# importing the module does not trigger a directory walk.
if __name__ == "__main__":
    main()
  190.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement