Guest User

Untitled

a guest
Feb 20th, 2018
274
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.03 KB | None | 0 0
  1. #!/bin/env ruby
  2. ############################################################################
  3. # textilewc: Count words in a Textile file, if somewhat crudely.
  4. # By Tammy Cravit, tammy@tammycravit.us
  5. #
  6. # $Revision$ $Date$
  7. #
  8. # If a single file is provided on the command line, it displays the number
  9. # of words in the file. If multiple files are given on the command line, it
  10. # generates a listing similar to the output of wc(1), except that directories
  11. # and non-Textile files are silently skipped with no warning.
  12. #
  13. # And yes, this is more complicated than it probably needs to be
  14. ############################################################################
  15. # IMPORTANT NOTE:
  16. #
  17. # In my directories, I use the prefix character _ to denote a template file,
  18. # such as "_Character Template.textile". The definition of textile_file?
  19. # will SILENTLY ignore Textile files whose names start with an underscore.
  20. #
  21. # You can override this behavior on the command line with the -a option. The
  22. # default behavior can be changed by modifying the initialize method to set
  23. # @hide_leading_underscores to false. If you call the TextileWordCounter
  24. # class from your own code, you can change this setting with the method
  25. # hide_leading_underscores!(boolValue)
  26. ############################################################################
  27.  
  28. require 'rubygems'
  29. gem 'RedCloth', ">= 0"
  30. require 'redcloth'
  31.  
  32. ############################################################################
  33. # The main Textile word counter class
  34. ############################################################################
  35.  
  36. class TextileWordCounter
  37. def initialize
  38. @hide_leading_underscores = true
  39. end
  40.  
  41. # Return true to exclude files with leading _ characters in their names
  42. # from the count. I use a leading _ to denote a template file, so I want
  43. # them excluded when I run textilewc with a wildcard.
  44. def hide_leading_underscores?
  45. @hide_leading_underscores
  46. end
  47.  
  48. def hide_leading_underscores!(val)
  49. @hide_leading_underscores = val
  50. end
  51.  
  52. # Helper method to determine if a file is a Textile document.
  53. def textile_file?(file)
  54. if hide_leading_underscores?
  55. file =~ /\.textile$/i && file !~ /^_/
  56. else
  57. file =~ /\.textile$/i
  58. end
  59. end
  60.  
  61. # Count the words in a Textile string. Used by count_words_file and (by
  62. # extension) count_words_files
  63. def count_words(filecontent)
  64. html_content = RedCloth.new(filecontent).to_html
  65. s = html_content.gsub(/<\/?[^>]*>/, "") # Strip HTML tags
  66. s = s.gsub(/\&\#[0123456789]+\;/, "") # Strip HTML entity chars
  67. s.split.length
  68. end
  69.  
  70. # Count the words in a single file. This can be invoked directly, or
  71. # from within count_words_multifiles.
  72. def count_words_file(file)
  73. if File.exists?(file)
  74. buff = IO.readlines(file).join("\n")
  75. fsize = count_words(buff)
  76. printf "%7d %s\n", fsize, file
  77. fsize
  78. else
  79. 0
  80. end
  81. end
  82.  
  83. # Count words in multiple files
  84. def count_words_files(filelist)
  85. total_count = 0
  86.  
  87. filelist.each {
  88. |file|
  89. unless File.directory?(file)
  90. if textile_file?(file)
  91. fsize = count_words_file(file)
  92. total_count += fsize
  93. end
  94. end
  95. }
  96. if filelist.count > 1
  97. printf "%7d total\n", total_count
  98. end
  99. end
  100.  
  101. def CommandLineDriver(args)
  102. if (args.count == 0)
  103. puts <<END
  104. ***************************************************************
  105. #{$0}: Count words in one or more Textile files.
  106. By Tammy Cravit, tammy@tammycravit.us
  107. $Revision$
  108. ***************************************************************
  109.  
  110. Usage: #{$0} [-a] <file> [file] ...
  111.  
  112. The default behavior of the script is to ignore files whose
  113. names begin with an underscore. To include these files in the
  114. count, include the -a option.
  115. END
  116. else
  117. if args.count == 1
  118. if File.exists?(args[0])
  119. count_words_file(args[0])
  120. else
  121. printf "%s: not found\n", args[0]
  122. end
  123. else
  124. if args[0] == "-a"
  125. hide_leading_underscores!(false)
  126. count_words_files(args.slice(1, args.count - 1))
  127. else
  128. count_words_files(args)
  129. end
  130. end
  131. end
  132. end
  133. end
  134.  
  135. ############################################################################
  136. # Driver routine - display a message if no files are specified;
  137. # otherwise, count the words in the subset of specified files which
  138. # are Textile files.
  139. ############################################################################
  140.  
  141. module Kernel
  142.  
  143. # Runs the passed block of code, if the calling file is the one being executed.
  144. def on_execute
  145. calling_file = caller.first.split(':').first
  146. if File.expand_path(calling_file) == File.expand_path($0)
  147. yield
  148. end
  149. end
  150.  
  151. end
  152.  
  153. on_execute do
  154. TextileWordCounter.new.CommandLineDriver(ARGV)
  155. end
Add Comment
Please, Sign In to add comment