Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #!/bin/env ruby
- ############################################################################
- # textilewc: Count words in a Textile file, if somewhat crudely.
- # By Tammy Cravit, tammy@tammycravit.us
- #
- # $Revision$ $Date$
- #
- # If a single file is provided on the command line, it displays the number
- # of words in the file. If multiple files are given on the command line, it
- # generates a listing similar to the output of wc(1), except that directories
- # and non-Textile files are silently skipped with no warning.
- #
- # And yes, this is more complicated than it probably needs to be
- ############################################################################
- # IMPORTANT NOTE:
- #
- # In my directories, I use the prefix character _ to denote a template file,
- # such as "_Character Template.textile". The definition of textile_file?
- # will SILENTLY ignore Textile files whose names start with an underscore.
- #
- # You can override this behavior on the command line with the -a option. The
- # default behavior can be changed by modifying the initialize method to set
- # @hide_leading_underscores to false. If you call the TextileWordCounter
- # class from your own code, you can change this setting with the method
- # hide_leading_underscores!(boolValue)
- ############################################################################
- require 'rubygems'
- gem 'RedCloth', ">= 0"
- require 'redcloth'
- ############################################################################
- # The main Textile word counter class
- ############################################################################
# Counts words in Textile documents. Each document is rendered to HTML with
# RedCloth, the markup is stripped, and the remaining whitespace-separated
# tokens are counted.
#
# By default, files whose base name starts with an underscore are treated as
# templates and rejected by #textile_file?; use #hide_leading_underscores! to
# change that.
class TextileWordCounter
  # Any opening or closing HTML tag in RedCloth's output.
  HTML_TAG = /<\/?[^>]*>/
  # Numeric character references such as "&#8217;".
  NUMERIC_ENTITY = /&#[0-9]+;/
  # File names ending in ".textile" (any case). \z anchors at the true end of
  # the string; "$" would also match just before an embedded newline.
  TEXTILE_EXTENSION = /\.textile\z/i

  def initialize
    @hide_leading_underscores = true
  end

  # Returns true when files whose names start with "_" are excluded from the
  # count. A leading "_" denotes a template file, which should be skipped
  # when the tool is run with a wildcard.
  def hide_leading_underscores?
    @hide_leading_underscores
  end

  # Sets whether files with a leading "_" are excluded.
  #
  # val - true to hide such files, false to include them.
  def hide_leading_underscores!(val)
    @hide_leading_underscores = val
  end

  # Helper method to determine if a file is a Textile document.
  #
  # file - a file name or path.
  #
  # Returns true when the name ends in ".textile" and is not hidden by the
  # leading-underscore rule, false otherwise.
  def textile_file?(file)
    # Judge the file by its base name so that "dir/_Template.textile" is
    # hidden and "_dir/chapter.textile" is not.
    name = File.basename(file)
    return false unless name =~ TEXTILE_EXTENSION

    !(hide_leading_underscores? && name.start_with?('_'))
  end

  # Counts the words in a Textile string. Used by count_words_file and (by
  # extension) count_words_files.
  #
  # filecontent - the Textile source text.
  #
  # Returns the number of whitespace-separated tokens left after the text is
  # rendered to HTML and the tags and numeric entities are removed.
  def count_words(filecontent)
    html = RedCloth.new(filecontent).to_html
    html.gsub(HTML_TAG, '').gsub(NUMERIC_ENTITY, '').split.length
  end

  # Counts the words in a single file and prints a "count name" line for it.
  # This can be invoked directly, or from within count_words_files.
  #
  # file - path of the file to count.
  #
  # Returns the word count, or 0 (printing nothing) when the path is missing
  # or is not a regular file.
  def count_words_file(file)
    return 0 unless File.file?(file)

    # Read the file verbatim. Joining IO.readlines output with "\n" would
    # double every newline, because each line keeps its own terminator.
    words = count_words(File.read(file))
    printf "%7d %s\n", words, file
    words
  end

  # Counts words in multiple files, silently skipping directories and
  # anything rejected by textile_file?. A "total" line is printed when more
  # than one name was supplied.
  #
  # filelist - array of file names or paths.
  #
  # Returns the combined word count of the files that were counted.
  def count_words_files(filelist)
    total = 0
    filelist.each do |file|
      next if File.directory?(file) || !textile_file?(file)

      total += count_words_file(file)
    end
    printf "%7d total\n", total if filelist.count > 1
    total
  end

  # Command-line entry point.
  #
  # args - the argument list (normally ARGV).
  #
  # With no arguments the usage text is printed. A single argument is counted
  # directly, without the Textile-name filter, and reported as "not found"
  # when it does not exist. With several arguments, a leading "-a" includes
  # files whose names start with an underscore, and the remaining names go to
  # count_words_files.
  def CommandLineDriver(args)
    if args.empty?
      puts usage_lines
    elsif args.length == 1
      target = args.first
      if File.exist?(target)
        count_words_file(target)
      else
        printf "%s: not found\n", target
      end
    elsif args.first == "-a"
      hide_leading_underscores!(false)
      count_words_files(args.drop(1))
    else
      count_words_files(args)
    end
  end

  private

  # Builds the usage banner, one array element per output line.
  def usage_lines
    [
      "***************************************************************",
      "#{$0}: Count words in one or more Textile files.",
      "By Tammy Cravit, tammy@tammycravit.us",
      "$Revision$",
      "***************************************************************",
      "Usage: #{$0} [-a] <file> [file] ...",
      "The default behavior of the script is to ignore files whose",
      "names begin with an underscore. To include these files in the",
      "count, include the -a option."
    ]
  end
end
- ############################################################################
- # Driver routine - display a message if no files are specified;
- # otherwise, count the words in the subset of specified files which
- # are Textile files.
- ############################################################################
module Kernel
  # Runs the passed block of code, if the calling file is the one being
  # executed (i.e. its expanded path equals the expanded path of $0).
  # Nothing is yielded when the file was merely required or loaded by another
  # script.
  def on_execute
    frame = caller.first
    return unless frame

    # A backtrace entry looks like "path:line" or "path:line:in `method'".
    # Strip that trailing suffix instead of splitting on the first colon, so
    # that paths which themselves contain a colon (e.g. "C:/bin/tool.rb")
    # are kept whole.
    calling_file = frame.sub(/:\d+(?::in .*)?\z/m, '')
    yield if File.expand_path(calling_file) == File.expand_path($0)
  end
end
# Script entry point: when this file is run directly (rather than required),
# build a word counter and hand it the command-line arguments.
on_execute do
  counter = TextileWordCounter.new
  counter.CommandLineDriver(ARGV)
end
Add Comment
Please, Sign In to add comment