Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# A minimal rule-based lexer.
#
# Rules are tried in the order they were registered; the first rule whose
# pattern matches (and consumes at least one character) wins. Patterns are
# expected to be anchored with \A so that they only match at the start of the
# remaining input.
class Lexer
  # A single lexing rule: +name+ is the token type (nil means "match and
  # discard"), +pattern+ is what String#sub! is called with.
  Token = Struct.new(:name, :pattern)

  def initialize
    @tokens = []
  end

  # Registers a rule. Arguments are forwarded to Token.new, i.e.
  # add_token(name, pattern).
  def add_token(*args)
    @tokens << Token.new(*args)
  end

  # Splits +data+ into [name, matched_text] pairs.
  #
  # The input string is not modified; lexing works on a copy.
  #
  # @param data [String] the text to tokenize
  # @return [Array<Array>] pairs of token name and matched text, in input
  #   order; matches of rules whose name is nil are dropped
  # @raise [ArgumentError] if no rule consumes anything at the current position
  def lex(data)
    remaining = data.dup
    result = []

    until remaining.empty?
      matched = @tokens.find do |token|
        next false unless remaining.sub!(token.pattern, "")

        text = Regexp.last_match(0)
        # A zero-width match removes nothing from the input. Accepting it
        # would leave `remaining` unchanged and spin this loop forever, so it
        # is treated as "this rule did not match".
        next false if text.empty?

        result << [token.name, text] unless token.name.nil?
        true
      end

      next if matched

      raise ArgumentError,
            "Malformed data could not be lexed correctly: #{data}. Copy is: #{remaining.inspect}"
    end

    result
  end
end
# Turns a function-signature-like string such as
#   f(x, y = "a")
# into text with nested, numbered "${n:...}" placeholders.
class CommandParser
  # Lexing rules as [token name, pattern] pairs, tried in this order.
  # A nil name means the match is consumed and discarded.
  #
  # NOTE(review): because :term's character class includes \w and ".", the
  # later "..." :term rule and the :number rule are shadowed by the first
  # :term rule for any input they could match. The order is kept as-is so the
  # token types produced by .lex do not change.
  TOKEN_RULES = [
    [:fun_begin, /\A(?:\w|\.)+\(/],
    [:assignment, /\A[\w.]+\s*<?=\s*/],
    # The original pattern ended in the lookahead (?=,|), which always
    # succeeds (its second alternative is empty), so it is omitted here.
    [:term, /\A[$\w.]+/],
    [:term, /\A\.\.\./],
    [:comma, /\A,\s*/],
    [:fun_end, /\A\)/],
    # A double-quoted string in which a backslash escapes the next character,
    # so "\"'" is one token instead of ending at the escaped quote.
    [:quoted, /\A"(?:\\.|[^"\\])*"/],
    [:operator, /\A\s*([\+\-\*\/!^]|&&|\|\||<=?|>=?|~)\s*/],
    [:number, /\A\d+(\.\d+)?/],
    [:brace_begin, /\A\{/],
    [:brace_end, /\A\}/],
    [:bracket_begin, /\A\[/],
    # Counterpart of :bracket_begin; .parse already handles :bracket_end.
    [:bracket_end, /\A\]/],
    [:list_separator, /\A:/],
    [nil, /\A\s+/]
  ].freeze

  # Lexes and parses +str+ in one step.
  #
  # @param str [String]
  # @return [String, nil] see .parse
  def self.snippet(str)
    parse(lex(str))
  end

  # Tokenizes +str+ with TOKEN_RULES.
  #
  # @param str [String]
  # @return [Array<Array>] [type, text] pairs as produced by Lexer#lex
  # @raise [ArgumentError] raised by Lexer#lex when the input cannot be lexed
  def self.lex(str)
    lexer = Lexer.new
    TOKEN_RULES.each { |name, regex| lexer.add_token(name, regex) }
    lexer.lex(str)
  end

  # Builds the placeholder text from a token list.
  #
  # Every group opener (function call, brace, bracket) emits two placeholders:
  # one wrapping the whole group and one for its first element. Every
  # assignment opens a placeholder for its value, which is closed at the next
  # comma or group end. Every comma starts a new placeholder. Quoted strings
  # get one placeholder around the quotes and one around the content.
  #
  # The token list passed in is left untouched.
  #
  # @param data [Array<Array>] [type, text] pairs, e.g. from .lex
  # @return [String, nil] the generated text with its first four characters
  #   and its last character removed (the "${0:" ... "}" wrapper when the
  #   input starts with a group opener); nil when the text is shorter than
  #   that slice allows
  # @raise [RuntimeError] if a group or assignment is still open at the end
  def self.parse(data)
    snippet = +""
    counter = -1
    next_stop = -> { counter += 1 }
    stack = []

    data.each do |type, match|
      # An assignment's value placeholder ends at the next comma or at the
      # end of the enclosing group.
      if stack.last == :assignment && type.to_s =~ /comma|end$/
        snippet << "}"
        stack.pop
      end

      case type
      when :fun_begin, :brace_begin, :bracket_begin
        stack << :group
        snippet << "${#{next_stop.call}:#{match}${#{next_stop.call}:"
      when :assignment
        stack << :assignment
        snippet << "#{match}${#{next_stop.call}:"
      when :comma
        snippet << "}${#{next_stop.call}:#{match}"
      when :brace_end, :bracket_end, :fun_end
        # Close the current element placeholder, emit the closer, then close
        # the placeholder that wraps the whole group.
        snippet << "}#{match}}"
        stack.pop
      when :quoted
        outer = next_stop.call
        inner = next_stop.call
        # Drop the surrounding quotes and escape "}" so it cannot terminate
        # the placeholder early.
        body = match[1..-2].gsub("}", "\\}")
        snippet << "${#{outer}:\"${#{inner}:#{body}}\"}"
      else
        snippet << match
      end
    end

    raise "Too many levels: #{snippet}" unless stack.empty?

    snippet[4..-2]
  end
end
- require 'pp'
# Run every line of the embedded DATA section (the text after __END__) through
# CommandParser.snippet. The return value is discarded, so the only visible
# outcome is an exception raised while lexing or parsing a line.
#
# Alternative kept from the original (disabled): sample random entries rather
# than processing all of them, e.g. 100.times with lines[rand(2000)].
lines = DATA.read.split("\n")
lines.each { |line| CommandParser.snippet(line) }
- __END__
- read.table(file, header = FALSE, sep = "", quote = "\"'", dec = ".", row.names, col.names, as.is = FALSE, na.strings = "NA", colClasses = NA, nrows = -1, skip = 0, check.names = TRUE, fill = !blank.lines.skip, strip.white = FALSE, blank.lines.skip = TRUE, comment.char = "#", allowEscapes = FALSE)
Add Comment
Please, Sign In to add comment