Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
require 'normalizer'
require 'ruby-prof'

# Specs for Normalizer.replace_synonyms and Normalizer.normalize.
#
# As exercised below:
# * Normalizer.replace_synonyms(keyword) takes a string (or nil) and returns
#   it with known abbreviations/expansions swapped for their synonyms.
# * Normalizer.normalize(keyword, terms) takes a keyword and a list of
#   candidate terms and returns a hash {:match => term_or_nil,
#   :match_type => type}, where type is one of :empty, :exact_match,
#   :sub_match or :closest_match.
#
# Examples carrying a "# slow" comment were marked that way by the original
# author (presumably as slow-running; no RSpec tag or filter is applied).
describe Normalizer do
  # Uncomment these hooks to profile the whole group with ruby-prof and
  # print a flat report to STDOUT once all examples have run.
  #
  # before(:all) do
  #   RubyProf.start
  # end
  #
  # after(:all) do
  #   result = RubyProf.stop
  #   printer = RubyProf::FlatPrinter.new(result)
  #   printer.print(STDOUT, 0)
  # end

  describe ".replace_synonyms" do
    it "should return the input unchanged when trying to replace synonyms with an empty or nil value" do
      Normalizer.replace_synonyms("").should == ""
      Normalizer.replace_synonyms(nil).should == nil
    end

    it "replacing keywords in a term that has no synonyms should return the same keyword" do
      Normalizer.replace_synonyms("sea dog").should == "sea dog"
    end

    it "should replace an abbreviation with its expansion and an expansion with its abbreviation" do
      Normalizer.replace_synonyms("se").should == "special edition"
      Normalizer.replace_synonyms("se car").should == "special edition car"
      Normalizer.replace_synonyms("Chevy HD Truck").should == "Chevy heavy duty Truck"
      Normalizer.replace_synonyms("Ford WT").should == "Ford work truck"
      Normalizer.replace_synonyms("Ford Work Truck").should == "Ford wt"
      Normalizer.replace_synonyms("10'th ann edition").should == "10'th anniversary edition"
      Normalizer.replace_synonyms("10'th anniversary edition").should == "10'th ann edition"
      Normalizer.replace_synonyms("includes tech pkg").should == "includes technology pkg"
      Normalizer.replace_synonyms("includes technology pkg").should == "includes tech pkg"
    end

    it "should replace a single word keyword that translates into a two word synonym, without regard for case" do
      # slow
      Normalizer.replace_synonyms("SE").should == "special edition"
    end

    it "should replace multiple terms in a keyword" do
      # slow
      Normalizer.replace_synonyms("This is a special edition car").should == "This is a se car"
    end

    it "should replace single word synonyms that appear at end of keyword" do
      # slow
      Normalizer.replace_synonyms("Buy this limited").should == "Buy this ltd"
    end

    it "should replace double word synonyms that appear at end of keyword" do
      # slow
      Normalizer.replace_synonyms("Buy this special edition").should == "Buy this se"
    end

    it "should replace double word synonyms that appear at beginning of keyword" do
      # slow
      Normalizer.replace_synonyms("special edition car").should == "se car"
    end

    it "should replace a double word keyword regardless of case" do
      # slow
      Normalizer.replace_synonyms("Special Edition car").should == "se car"
    end

    it "should replace a double word that has a term that is also in a single word synonym" do
      # slow
      Normalizer.replace_synonyms("Limited Edition car").should == "le car"
    end
  end

  describe ".normalize" do
    it "should return a nil match of type :empty if keyword or terms are nil or empty" do
      # slow
      Normalizer.normalize(nil, ["bar", "foo", "baz"]).should == {:match => nil, :match_type => :empty}
      Normalizer.normalize("", ["bar", "foo", "baz"]).should == {:match => nil, :match_type => :empty}
      Normalizer.normalize("foo", nil).should == {:match => nil, :match_type => :empty}
      Normalizer.normalize("foo", []).should == {:match => nil, :match_type => :empty}
    end

    it "should return an exact match if an exact match is found" do
      Normalizer.normalize("foo", ["bar", "foo", "baz"]).should == {:match => "foo", :match_type => :exact_match}
    end

    it "should return an exact match if an exact match is found, even if the case is different" do
      Normalizer.normalize("FOo", ["bar", "FOO", "baz"]).should == {:match => "FOO", :match_type => :exact_match}
    end

    it "should return an exact match even if there is a difference of a dash in the keyword" do
      Normalizer.normalize("GS-R", ["GS", "GSR", "GSR VTEC"]).should == {:match => "GSR", :match_type => :exact_match}
    end

    it "should return an exact match even if there is a difference of a slash in the keyword" do
      Normalizer.normalize("GS/R", ["GS", "GSR", "GSR VTEC"]).should == {:match => "GSR", :match_type => :exact_match}
    end

    it "should return an exact match even if there is a difference of a dash in the keyword, without regard for case" do
      Normalizer.normalize("GS-R", ["GS", "GSR", "GSR VTEC"]).should == {:match => "GSR", :match_type => :exact_match}
      Normalizer.normalize("gs-r", ["GS", "GSR", "GSR VTEC"]).should == {:match => "GSR", :match_type => :exact_match}
    end

    it "should return an exact match even if there is a difference of a dash in one of the terms" do
      Normalizer.normalize("GSR", ["GS", "GS-R", "GSR VTEC"]).should == {:match => "GS-R", :match_type => :exact_match}
    end

    it "should return the shortest term that the keyword fits completely inside" do
      Normalizer.normalize("foo bar", ["foo", "bar", "foo bar bellyhoo baz", "foo bar baz"]).should == {:match => "foo bar baz", :match_type => :sub_match}
    end

    it "should return the shortest term that the keyword fits completely inside of without regard for case" do
      Normalizer.normalize("foo BAR", ["foo", "bar", "foo bar bellyhoo baz", "foo bar baz"]).should == {:match => "foo bar baz", :match_type => :sub_match}
    end

    it "should return the shortest term that the keyword fits completely inside, even if the keyword is separated by dashes" do
      Normalizer.normalize("foo-bar", ["foo", "bar", "foo bar bellyhoo baz", "foo bar baz"]).should == {:match => "foo bar baz", :match_type => :sub_match}
    end

    it "should return the longest term that fits completely inside the keyword" do
      Normalizer.normalize("VR6 Luxury", ["VR6", "TDI", "VR6 Sport", "VR6 Lux", "V8", "V6"]).should == {:match => "VR6 Lux", :match_type => :sub_match}
    end

    it "should return the longest term that fits completely inside the keyword regardless of case" do
      Normalizer.normalize("Vr6 luxury", ["VR6", "TDI", "VR6 Sport", "VR6 Lux", "V8", "V6"]).should == {:match => "VR6 Lux", :match_type => :sub_match}
    end

    it "should return longest term that fits completely inside the keyword, even if the term is multiple words and is not contiguous inside the keyword" do
      Normalizer.normalize("HD Ext 143.5\" WB C6P LS", ["LS", "HD", "HD LS"]).should == {:match => "HD LS", :match_type => :sub_match}
    end

    it "should return longest term that fits completely inside the keyword, even if the term is multiple words and is not contiguous inside the keyword, without regard for case" do
      Normalizer.normalize("Hd Ext 143.5\" WB C6P lS", ["LS", "hD", "HD ls"]).should == {:match => "HD ls", :match_type => :sub_match}
    end

    it "should return longest term that fits completely inside the keyword, even if the term is multiple words and is not contiguous inside the keyword, without regard for dashes" do
      Normalizer.normalize("LTZ 4x4 Z71", ["LTZ", "LTZ Z-71"]).should == {:match => "LTZ Z-71", :match_type => :sub_match}
    end

    it "should return longest term that fits completely inside the keyword, even if the term is multiple words and is not contiguous inside the keyword, without regard for spaces" do
      Normalizer.normalize("LTZ 4x4 Z 71", ["LTZ", "LTZ Z-71"]).should == {:match => "LTZ Z-71", :match_type => :sub_match}
    end

    it "should return the term that the keyword fits completely inside, even if the keyword is not contiguous inside the term, without regard for dashes or spaces" do
      Normalizer.normalize("LTZ Z-71", ["LTZ", "LTZ 4x4 Z 71"]).should == {:match => "LTZ 4x4 Z 71", :match_type => :sub_match}
    end

    it "should return the longest term that fits completely inside the keyword, even if there is a dash in the term or keyword" do
      Normalizer.normalize("VR6-Luxury", ["VR6", "TDI", "VR6 Sport", "VR6 Sport Matic Lux", "VR6 Lux", "V8", "V6"]).should == {:match => "VR6 Lux", :match_type => :sub_match}
      Normalizer.normalize("VR6 Luxury", ["VR6", "TDI", "VR6 Sport", "VR6 Sport Matic Lux", "VR6-Lux", "V8", "V6"]).should == {:match => "VR6-Lux", :match_type => :sub_match}
    end

    it "should return closest match when no direct matches can be found" do
      Normalizer.normalize("Ferd", ["BMW", "Buick", "Chevrolet", "Ford"]).should == {:match => "Ford", :match_type => :closest_match}
    end

    it "should check synonyms if an exact match cannot be made" do
      Normalizer.normalize("GSR Limited Edition", ["GSR", "GSR SE", "GSR X", "GSR LE"]).should == {:match => "GSR LE", :match_type => :exact_match}
    end

    it "should not use synonyms if they do not result in a better match" do
      Normalizer.normalize("GSR Limited Edition", ["GSR", "GSR SE", "GSR X", "GSR LTD"]).should == {:match => "GSR", :match_type => :sub_match}
    end

    it "should return the term that has the most common terms when the original match and synonym match are both sub matches" do
      Normalizer.normalize("GSR Limited Edition Deluxe", ["GSR", "GSR SE", "GSR X", "GSR LE"]).should == {:match => "GSR LE", :match_type => :sub_match}
      Normalizer.normalize("Limited Edition Deluxe GSR", ["GSR", "GSR SE", "GSR X", "GSR LTD", "GSR LE"]).should == {:match => "GSR LE", :match_type => :sub_match}
    end

    it "should use the closest term with synonyms" do
      terms = ["Touring Pkg", "Tech Pkg", "Sport Pkg", "Tech/Entertainment Pkg", "Sport/Entertainment Pkg",
               "3.5", "Navigation", "3.7", "Advance"]
      Normalizer.normalize("3.7L Technology Pkg AWD", terms).should == {:match => "Tech Pkg", :match_type => :sub_match}
    end

    it "should return exact match if all words in the keyword match a term, even if they are out of order." do
      Normalizer.normalize("CrewCab LT", ["LT", "Crew Cab", "LT Crewcab"]).should == {:match => "LT Crewcab", :match_type => :exact_match}
      Normalizer.normalize("Crew-Cab LT", ["LT", "Crew Cab", "LT Crewcab"]).should == {:match => "LT Crewcab", :match_type => :exact_match}
      Normalizer.normalize("Crew Cab LT", ["LT", "Crew Cab", "LT Crew-cab"]).should == {:match => "LT Crew-cab", :match_type => :exact_match}
    end

    it "should return a sub match when there is an exact match on some terms and the others fit inside a term" do
      Normalizer.normalize("CrewCab LT1", ["LT", "Crew Cab", "LT Crewcab"]).should == {:match => "LT Crewcab", :match_type => :sub_match}
      Normalizer.normalize("Crew-Cab LT1", ["LT", "Crew Cab", "LT Crewcab"]).should == {:match => "LT Crewcab", :match_type => :sub_match}
      Normalizer.normalize("Crew Cab LT1", ["LT", "Crew Cab", "LT Crew-cab"]).should == {:match => "LT Crew-cab", :match_type => :sub_match}
      Normalizer.normalize("Crew Cab L-T1", ["LT", "Crew Cab", "LT Crewcab"]).should == {:match => "LT Crewcab", :match_type => :sub_match}
      Normalizer.normalize("Crew Cab L-T 1", ["LT", "Crew Cab", "LT Crew cab"]).should == {:match => "LT Crew cab", :match_type => :sub_match}
    end

    it "should return the term that has the most matching tokens" do
      terms = ["Outback", "L 25th Anniversary", "Outback Limited 30th Anniversary", "Outback Limited"]
      Normalizer.normalize("Outback Extreme LTD 30th Ann", terms).should == {:match => "Outback Limited 30th Anniversary", :match_type => :sub_match}
    end

    # Runs normalize against a large candidate list. Only the resulting match
    # is asserted; no timing threshold is checked here.
    it "Time Test" do
      terms = [
        "1500 Pickups", "1500 Sport Pickups", "1500 Work Pickups", "2500 Chassis-Cabs", "2500 Pickups",
        "3500 Chassis-Cabs", "3500 HD Chassis-Cabs", "3500 Pickups", "Aluminum Step Van", "APV Cargo",
        "Astro", "Avalanche", "Aveo", "Beretta", "Blazer", "C 3500 HD", "C/K 1500", "C/K 2500", "C/K 3500",
        "Camaro", "Camaro Police Pkg", "Caprice", "Cavalier", "CC4500", "CC5500", "CC6500", "CC6H042",
        "CC7500", "CC7H042", "CC7H064", "CC8500", "CC8500 Tandem", "Celebrity", "CF6B042", "CF7B042",
        "CF7B064", "Chevy Cargo Van", "Chevy Sportvan", "Chevy Van", "Classic", "Classic Chevy Van",
        "Classic G Commercial Cutaway", "Classic G RV Cutaway", "Classic Sportvan", "Cobalt", "Colorado",
        "Commercial Cutaway Van", "Commercial/RV Cutaway Van", "Corsica", "Corvette", "Cruze", "CT6500",
        "CT7500", "CT8500", "CT8500 Tandem", "Equinox", "Express Van", "G Commercial Cutaway", "G Hi-Cube",
        "G RV Cutaway", "Geo Metro", "Geo Spectrum", "Geo Tracker", "HHR", "Hi-Cube Van", "Impala",
        "Impala Police", "Impala SS", "K Blazer", "Lumina", "Lumina APV", "Malibu", "Malibu Classic",
        "Malibu Hybrid", "Malibu Maxx", "Metro", "Monte Carlo", "Motor Home Chassis", "New Tahoe", "Nova",
        "Prizm", "RV Cutaway Van", "S-10", "S-10 Blazer", "S/T Blazer", "S/T Pickup", "Silverado 1500",
        "Silverado 2500", "Silverado 3500", "Silverado SS", "Spectrum", "Sport Van", "Sprint", "SSR",
        "Steel Value", "Suburban", "Tahoe", "Tahoe Police", "Tracker", "TrailBlazer", "Traverse",
        "Uplander", "Venture", "W3500", "W3S042", "W3S042i", "W4500", "Express 1500", "Express 2500",
        "Express 3500", "Chevelle", "C40", "Bel Air", "Corvair", "Citation", "Aveo5", "1500", "2500",
        "Volt", "Silverado 1500 HD", "Silverado 2500 HD", "Silverado 3500 HD"
      ]
      Normalizer.normalize("Silverado-DRW 143.5 WB 2500 Ext C6P HD LS", terms).should == {:match => "Silverado 2500 HD", :match_type => :sub_match}
    end
  end
end
Add Comment
Please, Sign In to add comment