package com.adition.pig.test;

import java.io.IOException;
import java.util.Iterator;
import java.util.Properties;
import java.util.Map.Entry;

import org.junit.Test;
import junit.framework.Assert;
import static org.junit.Assert.*;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.junit.AfterClass;
import org.junit.Before;

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import com.adition.pig.filtering.string.CONTAINS;

import org.apache.pig.test.Util;
import org.apache.pig.test.MiniCluster;
import org.apache.pig.EvalFunc;
import org.apache.pig.builtin.TRIM;
import org.apache.pig.PigServer;
import org.apache.pig.ExecType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.io.FileLocalizer;

/**
 * Tests for the {@link CONTAINS} filter UDF.
 *
 * <p>The first group of tests calls {@code exec} directly on hand-built tuples. The
 * {@code testFilteringCluster*} tests run a Pig {@code FILTER} statement against a
 * {@link MiniCluster} and compare the rendered result tuples with the expected rows.
 */
@RunWith(JUnit4.class)
public class TestCONTAINS {

    // NOTE(review): assumes CONTAINS is an EvalFunc<Boolean> (it is used as a FILTER
    // predicate and its result is passed to assertTrue/assertFalse) -- confirm.
    private static final EvalFunc<Boolean> contains_ = new CONTAINS();

    /** Fully qualified UDF name as it is written in the Pig scripts below. */
    private static final String CONTAINS_UDF = "com.adition.pig.filtering.string.CONTAINS";

    /** Expected textual rendering of the result rows selected by the cluster tests. */
    private static final String ROW_4 =
        "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)";
    private static final String ROW_5 =
        "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)";
    private static final String ROW_6 =
        "(6,523,442,http://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident)";

    protected final Log log = LogFactory.getLog(getClass());

    private static MiniCluster cluster = MiniCluster.buildCluster();

    private PigServer pigServer = null;
    private Properties props = null;

    /** Name of the input file the cluster tests load. */
    static final String INPUT_FILE = "TestCONTAINS-testFilteringCluster-input.txt";

    /**
     * Prepares the cluster tests: resets the {@link FileLocalizer}, copies the cluster
     * properties into {@link #props} and writes the six-row input file.
     *
     * <p>Deliberately not a {@code @Before} method; only the cluster tests invoke it.
     */
    public void setup() {
        FileLocalizer.setInitialized(false);

        props = new Properties();
        props.putAll(cluster.getProperties());
        props.setProperty("mapred.max.split.size", "20");

        String[] inputs = {
            "1\t23\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=flowers",
            "2\t123\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.zeit.de%2Findex.php&q=towers",
            "3\t223\t142\thttp://www.google.com/url&url=http%3A%2F%2Fwww.nix-wie-weg.de&q=mallorca",
            "4\t323\t242\thttp://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama",
            "5\t423\t342\thttp://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama",
            "6\t523\t442\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident"
        };

        try {
            Util.createInputFile(cluster, INPUT_FILE, inputs);
        } catch (Exception e) {
            // Best effort on purpose: setup() runs once per cluster test, so the file may
            // already be present from an earlier test. Record the problem instead of
            // hiding it; a genuinely missing file surfaces when the query is executed.
            log.warn("Could not create input file " + INPUT_FILE
                + " (it may already exist from an earlier test)", e);
        }
    }

    /** Shuts the shared mini cluster down once all tests of this class have run. */
    @AfterClass
    public static void shutdown() {
        cluster.shutDown();
    }

    @Test
    public void testNullDoesNotContainNull() throws IOException {
        Tuple testTuple = Util.buildTuple(null, null);
        assertFalse("null does not contain null", contains_.exec(testTuple));
    }

    @Test
    public void testNullDoesNotContainString() throws IOException {
        Tuple testTuple = Util.buildTuple(null, "anything");
        assertFalse("null does not contain 'anything'", contains_.exec(testTuple));
    }

    @Test
    public void testEmptyStringContainsEmptyString() throws IOException {
        Tuple testTuple = Util.buildTuple("", "");
        assertTrue("Empty string contains empty string", contains_.exec(testTuple));
    }

    @Test
    public void testNonEmptyStringContainsEmptyString() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "");
        assertTrue("Non-empty string contains empty string", contains_.exec(testTuple));
    }

    @Test
    public void testStringNotContained1() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "Ring");
        assertFalse("'A string' does not contain 'Ring'", contains_.exec(testTuple));
    }

    @Test
    public void testStringNotContained2() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "hugo");
        assertFalse("'A string' does not contain 'hugo'", contains_.exec(testTuple));
    }

    @Test
    public void testStringContained1() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "A");
        assertTrue("'A string' contains 'A'", contains_.exec(testTuple));
    }

    @Test
    public void testStringContained2() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", " stri");
        assertTrue("'A string' contains ' stri'", contains_.exec(testTuple));
    }

    /** Filters on a single CONTAINS call; rows 4 and 5 carry 'obama' in the referer. */
    @Test
    public void testFilteringCluster() throws ExecException {
        String[] expected = { ROW_4, ROW_5 };
        assertFilterYields(CONTAINS_UDF + "(referer, 'obama')", expected);
    }

    /** Filters on two CONTAINS calls combined with OR. */
    @Test
    public void testFilteringClusterWithOR1() throws ExecException {
        String[] expected = { ROW_4, ROW_5, ROW_6 };
        assertFilterYields(
            CONTAINS_UDF + "(referer, 'obama') OR " + CONTAINS_UDF + "(referer, 'praesident')",
            expected);
    }

    /** Filters on a CONTAINS call combined via OR with a plain comparison. */
    @Test
    public void testFilteringClusterWithOR2() throws ExecException {
        String[] expected = { ROW_4, ROW_5, ROW_6 };
        assertFilterYields(CONTAINS_UDF + "(referer, 'obama') OR id == 6", expected);
    }

    /**
     * Loads the input file, applies {@code FILTER a BY <filterCondition>} and asserts that
     * the result rows, rendered with {@code toString()}, equal {@code expected} in order.
     *
     * @param filterCondition Pig boolean expression, without the trailing semicolon
     * @param expected        expected rows in result order
     * @throws ExecException if the {@link PigServer} cannot be created
     */
    private void assertFilterYields(String filterCondition, String[] expected) throws ExecException {
        // setup() must run first: it populates props, which the PigServer is built from.
        setup();
        pigServer = new PigServer(ExecType.MAPREDUCE, props);
        try {
            Util.registerMultiLineQuery(pigServer,
                "a = LOAD '" + INPUT_FILE + "' AS (id:int, grp:int, additional:int, referer:chararray); "
                + "b = FILTER a BY "
                + filterCondition + ";");

            Iterator<Tuple> iter = pigServer.openIterator("b");
            int counter = 0;
            while (iter.hasNext()) {
                String actual = iter.next().toString();
                // Report surplus rows as a readable failure instead of running off the array.
                assertTrue("Unexpected additional result row: " + actual, counter < expected.length);
                assertEquals(expected[counter], actual);
                counter++;
            }
            assertEquals(expected.length, counter);
        } catch (Exception e) {
            // Turn any query failure into a test failure that keeps the original cause.
            AssertionError failure = new AssertionError("Pig query failed: " + e);
            failure.initCause(e);
            throw failure;
        } finally {
            // Release the server even when an assertion above fails.
            pigServer.shutdown();
        }
    }
}