package com.adition.pig.test;
import java.io.IOException;
import java.util.Iterator;
import java.util.Properties;
import java.util.Map.Entry;
import org.junit.Test;
import junit.framework.Assert;
import static org.junit.Assert.*;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;
import org.junit.AfterClass;
import org.junit.Before;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import com.adition.pig.filtering.string.CONTAINS;
import org.apache.pig.test.Util;
import org.apache.pig.test.MiniCluster;
import org.apache.pig.EvalFunc;
import org.apache.pig.builtin.TRIM;
import org.apache.pig.PigServer;
import org.apache.pig.ExecType;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.backend.executionengine.ExecException;
import org.apache.pig.impl.io.FileLocalizer;
import org.apache.pig.test.Util;
/**
 * Tests for the {@link CONTAINS} filter function.
 *
 * <p>The first group of tests calls {@code exec} directly on hand-built tuples.
 * The second group runs Pig Latin {@code FILTER} statements that use the function
 * against a {@link MiniCluster} in {@link ExecType#MAPREDUCE} mode and compares
 * the resulting tuples with the expected ones.
 */
@RunWith(JUnit4.class)
public class TestCONTAINS {

    /** Function under test; shared by all direct-invocation tests. */
    private static final EvalFunc<Boolean> contains_ = new CONTAINS();

    /** Name of the input file that the cluster tests load. */
    static final String INPUT_FILE = "TestCONTAINS-testFilteringCluster-input.txt";

    /** Cluster shared by all tests of this class; shut down in {@link #shutdown()}. */
    private static MiniCluster cluster = MiniCluster.buildCluster();

    /**
     * Set once the input file has been uploaded successfully. JUnit creates a new
     * instance of this class per test method, so the flag has to be static.
     */
    private static boolean inputFileCreated = false;

    protected final Log log = LogFactory.getLog(getClass());

    /** Properties handed to each {@link PigServer}; populated by {@link #setup()}. */
    private Properties props = null;

    /**
     * Prepares the state needed by the cluster tests: resets the
     * {@link FileLocalizer} initialized flag, copies the cluster properties into
     * {@link #props}, and uploads the input file to the cluster.
     *
     * <p>The upload is attempted until it has succeeded once. A failure is logged
     * instead of being thrown, so this method stays best-effort as before; the
     * calling test will then fail when it tries to load the file.
     */
    public void setup() {
        FileLocalizer.setInitialized(false);

        props = new Properties();
        for (Entry<Object, Object> entry : cluster.getProperties().entrySet()) {
            props.put(entry.getKey(), entry.getValue());
        }
        // NOTE(review): presumably a tiny split size is used so that the input is
        // spread over several map tasks -- confirm.
        props.setProperty("mapred.max.split.size", "20");

        if (inputFileCreated) {
            return;
        }

        String[] inputs = {
            "1\t23\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=flowers",
            "2\t123\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.zeit.de%2Findex.php&q=towers",
            "3\t223\t142\thttp://www.google.com/url&url=http%3A%2F%2Fwww.nix-wie-weg.de&q=mallorca",
            "4\t323\t242\thttp://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama",
            "5\t423\t342\thttp://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama",
            "6\t523\t442\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident"
        };
        try {
            Util.createInputFile(cluster, INPUT_FILE, inputs);
            inputFileCreated = true;
        } catch (IOException e) {
            // Previously swallowed silently; keep going but leave a trace.
            log.warn("Could not create input file '" + INPUT_FILE + "' on the mini cluster", e);
        }
    }

    /** Shuts down the shared mini cluster after all tests have run. */
    @AfterClass
    public static void shutdown() {
        cluster.shutDown();
    }

    @Test
    public void testNullDoesNotContainNull() throws IOException {
        Tuple testTuple = Util.buildTuple(null, null);
        assertFalse("null does not contain null", contains_.exec(testTuple));
    }

    @Test
    public void testNullDoesNotContainString() throws IOException {
        Tuple testTuple = Util.buildTuple(null, "anything");
        assertFalse("null does not contain 'anything'", contains_.exec(testTuple));
    }

    @Test
    public void testEmptyStringContainsEmptyString() throws IOException {
        Tuple testTuple = Util.buildTuple("", "");
        assertTrue("Empty string contains empty string", contains_.exec(testTuple));
    }

    @Test
    public void testNonEmptyStringContainsEmptyString() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "");
        assertTrue("Non-empty string contains empty string", contains_.exec(testTuple));
    }

    /** The match is expected to be case sensitive: "Ring" is not found in "A string". */
    @Test
    public void testStringNotContained1() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "Ring");
        assertFalse("'A string' does not contain 'Ring'", contains_.exec(testTuple));
    }

    @Test
    public void testStringNotContained2() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "hugo");
        assertFalse("'A string' does not contain 'hugo'", contains_.exec(testTuple));
    }

    @Test
    public void testStringContained1() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", "A");
        assertTrue("'A string' contains 'A'", contains_.exec(testTuple));
    }

    @Test
    public void testStringContained2() throws IOException {
        Tuple testTuple = Util.buildTuple("A string", " stri");
        assertTrue("'A string' contains ' stri'", contains_.exec(testTuple));
    }

    /** Filters on a single CONTAINS call. */
    @Test
    public void testFilteringCluster() throws ExecException {
        String[] expected = {
            "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
            "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)"
        };
        assertFilterYields(
            "com.adition.pig.filtering.string.CONTAINS(referer, 'obama')",
            expected);
    }

    /** Filters on two CONTAINS calls combined with OR. */
    @Test
    public void testFilteringClusterWithOR1() throws ExecException {
        String[] expected = {
            "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
            "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)",
            "(6,523,442,http://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident)"
        };
        assertFilterYields(
            "com.adition.pig.filtering.string.CONTAINS(referer, 'obama') OR " +
            "com.adition.pig.filtering.string.CONTAINS(referer, 'praesident')",
            expected);
    }

    /** Filters on a CONTAINS call combined with a plain comparison via OR. */
    @Test
    public void testFilteringClusterWithOR2() throws ExecException {
        String[] expected = {
            "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
            "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)",
            "(6,523,442,http://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident)"
        };
        assertFilterYields(
            "com.adition.pig.filtering.string.CONTAINS(referer, 'obama') OR " +
            "id == 6",
            expected);
    }

    /**
     * Loads {@link #INPUT_FILE} on the mini cluster, filters it by the given
     * condition and asserts that the result consists of exactly the expected
     * tuples, compared by their string form and in the given order.
     *
     * @param filterCondition Pig Latin boolean expression placed after {@code FILTER a BY},
     *                        without the trailing semicolon
     * @param expected        expected string representation of each result tuple, in order
     * @throws ExecException if the {@link PigServer} cannot be created
     */
    private void assertFilterYields(String filterCondition, String[] expected) throws ExecException {
        setup();
        PigServer server = new PigServer(ExecType.MAPREDUCE, props);
        try {
            Util.registerMultiLineQuery(server, "a = LOAD '" + INPUT_FILE +
                "' AS (id:int, grp:int, additional:int, referer:chararray); " +
                "b = FILTER a BY " + filterCondition + ";");

            Iterator<Tuple> rows = server.openIterator("b");
            int matched = 0;
            while (rows.hasNext()) {
                String actual = rows.next().toString();
                // Guard the index so that surplus rows are reported as such
                // rather than as an ArrayIndexOutOfBoundsException.
                assertTrue("Unexpected additional row: " + actual, matched < expected.length);
                assertEquals(expected[matched], actual);
                matched++;
            }
            assertEquals("Number of rows returned by the filter", expected.length, matched);
        } catch (Exception e) {
            // Assertion failures are Errors and pass through untouched; everything
            // else becomes a test failure that keeps the original cause attached.
            AssertionError failure = new AssertionError("Running the filter query failed: " + e);
            failure.initCause(e);
            throw failure;
        } finally {
            // Release the server even when an assertion above has failed.
            server.shutdown();
        }
    }
}