Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- package com.adition.pig.test;
- import java.io.IOException;
- import java.util.Iterator;
- import java.util.Properties;
- import java.util.Map.Entry;
- import org.junit.Test;
- import junit.framework.Assert;
- import static org.junit.Assert.*;
- import org.junit.runner.RunWith;
- import org.junit.runners.JUnit4;
- import org.junit.AfterClass;
- import org.junit.Before;
- import org.apache.commons.logging.Log;
- import org.apache.commons.logging.LogFactory;
- import com.adition.pig.filtering.string.CONTAINS;
- import org.apache.pig.test.Util;
- import org.apache.pig.test.MiniCluster;
- import org.apache.pig.EvalFunc;
- import org.apache.pig.builtin.TRIM;
- import org.apache.pig.PigServer;
- import org.apache.pig.ExecType;
- import org.apache.pig.data.Tuple;
- import org.apache.pig.data.TupleFactory;
- import org.apache.pig.backend.executionengine.ExecException;
- import org.apache.pig.impl.io.FileLocalizer;
- import org.apache.pig.test.Util;
- @RunWith(JUnit4.class)
- public class TestCONTAINS {
- private static final EvalFunc<Boolean> contains_ = new CONTAINS();
- protected final Log log = LogFactory.getLog(getClass());
- private static MiniCluster cluster = MiniCluster.buildCluster();
- private PigServer pigServer = null;
- private Properties props = null;
- String INPUT_FILE = "TestCONTAINS-testFilteringCluster-input.txt";
- public void setup() {
- FileLocalizer.setInitialized(false);
- props = new Properties();
- for (Entry<Object, Object> entry : cluster.getProperties().entrySet()) {
- props.put(entry.getKey(), entry.getValue());
- }
- props.setProperty("mapred.max.split.size", "20");
- String[] inputs = {
- "1\t23\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=flowers",
- "2\t123\t42\thttp://www.google.com/url&url=http%3A%2F%2Fwww.zeit.de%2Findex.php&q=towers",
- "3\t223\t142\thttp://www.google.com/url&url=http%3A%2F%2Fwww.nix-wie-weg.de&q=mallorca",
- "4\t323\t242\thttp://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama",
- "5\t423\t342\thttp://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama",
- "6\t523\t442\thttp://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident"
- };
- try {
- Util.createInputFile(cluster, INPUT_FILE, inputs);
- } catch (Exception e) {
- }
- }
- @AfterClass
- public static void shutdown() {
- cluster.shutDown();
- }
- @Test
- public void testNullDoesNotContainNull() throws IOException {
- Tuple testTuple = Util.buildTuple(null, null);
- assertFalse("null does not contain null", contains_.exec(testTuple));
- }
- @Test
- public void testNullDoesNotContainString() throws IOException {
- Tuple testTuple = Util.buildTuple(null, "anything");
- assertFalse("null does not contain 'anything'", contains_.exec(testTuple));
- }
- @Test
- public void testEmptyStringContainsEmptyString() throws IOException {
- Tuple testTuple = Util.buildTuple("", "");
- assertTrue("Empty string contains empty string", contains_.exec(testTuple));
- }
- @Test
- public void testNonEmptyStringContainsEmptyString() throws IOException {
- Tuple testTuple = Util.buildTuple("A string", "");
- assertTrue("Non-empty string contains empty string", contains_.exec(testTuple));
- }
- @Test
- public void testStringNotContained1() throws IOException {
- Tuple testTuple = Util.buildTuple("A string", "Ring");
- assertFalse("'A string' does not contain 'Ring'", contains_.exec(testTuple));
- }
- @Test
- public void testStringNotContained2() throws IOException {
- Tuple testTuple = Util.buildTuple("A string", "hugo");
- assertFalse("'A string' does not contain 'hugo'", contains_.exec(testTuple));
- }
- @Test
- public void testStringContained1() throws IOException {
- Tuple testTuple = Util.buildTuple("A string", "A");
- assertTrue("'A string' contains 'A'", contains_.exec(testTuple));
- }
- @Test
- public void testStringContained2() throws IOException {
- Tuple testTuple = Util.buildTuple("A string", " stri");
- assertTrue("'A string' contains ' stri'", contains_.exec(testTuple));
- }
- @Test
- public void testFilteringCluster() throws ExecException {
- this.setup();
- String[] expected = {
- "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
- "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)"
- };
- pigServer = new PigServer(ExecType.MAPREDUCE, props);
- try {
- Util.registerMultiLineQuery(pigServer, "a = LOAD '" + INPUT_FILE +
- "' AS (id:int, grp:int, additional:int, referer:chararray); " +
- "b = FILTER a BY " +
- "com.adition.pig.filtering.string.CONTAINS(referer, 'obama');");
- Iterator<Tuple> iter = pigServer.openIterator("b");
- int counter = 0;
- while (iter.hasNext()){
- assertEquals(expected[counter++].toString(), iter.next().toString());
- }
- assertEquals(expected.length, counter);
- } catch (Exception e) {
- e.printStackTrace();
- Assert.fail();
- }
- pigServer.shutdown();
- }
- @Test
- public void testFilteringClusterWithOR1() throws ExecException {
- this.setup();
- String[] expected = {
- "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
- "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)",
- "(6,523,442,http://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident)"
- };
- pigServer = new PigServer(ExecType.MAPREDUCE, props);
- try {
- Util.registerMultiLineQuery(pigServer, "a = LOAD '" + INPUT_FILE +
- "' AS (id:int, grp:int, additional:int, referer:chararray); " +
- "b = FILTER a BY " +
- "com.adition.pig.filtering.string.CONTAINS(referer, 'obama') OR " +
- "com.adition.pig.filtering.string.CONTAINS(referer, 'praesident');");
- Iterator<Tuple> iter = pigServer.openIterator("b");
- int counter = 0;
- while (iter.hasNext()){
- assertEquals(expected[counter++].toString(), iter.next().toString());
- }
- assertEquals(expected.length, counter);
- } catch (Exception e) {
- e.printStackTrace();
- Assert.fail();
- }
- pigServer.shutdown();
- }
- @Test
- public void testFilteringClusterWithOR2() throws ExecException {
- this.setup();
- String[] expected = {
- "(4,323,242,http://www.google.com/url&url=http%3A%2F%2Fwww.tagesschau.de&q=barack%20obama)",
- "(5,423,342,http://www.google.com/url&url=http%3A%2F%2Fwww.bild.de&q=obama)",
- "(6,523,442,http://www.google.com/url&url=http%3A%2F%2Fwww.example.com%2Fmypage.htm&q=praesident)"
- };
- pigServer = new PigServer(ExecType.MAPREDUCE, props);
- try {
- Util.registerMultiLineQuery(pigServer, "a = LOAD '" + INPUT_FILE +
- "' AS (id:int, grp:int, additional:int, referer:chararray); " +
- "b = FILTER a BY " +
- "com.adition.pig.filtering.string.CONTAINS(referer, 'obama') OR " +
- "id == 6;");
- Iterator<Tuple> iter = pigServer.openIterator("b");
- int counter = 0;
- while (iter.hasNext()){
- assertEquals(expected[counter++].toString(), iter.next().toString());
- }
- assertEquals(expected.length, counter);
- } catch (Exception e) {
- e.printStackTrace();
- Assert.fail();
- }
- pigServer.shutdown();
- }
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement