Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- package org.apache.bigtop.bigpetstore.generator;
- import java.util.Date
- import org.apache.bigtop.bigpetstore.generator.util.State
- import org.apache.bigtop.bigpetstore.util.Pair
- import org.apache.bigtop.bigpetstore.util.StringUtils
- import org.apache.commons.lang3.{StringUtils => commonsStringUtils}
- import java.util.Arrays.asList
- import java.util.Random
- import scala.collection.Iterator
- import com.sun.org.apache.xml.internal.serializer.ToStream
- import java.util.{Iterator => JavaIterator}
- import scala.collection.JavaConversions.asJavaIterator
/**
 * Entry point for generating synthetic BigPetStore transaction data.
 *
 * The long-term intent is to embed bias in the generated data which can later be
 * teased out again, e.g. by clustering/classifiers. For example:
 *
 * certain products <--> certain years or days
 *
 * @param records number of records each produced iterator yields; must be greater than 0
 * @param state   the store state the transactions are generated for
 */
class TransactionIteratorFactory(private val records: Int, private val state: State) {
  assert(records > 0, "Number of records must be greater than 0 to generate a data iterator!")

  val MINUTES_IN_DAY: Int = 60 * 24
  val MAX_PRICE: Int = 10000

  // Seeded from the state's hash code; this one instance is handed to every
  // iterator created by `data`.
  private val random = new Random(state.hashCode)

  /**
   * Builds a new iterator over the generated transactions.
   *
   * Unlike the earlier java version, which handed back the same (possibly
   * already spent) iterator on repeated calls, every invocation here creates
   * a fresh iterator.
   *
   * @return a Java iterator of key/value transaction records
   */
  def data: JavaIterator[TransactionIteratorFactory.KeyVal[String, String]] = {
    val generated: Iterator[TransactionIteratorFactory.KeyVal[String, String]] =
      new TransactionIteratorFactory.DataIterator(records, state, random)
    // Spell out the Scala -> Java adaptation instead of relying on the implicit view.
    asJavaIterator(generated)
  }
}
object TransactionIteratorFactory {

  /**
   * Immutable key/value holder for one generated record.
   *
   * @param key   the record key
   * @param value the record payload
   * @tparam K type of the key
   * @tparam V type of the value
   */
  class KeyVal[K, V](val key: K, val value: V)

  /**
   * Iterator that yields `records` generated transactions for `state`.
   *
   * Each element's key is `BigPetStore,storeCode_<state name>,<index>` and its
   * value is `<first name>,<last name>,<date>,<price>,<product>`.
   *
   * @param records total number of elements to produce
   * @param state   store state used for the key and for picking products
   * @param r       random source used for customer names and repeat counts
   */
  private class DataIterator(records: Int, state: State, r: Random)
      extends Iterator[KeyVal[String, String]] {

    private var elementsProducedCount = 0
    private var repeatCount = 0

    // The current customer is iterator state rather than a local of next():
    // a repeat transaction must reuse the previously drawn name. As locals
    // they were reset to null on every call, so repeat records lost the name.
    private var firstName: String = ""
    private var lastName: String = ""

    /** Add some decimals to the price */
    private def fudgePrice(product: String, i: Int): Float =
      if (product.contains("dog")) i + .50f
      else if (product.contains("cat")) i - .50f
      else if (product.contains("fish")) i - .25f
      else i + .10f

    def hasNext: Boolean = elementsProducedCount < records

    /**
     * Produces the next transaction record.
     *
     * @return the key/value pair for the next transaction
     * @throws NoSuchElementException if all `records` elements were already produced
     */
    def next(): TransactionIteratorFactory.KeyVal[String, String] = {
      if (!hasNext) {
        throw new NoSuchElementException("All " + records + " records have already been produced")
      }

      val date = DataForger.randomDateInPastYears(50)

      /** Some customers come back for more :) We repeat the current name while repeats remain */
      if (repeatCount > 0) {
        repeatCount -= 1
      } else {
        firstName = DataForger.firstName(r)
        lastName = DataForger.lastName(r)
        // Gaussian draw scaled by 10 and truncated; a non-positive result
        // means this customer does not come back.
        repeatCount = (r.nextGaussian * 10f).toInt
      }

      val productPrice = state.randProduct()
      val fudgedPrice = fudgePrice(productPrice.getFirst(), productPrice.getSecond()).toString

      val key = commonsStringUtils.join(asList("BigPetStore", "storeCode_" + state.name(),
        elementsProducedCount.toString), ",")
      val value = commonsStringUtils.join(asList(firstName, lastName, date, fudgedPrice,
        productPrice.getFirst()), ",")

      elementsProducedCount += 1
      new TransactionIteratorFactory.KeyVal(key, value)
    }
  }
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement