Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- /**
- * Created by nacta_000 on 25.06.2015.
- */
- import org.apache.commons.math3.geometry.euclidean.oned.Interval
- import org.apache.spark.SparkContext
- import org.apache.spark.SparkContext._
- import org.apache.spark.SparkConf
- import org.joda.time.{DateTime, Seconds}
- import org.apache.lucene.search.spell.LevensteinDistance
- import scala.io.Source
- //import org.scala_tools.time.Imports._
- object SparkApp {
/**
 * Tells whether two timestamps lie within `interval` seconds of each other.
 *
 * Both strings are passed to Joda-Time's `DateTime` constructor, so they must be in a
 * format that constructor accepts (e.g. "2015-06-01T00:00:18Z").
 *
 * @param date1    first timestamp
 * @param date2    second timestamp
 * @param interval maximum allowed gap in whole seconds (inclusive)
 * @return `true` when the gap between the two instants, in either order, is at most
 *         `interval` seconds; `false` otherwise
 */
def checkInterval(date1: String, date2: String, interval: Int): Boolean = {
  val first = new DateTime(date1)
  val second = new DateTime(date2)
  // secondsBetween is signed (negative when date2 precedes date1). Without abs, any
  // out-of-order pair would pass the check no matter how far apart the instants are.
  val gapSeconds = math.abs(Seconds.secondsBetween(first, second).getSeconds)
  gapSeconds <= interval
}
/**
 * Compares two strings using Lucene's `LevensteinDistance`.
 *
 * @param str1 first string
 * @param str2 second string
 * @return the value produced by `LevensteinDistance.getDistance(str1, str2)`
 *         (NOTE(review): presumably a similarity score in [0, 1] where 1 means
 *         identical - confirm against the Lucene version on the classpath)
 */
def distance(str1: String, str2: String): Float =
  new LevensteinDistance().getDistance(str1, str2)
/**
 * Inserts the digit "1" immediately in front of the final two digits of `grz`.
 *
 * The pattern is a zero-width lookahead, so nothing is removed: the "1" is placed at
 * the first position that is followed by exactly two digits and then the end of the
 * input. A string that does not end in two digits is returned unchanged.
 *
 * NOTE(review): presumably this turns a two-digit region suffix of a plate number into
 * its three-digit "1xx" form - confirm with the caller's intent.
 *
 * @param grz the string to transform
 * @return `grz` with "1" inserted before its trailing two digits, or `grz` itself when
 *         there is no such suffix
 */
def changeRegion(grz: String): String =
  grz.replaceFirst("""(?=\d\d$)""", "1")
- /*
- def compareGrz(grz1: String, grz2: String) : Boolean = {
- grz1.count(str=>str == '1')
- grz1.map(c=>(c,grz1.indexOf(c)))
- // TODO: consider Levenshtein distance here (org.apache.lucene.search.spell.LevensteinDistance)
- math.abs(grz1.length - grz2.length) match{
- case 0 => compareCharByChar(grz1, grz2)
- case 1 =>
- if (grz1.length < grz2.length){
- val newGrz = changeRegion(grz1)
- if (newGrz == grz2)
- true
- else
- false
- }
- else {
- val newGrz = changeRegion(grz2)
- if (newGrz == grz1)
- true
- else
- false
- }
- case _ => false
- }
- }*/
/**
 * Entry point: reads `first50.json` through Spark SQL, converts every row into a
 * `Data` record, groups the records by `SetId`, and prints each group to stdout.
 *
 * The Spark context is created with master "local" and is always stopped before the
 * method returns, including when the job fails.
 *
 * @param args command-line arguments (not used)
 */
def main(args: Array[String]): Unit = {
  // Only referenced by the commented-out checkInterval experiment further down.
  val interval = 7
  val conf = new SparkConf().setAppName("Test App").setMaster("local")
  val sc = new SparkContext(conf)
  try {
    val sqlContext = new org.apache.spark.sql.SQLContext(sc)
    //import sqlContext.implicits._

    case class Data(Date: String, Number: String, CameraId: Int, SetId: Int)

    // NOTE(review): columns are read by position and every one of them as a string;
    // this assumes the inferred JSON schema has at least five string columns in the
    // expected order - confirm against the actual file. A null or non-string value
    // in any of these positions will fail at runtime.
    val records = sqlContext.read.json("first50.json").rdd.map { row =>
      Data(
        row.getString(2),
        row.getString(3).trim,
        row.getString(1).toInt,
        row.getString(4).toInt
      )
    }

    // collect() pulls every group to the driver before printing.
    records.groupBy(_.SetId).collect().foreach(println)

    //println(df.collect())
    //println(checkInterval("2015-06-01T00:00:18Z","2015-06-01T00:00:26Z", interval))
    /* val lines = sc.textFile("H:\\first50.json")
    val lineLengths = lines.map(s => 1)
    val totalLength = lineLengths.reduce((a, b) => a + b)
    println(totalLength)*/
  } finally {
    // Release the context's resources even if reading or parsing throws.
    sc.stop()
  }
}
- }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement