Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- import com.google.common.base.Strings
- import org.apache.spark.{SparkConf, SparkContext}
- import scala.collection.mutable
- import scala.util.control.Breaks._
/**
 * One output record of the search-to-item job.
 *
 * Every field is optional text. The field names are part of the contract: they become the
 * column names when an RDD of this class is turned into a DataFrame.
 *
 * @param q        query word the event was attributed to
 * @param smseq    value taken from the `col_content` column of the source row
 *                 (presumably an item sequence id; confirm against the table definition)
 * @param udid     device/user identifier of the source row
 * @param city     city of the source row
 * @param mem_guid member identifier of the source row
 * @param datetime timestamp text of the source row
 * @param typename type label recorded together with the query word
 */
case class QueryLogOut(
    q: Option[String],
    smseq: Option[String],
    udid: Option[String],
    city: Option[String],
    mem_guid: Option[String],
    datetime: Option[String],
    typename: Option[String]
)
/**
 * Spark job that reads click-log rows from `temp.search2cart`, links add-to-cart rows
 * (page code "3010") and item-view rows (page code "3036") to a query word previously
 * recorded for the same `udid`, and saves the linked rows as tab-separated text under
 * `/data/search2item`.
 *
 * NOTE(review): the three lookup maps below are plain `mutable.HashMap`s held by this
 * singleton and mutated from inside the function passed to `map` in [[main]]. The job only
 * repartitions by `udid`; it never sorts rows, so the outcome depends on the order in which
 * rows happen to be processed and on which JVM processes them. The maps are also not
 * synchronized, which matters if one JVM runs several tasks at once. Confirm whether this
 * is acceptable before relying on the output.
 *
 * NOTE(review): both handlers require entries under the `udid + "3035"` key, but nothing in
 * this object ever stores such an entry. Unless another component fills the maps, every row
 * ends up with an empty query and is filtered out. `getType`, `isInvalidQuery` and
 * `isValidSMSEQ` are not called here either; presumably a handler for page code "3035" is
 * missing from this file.
 */
object SearchToItem {

  /** Query word last recorded for each `udid + pageCode` key. */
  val dic = new mutable.HashMap[String, String]

  /** Row number (kept as text) last recorded for each `udid + pageCode` key. */
  val dicRowNum = new mutable.HashMap[String, String]

  /** Type label last recorded for each `udid + pageCode` key. */
  val dicType = new mutable.HashMap[String, String]

  /**
   * Largest row-number gap at which a row is still linked to a recorded event. Chained
   * add-to-cart rows must match this gap exactly.
   */
  val SEARCH_TO_CART_RELATION_NUM = 1

  // Page codes used to build the map keys. The add-to-cart and item meanings come from the
  // original inline comments; the other two are inferred from variable names only.
  private val SearchPage = "3035"
  private val AddToCartPage = "3010"
  private val ItemPage = "3036"
  private val GoodsItemPage = "4011"

  /** Record returned for rows that could not be linked; `main` filters it out. */
  private val EmptyLog =
    QueryLogOut(Option(""), Option(""), Option(""), Option(""), Option(""), Option(""), Option(""))

  /**
   * Entry point: runs the query, converts every row with [[proessSearchToItemNew]], keeps
   * the rows whose query word is non-empty and writes them out.
   *
   * The Spark context is stopped even when the job fails.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val sparkConf = new SparkConf()
      .set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
      .set("spark.kryoserializer.buffer.max", "192M")
    val sc = new SparkContext(sparkConf)
    try {
      val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
      val newSql =
        """select udid,
          | page_col,
          | col_content,
          | query,
          | city,
          | rownum,
          | mem_guid,
          | datetime,
          | type
          | from temp.search2cart
          | """.stripMargin
      val searchToItemDF = hiveContext.sql(newSql)
      val searchToItemNewRDD = searchToItemDF
        .repartition(300, searchToItemDF.col("udid"))
        .map { r =>
          // Option(...) turns SQL NULLs into None.
          def col(i: Int): Option[Any] = Option(r.get(i))
          proessSearchToItemNew(col(0), col(1), col(2), col(3), col(4), col(5), col(6), col(7), col(8))
        }
        .filter(_.q.getOrElse("") != "")
      hiveContext.createDataFrame(searchToItemNewRDD)
        .rdd
        .map(_.mkString("\t"))
        .saveAsTextFile("/data/search2item")
    } finally {
      sc.stop()
    }
  }

  /**
   * Converts one source row into an output record.
   *
   * Missing values become `""`, except the row number, which becomes `"0"`. Only page codes
   * "3010" (add to cart) and "3036" (item view) are handled; any other code yields a record
   * whose fields are all `Some("")`.
   *
   * @return the linked record, or a record with an empty `q` when the row cannot be linked
   */
  def proessSearchToItemNew(udid_o: Option[Any], page_col_o: Option[Any], col_pos_content_o: Option[Any],
      query_o: Option[Any], city_o: Option[Any], rownum_o: Option[Any],
      mem_guid_o: Option[Any], datetime_o: Option[Any], typename_o: Option[Any]): QueryLogOut = {
    def text(value: Option[Any], default: String = ""): String = value.getOrElse(default).toString

    val udid = text(udid_o)
    val page_col = text(page_col_o)
    val col_pos_content = text(col_pos_content_o)
    val query = text(query_o)
    val city = text(city_o)
    val rowNum = text(rownum_o, "0")
    val mem_guid = text(mem_guid_o)
    val datetime = text(datetime_o)
    val typename = text(typename_o)

    page_col match {
      // The "add to cart" button was clicked: the item is put into the cart.
      case AddToCartPage =>
        processAddToCart(udid, col_pos_content, city, rowNum, mem_guid, datetime)
      // An item page was viewed.
      case ItemPage =>
        processItemPage(udid, page_col, col_pos_content, query, city, rowNum, mem_guid, datetime,
          typename, udid + ItemPage, "-3036")
      case _ =>
        EmptyLog
    }
  }

  /**
   * Handles an add-to-cart row.
   *
   * The query word comes from the recorded search entry when the row is close enough to it.
   * Otherwise it comes from the previous add-to-cart entry, but only when that entry is
   * exactly `SEARCH_TO_CART_RELATION_NUM` rows back. A record is produced, and the
   * add-to-cart entry updated, only when `col_pos_content` is non-empty.
   */
  private def processAddToCart(udid: String, col_pos_content: String, city: String, rowNum: String,
      mem_guid: String, datetime: String): QueryLogOut = {
    val searchKey = udid + SearchPage
    val cartKey = udid + AddToCartPage

    if (!(dic.contains(searchKey) && dicRowNum.contains(searchKey))) {
      EmptyLog
    } else {
      // (query word, type label) to link to, or None when the row cannot be linked.
      val source: Option[(String, String)] =
        if (!isInvalidProcessLog(searchKey, rowNum)) {
          Some((dic.getOrElse(searchKey, ""), dicType.getOrElse(searchKey, "")))
        } else if (rowGap(cartKey, rowNum) == Some(SEARCH_TO_CART_RELATION_NUM)) {
          val word = dic.getOrElse(cartKey, "")
          // Kept from the original: makes sure `dic` has the key even if only the row number did.
          dic.put(cartKey, word)
          Some((word, dicType.getOrElse(cartKey, "")))
        } else {
          None
        }

      source match {
        case Some((queryword, linkedType)) if !Strings.isNullOrEmpty(col_pos_content) =>
          dic.put(cartKey, queryword)
          dicRowNum.put(cartKey, rowNum)
          dicType.put(cartKey, linkedType)
          QueryLogOut(Option(queryword), Option(col_pos_content), Option(udid), Option(city),
            Option(mem_guid), Option(datetime), Option(linkedType))
        case _ =>
          EmptyLog
      }
    }
  }

  /**
   * Handles an item-view row.
   *
   * The recorded search entry must exist, and its query word must not be empty or "NULL"
   * and its type must not be "3". The query word and type are then taken from the first of
   * the search, item, goods-item and add-to-cart entries that is close enough to `rownum`.
   * When `col_pos_content` is non-empty the chosen values are stored under `ItemType` and
   * `typeNum` is appended to the returned type; otherwise the returned query word is `""`.
   *
   * @param page_col not read by this method
   * @param query    not read by this method
   * @param typename not read by this method; the type always comes from `dicType`
   * @param ItemType key under which the chosen values are stored
   * @param typeNum  suffix appended to the returned type (e.g. "1-3036", "2-3036")
   * @return a record that always carries the row's own fields; `q` is `""` when unlinked
   */
  def processItemPage(udid: String, page_col: String, col_pos_content: String, query: String,
      city: String, rownum: String, mem_guid: String, datetime: String, typename: String,
      ItemType: String, typeNum: String): QueryLogOut = {
    val searchKey = udid + SearchPage
    // Candidate entries in priority order.
    val candidates = Seq(searchKey, udid + ItemPage, udid + GoodsItemPage, udid + AddToCartPage)

    val (queryword, linkedType): (String, String) =
      if (!(dic.contains(searchKey) && dicRowNum.contains(searchKey))) {
        ("", "")
      } else {
        val searchWord = dic.getOrElse(searchKey, "")
        val searchType = dicType.getOrElse(searchKey, "")
        if (Strings.isNullOrEmpty(searchWord) || "NULL" == searchWord || "3" == searchType) {
          ("", searchType)
        } else {
          candidates.find(key => !isInvalidProcessLog(key, rownum)) match {
            case None =>
              ("", searchType)
            case Some(key) =>
              val word = dic.getOrElse(key, "")
              val tpe = dicType.getOrElse(key, "")
              if (!Strings.isNullOrEmpty(col_pos_content)) {
                dic.put(ItemType, word)
                dicRowNum.put(ItemType, rownum)
                dicType.put(ItemType, tpe)
                (word, tpe + typeNum)
              } else {
                ("", tpe)
              }
          }
        }
      }

    QueryLogOut(Option(queryword), Option(col_pos_content), Option(udid), Option(city),
      Option(mem_guid), Option(datetime), Option(linkedType))
  }

  /**
   * Classifies a query: "2" when it is `C` followed by five digits or `CC` followed by six
   * digits, otherwise "1". Queries shorter than two characters return "1" instead of
   * throwing.
   */
  def getType(query: String): String = {
    val singleC = query.length == 6 && query.startsWith("C") && isAllDigits(query.substring(1))
    val doubleC = query.length == 8 && query.startsWith("CC") && isAllDigits(query.substring(2))
    if (singleC || doubleC) "2" else "1"
  }

  /** True when every character of `x` is a digit (also true for the empty string). */
  def isAllDigits(x: String): Boolean = x.forall(c => Character.isDigit(c))

  /** True when `rowNum` is non-null, non-empty and consists only of digits. */
  def isValidDigitsRowNum(rowNum: String): Boolean =
    !Strings.isNullOrEmpty(rowNum) && isAllDigits(rowNum)

  /** True when `query` is null, empty, or has the shape checked by [[isValidSMSEQ]]. */
  def isInvalidQuery(query: String): Boolean =
    Strings.isNullOrEmpty(query) || isValidSMSEQ(query)

  /**
   * True when `sm_seq` is 17 characters long with "CM" first occurring at index 6, or is at
   * least 9 characters long and all digits.
   */
  def isValidSMSEQ(sm_seq: String): Boolean =
    (sm_seq.length == 17 && sm_seq.indexOf("CM") == 6) ||
      (sm_seq.length >= 9 && isAllDigits(sm_seq))

  /**
   * True when the row must NOT be linked to the entry stored under `typeName`.
   *
   * That is the case when `rowNum` is not a non-empty digit string, `typeName` is null or
   * empty, no row number is stored for `typeName`, either number cannot be read as an
   * `Int`, or the row is more than `SEARCH_TO_CART_RELATION_NUM` rows past the stored one.
   * The checks short-circuit, so bad input returns `true` instead of throwing.
   */
  def isInvalidProcessLog(typeName: String, rowNum: String): Boolean =
    !isValidDigitsRowNum(rowNum) ||
      Strings.isNullOrEmpty(typeName) ||
      rowGap(typeName, rowNum).forall(_ > SEARCH_TO_CART_RELATION_NUM)

  /** Parses `s` as an `Int`; `None` for null, non-numeric or out-of-range text. */
  private def toIntOpt(s: String): Option[Int] = scala.util.Try(s.toInt).toOption

  /**
   * `rowNum` minus the row number stored under `key`, or `None` when nothing is stored or
   * either value is not an `Int`.
   */
  private def rowGap(key: String, rowNum: String): Option[Int] =
    for {
      current <- toIntOpt(rowNum)
      previous <- dicRowNum.get(key).flatMap(toIntOpt)
    } yield current - previous
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement