Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Join DieX and DieY with ':' using the built-in concat_ws and display the result.
mica.select(
    concat_ws(":", mica["DieX"], mica["DieY"])
).show()

# or use a UDF:
# the UDF joins its two string arguments with ':' and returns a string.
func = udf(lambda c1, c2: ":".join((c1, c2)), StringType())
mica.withColumn(
    "fid",
    func(col("LotId"), col("WaferId")),
).show()
- Define a UDF
def position_X(s):
    """Format a signed integer string as a sign letter plus zero-padded digits.

    Non-negative values are prefixed with 'P', negative values with 'N'
    (the '-' itself is dropped), and the digits are left-padded with '0'
    to at least two characters:

        "5" -> "P05", "-5" -> "N05", "12" -> "P12", "-12" -> "N12"

    Args:
        s: The value as a string, optionally starting with '-'.  May be
            None or empty when the source cell is missing.

    Returns:
        The formatted string, or ``s`` unchanged when it is None or empty.
    """
    # Missing cells reach a Python UDF as None; pass them through instead of
    # raising a TypeError inside the executor.
    if not s:
        return s

    # Decide the sign from the leading '-' rather than by comparing strings:
    # a lexicographic test such as s >= "10" misclassifies zero-padded
    # positives like "05" as negative.
    is_negative = s.startswith("-")
    digits = s[1:] if is_negative else s
    prefix = "N" if is_negative else "P"
    return prefix + digits.zfill(2)
# Register position_X with the Spark session under the name "position_X".
spark.udf.register(name="position_X", f=position_X)
def position_Y(s):
    """Left-pad a single-character string with '0'; return anything else as is.

        "5" -> "05", "12" -> "12"

    Args:
        s: The value as a string, or None when the source cell is missing.

    Returns:
        The two-character padded string for single-character input, the
        input unchanged otherwise, or None when ``s`` is None.
    """
    # Missing cells reach a Python UDF as None; len(None) would raise a
    # TypeError, so pass them through.
    if s is None:
        return None
    return "0" + s if len(s) == 1 else s
# Register position_Y with the Spark session under the name "position_Y".
spark.udf.register(name="position_Y", f=position_Y)
- from pyspark.sql.functions import *
- from pyspark.sql.functions import udf,col
- from pyspark.sql.types import *
# Trim the identifier columns: LotId keeps 7 characters starting at position 1,
# WaferId keeps 2 characters starting at position 6 (substring positions are
# 1-based).  The columns are overwritten in place with withColumn so nothing
# depends on Spark's auto-generated column names such as
# 'substring(LotId, 1, 7)'.
pdf = (
    pdf.withColumn("LotId", substring("LotId", 1, 7))
    .withColumn("WaferId", substring("WaferId", 6, 2))
)

# Wrap the plain-Python formatters as string-returning UDFs and apply them to
# the die coordinate columns.
Die_X = udf(position_X, StringType())
Die_Y = udf(position_Y, StringType())
pdf = (
    pdf.withColumn("DieX", Die_X(col("DieX")))
    .withColumn("DieY", Die_Y(col("DieY")))
)

# Build the composite key "LotId:WaferId:DieX:DieY" and drop the four source
# columns once they have been folded into it.
pdf = pdf.withColumn(
    "Fid",
    concat_ws(":", col("LotId"), col("WaferId"), col("DieX"), col("DieY")),
).drop("DieX", "DieY", "LotId", "WaferId")
Add Comment
Please, Sign In to add comment