Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
// Records read from HBase; the loading expression is elided in this snippet.
val objectRDD : RDD[HbaseRecord] = ...

// Turn every HbaseRecord into a nested Row laid out as
//   Row(uuid, timestamp, Row(firstName, middleName, lastName), Row(homeContact, workContact))
val rowRDD: RDD[Row] = objectRDD.map { record =>
  // Scalar columns.
  val id: String = record.uuid
  val ts: String = record.timestamp

  // Name parts are optional; a missing part becomes the empty string.
  val names = record.nameMap
  val fullName = Row(
    names.firstName.getOrElse(""),
    names.middleName.getOrElse(""),
    names.lastName.getOrElse("")
  )

  // Contacts are keyed by kind ("HOME", "WORK"); each entry becomes a
  // (contactType, areaCode, number) Row.
  // NOTE(review): the value returned by contactsMap.get is dereferenced directly,
  // so this presumably relies on both keys always being present - confirm.
  val contactsByKind = record.contactsMap
  def contactRow(kind: String) = {
    val entry = contactsByKind.get(kind)
    Row(entry.contactType, entry.areaCode, entry.number)
  }
  val home = contactRow("HOME")
  val work = contactRow("WORK")

  Row(id, ts, fullName, Row(home, work))
}
// Schema for the RDD[Row] built above. It must mirror the Row nesting exactly:
//   Row(uuid, timestamp, Row(firstName, middleName, lastName), Row(homeContact, workContact))
// A nested Row is described by a nested StructType with the same number of fields
// in the same order.
val schema = {
  // Shape shared by the home and the work contact: (contactType, areaCode, number).
  val contactType = new StructType()
    .add("contactType", StringType)
    .add("areaCode", StringType)
    .add("number", StringType)

  // The name struct is closed before "contacts" is added, so that "contacts"
  // is a top-level column rather than a field nested inside "name".
  val nameType = new StructType()
    .add("firstName", StringType)
    .add("middleName", StringType)
    .add("lastName", StringType)

  // "contacts" holds two nested contact structs (home first, then work),
  // matching the order in which the contacts Row is assembled.
  val contactsType = new StructType()
    .add("home", contactType)
    .add("work", contactType)

  new StructType()
    .add("uuid", StringType)
    .add("timestamp", StringType)
    .add("name", nameType)
    .add("contacts", contactsType)
}

// Build the DataFrame from the RDD[Row] and the matching schema.
val dataFrame = sqlContext.createDataFrame(rowRDD, schema)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement