Advertisement
Guest User

Untitled

a guest
Apr 17th, 2015
199
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  # Morphline configuration for indexing HBase rows into Solr.
  #
  # Layout:
  #   SOLR_LOCATOR - shared Solr connection settings, referenced below through
  #                  ${SOLR_LOCATOR} substitution.
  #   morphlines   - list of morphline definitions; this file defines exactly
  #                  one, with id "morphline1".

  # Specify server locations in a SOLR_LOCATOR variable; used later in
  # variable substitutions:
  SOLR_LOCATOR : {
    # Name of solr collection
    collection : IDEAL_tweets

    # ZooKeeper ensemble
    zkHost : "127.0.0.1:2181/solr"
  }

  morphlines : [
    {
      id : morphline1

      # Package patterns searched for morphline command implementations.
      importCommands : ["org.kitesdk.morphline.**", "com.ngdata.**", "com.cloudera.cdk.morphline.**", "org.apache.solr.**"]

      # Commands run in the order listed; each one passes its record on to
      # the next.
      commands : [
        # Map HBase cells to record fields. Each mapping reads the cell named
        # by inputColumn and stores its value, as a string, in outputField.
        {
          extractHBaseCells {
            mappings : [
              {
                inputColumn : "original:tweet_id"
                outputField : "id"
                type : string
                source : value
              }
              {
                inputColumn : "original:text_clean"
                outputField : "text"
                type : string
                source : value
              }
            ]
          }
        }

        # This command deletes record fields that are unknown to Solr
        # schema.xml. Solr throws an exception on any attempt to load a
        # document that contains a field that is not specified in schema.xml.
        #
        # NOTE(review): this runs before the split command below, which writes
        # a "title" field; that field is therefore not sanitized. Confirm that
        # "title" exists in schema.xml or move this command to the end.
        {
          sanitizeUnknownSolrFields {
            # Location from which to fetch Solr schema
            solrLocator : ${SOLR_LOCATOR}
          }
        }

        # convert timestamp field to native Solr timestamp format
        # such as 2012-09-06T07:14:34Z to 2012-09-06T07:14:34.000Z
        #
        # NOTE(review): no extractHBaseCells mapping above produces
        # "created_at" - confirm whether a mapping is missing.
        {
          convertTimestamp {
            field : created_at
            inputFormats : ["yyyy-MM-dd'T'HH:mm:ss'Z'", "yyyy-MM-dd"]
            inputTimezone : UTC
            outputFormat : "yyyy-MM-dd'T'HH:mm:ss.SSS'Z'"
            outputTimezone : UTC
          }
        }

        # Split the "description" field on "|" and store the pieces in "title".
        #
        # NOTE(review): no extractHBaseCells mapping above produces
        # "description" - confirm whether a mapping is missing.
        {
          split {
            inputField : "description"
            outputField : "title"
            separator : "|"
          }
        }

        # Log the final record at TRACE level for debugging.
        {
          logTrace {
            format : "output record: {}", args : ["@{}"]
          }
        }
      ]
    }
  ]
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement