Advertisement
Ladies_Man

#HADOOP Lab6 (HBase) FillTable

Jan 25th, 2016
126
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Java 5.29 KB | None | 0 0
  1. import org.apache.hadoop.conf.Configuration;
  2. import org.apache.hadoop.hbase.HBaseConfiguration;
  3. import org.apache.hadoop.hbase.TableName;
  4. import org.apache.hadoop.hbase.client.*;
  5. import org.apache.hadoop.hbase.util.Bytes;
  6.  
  7. import java.io.BufferedReader;
  8. import java.io.FileReader;
  9. import java.io.IOException;
  10.  
  11. public class FillTable {
  12.     //col family
  13.     private static final byte[] CF = "data".getBytes();
  14.     //cols
  15.     private static final byte[] ATTR0_YEAR = "year".getBytes();
  16.     private static final byte[] ATTR1_QUARTER = "quarter".getBytes();
  17.     private static final byte[] ATTR2_MONTH = "month".getBytes();
  18.     private static final byte[] ATTR3_DAY_OF_MONTH = "day_of_month".getBytes();
  19.     private static final byte[] ATTR4_DAY_OF_WEEK = "day_of_week".getBytes();
  20.     private static final byte[] ATTR5_FL_DATE = "fl_date".getBytes();
  21.     private static final int FL_DATE = 5;
  22.     private static final byte[] ATTR6_UNIQUE_CARRIER = "unique_carrier".getBytes();
  23.     private static final byte[] ATTR7_AIRLINE_ID = "airline_id".getBytes();
  24.     private static final int AIRLINE_ID = 7;
  25.     private static final byte[] ATTR8_CARRIER = "carrier".getBytes();
  26.     private static final byte[] ATTR9_TAIL_NUM = "tail_num".getBytes();
  27.     private static final byte[] ATTR10_FL_NUM = "fl_num".getBytes();
  28.     private static final byte[] ATTR11_ORIGIN_AIRPORT_ID = "origin_airport_id".getBytes();
  29.     private static final byte[] ATTR12_ORIGIN_AIRPORT_SEQ_ID = "origin_airport_seq_id".getBytes();
  30.     private static final byte[] ATTR13_ORIGIN_CITY_MARKET_ID = "origin_city_market_id".getBytes();
  31.     private static final byte[] ATTR14_DEST_AIRPORT_ID = "dest_airport_id".getBytes();
  32.     private static final byte[] ATTR15_WHEELS_ON = "wheels_on".getBytes();
  33.     private static final byte[] ATTR16_ARR_TIME = "arr_time".getBytes();
  34.     private static final byte[] ATTR17_ARR_DELAY = "arr_delay".getBytes();
  35.     private static final byte[] ATTR18_ARR_DELAY_NEW = "arr_delay_new".getBytes();
  36.     private static final byte[] ATTR19_CANCELLED = "cancelled".getBytes();
  37.     private static final byte[] ATTR20_CANCELLATION_CODE = "cancellation_code".getBytes();
  38.     private static final byte[] ATTR21_AIR_TIME = "air_time".getBytes();
  39.     private static final byte[] ATTR22_DISTANCE = "distance".getBytes();
  40.  
  41.  
  42.     public static void main(String[] args) throws IOException {
  43.  
  44.         Configuration config = HBaseConfiguration.create();
  45.         config.set("hbase.zookeper.quorum", "localhost");
  46.  
  47.         //hbase> create "flights", "data"
  48.         Connection connection = ConnectionFactory.createConnection(config);
  49.         Table table = connection.getTable(TableName.valueOf("flights"));
  50.  
  51.         String data_path = "/home/anthony/hadlab6/664600583_T_ONTIME_sample.csv";
  52.         BufferedReader reader = new BufferedReader(new FileReader(data_path));
  53.  
  54.         int row_num = 0;
  55.         while (true) {
  56.  
  57.             String line = reader.readLine();
  58.             if (null == line) {
  59.                 break;
  60.             }
  61.  
  62.             String[] columns = line.replace("\"", "").split(",");
  63.  
  64.             if (!columns[0].equals("YEAR")) {
  65.  
  66.                 Put put = new Put(Bytes.toBytes(
  67.                         columns[FL_DATE] + "_" + columns[AIRLINE_ID] + "_" + row_num));
  68.  
  69.                 put.addColumn(CF, ATTR0_YEAR, Bytes.toBytes(columns[0]));
  70.                 put.addColumn(CF, ATTR1_QUARTER, Bytes.toBytes(columns[1]));
  71.                 put.addColumn(CF, ATTR2_MONTH, Bytes.toBytes(columns[2]));
  72.                 put.addColumn(CF, ATTR3_DAY_OF_MONTH, Bytes.toBytes(columns[3]));
  73.                 put.addColumn(CF, ATTR4_DAY_OF_WEEK, Bytes.toBytes(columns[4]));
  74.                 put.addColumn(CF, ATTR5_FL_DATE, Bytes.toBytes(columns[5]));
  75.                 put.addColumn(CF, ATTR6_UNIQUE_CARRIER, Bytes.toBytes(columns[6]));
  76.                 put.addColumn(CF, ATTR7_AIRLINE_ID, Bytes.toBytes(columns[7]));
  77.                 put.addColumn(CF, ATTR8_CARRIER, Bytes.toBytes(columns[8]));
  78.                 put.addColumn(CF, ATTR9_TAIL_NUM, Bytes.toBytes(columns[9]));
  79.                 put.addColumn(CF, ATTR10_FL_NUM, Bytes.toBytes(columns[10]));
  80.                 put.addColumn(CF, ATTR11_ORIGIN_AIRPORT_ID, Bytes.toBytes(columns[11]));
  81.                 put.addColumn(CF, ATTR12_ORIGIN_AIRPORT_SEQ_ID, Bytes.toBytes(columns[12]));
  82.                 put.addColumn(CF, ATTR13_ORIGIN_CITY_MARKET_ID, Bytes.toBytes(columns[13]));
  83.                 put.addColumn(CF, ATTR14_DEST_AIRPORT_ID, Bytes.toBytes(columns[14]));
  84.                 put.addColumn(CF, ATTR15_WHEELS_ON, Bytes.toBytes(columns[15]));
  85.                 put.addColumn(CF, ATTR16_ARR_TIME, Bytes.toBytes(columns[16]));
  86.                 put.addColumn(CF, ATTR17_ARR_DELAY, Bytes.toBytes(columns[17]));
  87.                 put.addColumn(CF, ATTR18_ARR_DELAY_NEW, Bytes.toBytes(columns[18]));
  88.                 put.addColumn(CF, ATTR19_CANCELLED, Bytes.toBytes(columns[19]));
  89.                 put.addColumn(CF, ATTR20_CANCELLATION_CODE, Bytes.toBytes(columns[20]));
  90.                 put.addColumn(CF, ATTR21_AIR_TIME, Bytes.toBytes(columns[21]));
  91.                 put.addColumn(CF, ATTR22_DISTANCE, Bytes.toBytes(columns[22]));
  92.  
  93.                 table.put(put);
  94.                 row_num++;
  95.             }
  96.         }
  97.  
  98.         reader.close();
  99.  
  100.         table.close();
  101.  
  102.         connection.close();
  103.  
  104.  
  105.     }
  106.  
  107. }
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement