Advertisement
Guest User

Untitled

a guest
May 24th, 2018
102
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.59 KB | None | 0 0
  1. cofrom __future__ import print_function
  2. from __future__ import division
  3. from __future__ import absolute_import
  4.  
  5. # We're using pandas to read the CSV file. This is easy for small datasets, but for large and complex datasets,
  6. # tensorflow parsing and processing functions are more powerful.
  7. import pandas as pd
  8. import numpy as np
  9.  
  10. import tensorflow as tf
  11. import glob
  12. #import cloudstorage as gcs
  13. import os
  14. from numpy import nan
  15.  
  16. #from google.appengine.api import app_identity
  17.  
  18. # We removed the csv header as part of the ETL proces so we'll define them here.
  19. names = [
  20.     'FL_DATE',
  21.     'UNIQUE_CARRIER',
  22.     'AIRLINE_ID',
  23.     'CARRIER',
  24.     'FL_NUM',
  25.     'ORIGIN_AIRPORT_ID',
  26.     'ORIGIN_AIRPORT_SEQ_ID',
  27.     'ORIGIN_CITY_MARKET_ID',
  28.     'ORIGIN',
  29.     'DEST_AIRPORT_ID',
  30.     'DEST_AIRPORT_SEQ_ID',
  31.     'DEST_CITY_MARKET_ID',
  32.     'DEST',
  33.     'CRS_DEP_TIME',
  34.     'DEP_TIME',
  35.     'DEP_DELAY',
  36.     'TAXI_OUT',
  37.     'WHEELS_OFF',
  38.     'WHEELS_ON',
  39.     'TAXI_IN',
  40.     'CRS_ARR_TIME',
  41.     'ARR_TIME',
  42.     'ARR_DELAY',
  43.     'CANCELLED',
  44.     'CANCELLATION_CODE',
  45.     'DIVERTED',
  46.     'DISTANCE'
  47. ]
  48.  
  49. # Here we'll specify the dtypes.
  50. dtypes = {
  51.     'FL_DATE': str,
  52.     'UNIQUE_CARRIER': str,
  53.     'AIRLINE_ID': np.float64,
  54.     'CARRIER': str,
  55.     'FL_NUM': np.float32,
  56.     'ORIGIN_AIRPORT_ID': np.float32,
  57.     'ORIGIN_AIRPORT_SEQ_ID': np.float32,
  58.     'ORIGIN_CITY_MARKET_ID': np.float32,
  59.     'ORIGIN': str,
  60.     'DEST_AIRPORT_ID': np.float32,
  61.     'DEST_AIRPORT_SEQ_ID': np.float32,
  62.     'DEST_CITY_MARKET_ID': np.float32,
  63.     'DEST': str,
  64.     'CRS_DEP_TIME': np.float32,
  65.     'DEP_TIME': np.float32,
  66.     'DEP_DELAY': np.float32,
  67.     'TAXI_OUT': np.float32,
  68.     'WHEELS_OFF': np.float32,
  69.     'WHEELS_ON': np.float32,
  70.     'TAXI_IN': np.float32,
  71.     'CRS_ARR_TIME': np.float32,
  72.     'ARR_TIME': np.float32,
  73.     'ARR_DELAY': np.float32,
  74.     'CANCELLED': np.float32,
  75.     'CANCELLATION_CODE': str,
  76.     'DIVERTED': np.float32,
  77.     'DISTANCE': np.float32,
  78. }
  79.  
  80. path = '201701.csv' # use your path
  81. # allFiles = glob.glob(path + "01/*.csv")
  82. # print("printing allfiles")
  83. # print(allFiles)
  84. # frame = pd.DataFrame()
  85. # list_ = []
  86. # for file_ in allFiles:
  87. #     print(file_)
  88. #     df = pd.read_csv(file_,index_col=None, header=0)
  89. #     list_.append(df)
  90. # frame = pd.concat(list_)
  91.  
  92. # Read the file.
  93.  
  94. #df = pd.concat([pd.read_csv(f) for f in glob.glob(path +'*.csv')], names=names, ignore_index = True)
  95. df = pd.read_csv(path, header=1, names=names, dtype=dtypes, na_values='?')
  96. df.fillna("0", inplace=True)
  97. df.info()
  98.  
  99. # Split the data into a training set and an eval set.
  100.  
  101. training_data = df.loc[4:8]
  102. training_data.index
  103. eval_data = df.iloc[1]
  104. test_data = df.iloc[:10]
  105.  
  106. print("training data print")
  107. print(training_data)
  108.  
  109. #training_data, training_label = df, df.pop(ARR_TIME)
  110. training_label = df.loc[4:8,['FL_NUM']]
  111. eval_label = eval_data.pop('FL_NUM')
  112. test_label = test_data.pop('FL_NUM')
  113.  
  114. training_input_fn = tf.estimator.inputs.pandas_input_fn(x=training_data, y=training_label, shuffle=True, num_epochs=None)
  115.  
  116. #eval_input_fn = tf.estimator.inputs.pandas_input_fn(x=eval_data, y=eval_label, batch_size=64, shuffle=False)
  117.  
  118. #test_input_fn = tf.estimator.inputs.pandas_input_fn(x=test_data, y=test_label, batch_size=10, shuffle=False)
  119.  
  120. #Feature columns
  121. carrier = tf.feature_column.categorical_column_with_vocabulary_list(key='CARRIER', vocabulary_list=['WN', 'OO', 'NK', 'AA', 'DL', 'UA'])
  122. distance = tf.feature_column.numeric_column(key='DISTANCE')
  123. dep_delay = tf.feature_column.numeric_column(key='DEP_DELAY')
  124.  
# Linear Regressor: baseline model. LinearRegressor consumes the sparse
# categorical carrier column directly — no indicator wrapping needed.

linear_features = [carrier, distance, dep_delay]
regressor = tf.estimator.LinearRegressor(feature_columns=linear_features)
# Train for a fixed 10000 steps on the repeating training input fn.
regressor.train(input_fn=training_input_fn, steps=10000)
# NOTE(review): eval_input_fn is commented out above, so re-enabling this
# line as-is would raise NameError.
#regressor.evaluate(input_fn=eval_input_fn)
  131.  
  132. #Deep Neural Network
  133.  
  134. dnn_features = [
  135.     #numerical features
  136.     distance, fl_num, dep_delay,
  137.     # densify categorical features:
  138.     tf.feature_column.indicator_column(carrier),
  139. ]
  140.  
  141. dnnregressor = tf.contrib.learn.DNNRegressor(feature_columns=dnn_features, hidden_units=[50, 30, 10])
  142. dnnregressor.fit(input_fn=training_input_fn, steps=10000)
  143. dnnregressor.evaluate(input_fn=eval_input_fn)
  144.  
  145. #Predict
  146.  
  147. predictions = list(dnnregressor.predict_scores(input_fn=training_input_fn))
  148. print(predictions)
  149.  
  150. #predictionsLarge = list(dnnregressor.predict_scores(input_fn=eval_input_fn))
  151. #print(predictionsLarge)
  152.  
  153. #predictionsLinear = list(regressor.predict_scores(input_fn=test_input_fn))
  154. #print(predictionsLinear)
  155.  
  156. #predictionsLinearLarge = list(regressor.predict_scores(input_fn=eval_input_fn))
  157. #print(predictionsLinearLarge)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement