Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- #amount of trains in each station
- from plotly.offline import plot
- from plotly.graph_objs import *
- x = trips.toPandas()['trip_headsign']
- data = Data([Histogram(x=x)])
- p = plot(data, filename = 'basic_histogram', output_type = 'div')
- displayHTML(p)
- # Graph of amount of stops for each train route
- from plotly.offline import plot
- from plotly.graph_objs import *
- # Create random data with numpy
- import numpy as np
- # Create a trace
- trace = go.Scatter(x = lineinfo.toPandas()['vehicle_id'], y = lineinfo.toPandas()['nr_of_stops'],mode = 'markers')
- data = [trace]
- # Plot and embed in ipython notebook!
- p = plot(data, filename = 'basic_histogram', output_type = 'div')
- displayHTML(p)
- #graph of arrival delays per station (boxplot)
- from plotly.offline import plot
- import plotly.graph_objs as go
- X = trip_updates.toPandas()['arrival_delay'] / 60
- data = [go.Box(y = X, x =trip_updates.toPandas()['label'])]
- p = plot(data, filename='Boxplots_arrival_delay',output_type='div', validate = False)
- displayHTML(p)
- # graph of departure delays per station (boxplot)
- from plotly.offline import plot
- import plotly.graph_objs as go
- X = trip_updates.toPandas()['departure_delay'] / 60
- data = [go.Box(y = X, x =trip_updates.toPandas()['label'])]
- p = plot(data, filename='Boxplots_departure_delay',output_type='div', validate = False)
- displayHTML(p)
- # graph of arrival delays per train type
- from pyspark.sql.functions import udf
- from pyspark.sql.types import StringType, FloatType, IntegerType
- import re
- import time
- from plotly.offline import plot
- import plotly.graph_objs as go
- trainType = udf(lambda x: re.findall("[a-zA-Z]+", x)[0], StringType())
- trip_updates = trip_updates.withColumn('train_type', trainType("id"))
- X = trip_updates.toPandas()['arrival_delay'] / 60
- data = [go.Box(y = X, x =trip_updates.toPandas()['train_type'])]
- p = plot(data, filename='Boxplots_arrival_delay',output_type='div', validate = False)
- displayHTML(p)
- # Boxplot of train delays per train type
- X = trip_updates.toPandas()['departure_delay'] / 60
- data = [go.Box(y = X, x =trip_updates.toPandas()['train_type'])]
- p = plot(data, filename='Boxplots_arrival_delay',output_type='div', validate = False)
- displayHTML(p)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement