Aim: - Creating a data model using Cassandra

Step 1: - Run the 'cassandra.bat' file as administrator to start the Cassandra server.

Step 2: - Double-click the 'cqlsh' file to start the CQL shell (cqlsh runs on Python).

Step 3: - Create the data model as follows:

cqlsh> create keyspace key_st_041 WITH replication={'class':'SimpleStrategy','replication_factor':3};
cqlsh> use key_st_041;
cqlsh:key_st_041> create table dept(dept_id int PRIMARY KEY,dept_name text,dept_location text);
cqlsh:key_st_041> create table emp(emp_id int PRIMARY KEY,emp_name text,dept_id int,emp_email text,emp_phone text);
cqlsh:key_st_041> insert into dept (dept_id,dept_name,dept_location) values (1001,'Accounts','Mumbai');
cqlsh:key_st_041> insert into dept (dept_id,dept_name,dept_location) values (1002,'Marketing','Chennai');
cqlsh:key_st_041> insert into dept (dept_id,dept_name,dept_location) values (1002,'HR','Banglore');
cqlsh:key_st_041> insert into emp (emp_id,emp_name,dept_id,emp_email,emp_phone) values (1001,'Siddhi Thakkar',1001,'s@email.com','1234567890');
cqlsh:key_st_041> insert into emp (emp_id,emp_name,dept_id,emp_email,emp_phone) values (1002,'Riddhi Thakkar',1002,'r@email.com','8796541230');
cqlsh:key_st_041> insert into emp (emp_id,emp_name,dept_id,emp_email,emp_phone) values (1003,'Ruchi Thakkar',1003,'rt@email.com','8796541889');
cqlsh:key_st_041>
cqlsh:key_st_041> select * from emp;

 emp_id | dept_id | emp_email    | emp_name       | emp_phone
--------+---------+--------------+----------------+------------
   1001 |    1001 |  s@email.com | Siddhi Thakkar | 1234567890
   1003 |    1003 | rt@email.com | Ruchi Thakkar  | 8796541889
   1002 |    1002 |  r@email.com | Riddhi Thakkar | 8796541230

(3 rows)
cqlsh:key_st_041> select * from dept;

 dept_id | dept_location | dept_name
---------+---------------+-----------
    1001 |        Mumbai |  Accounts
    1002 |      Banglore |        HR

(2 rows)

(Note: dept holds only two rows because the third insert reused dept_id 1002; an INSERT on an existing primary key acts as an upsert in Cassandra, so the 'Marketing' row was overwritten by 'HR'.)

cqlsh:key_st_041> update dept set dept_name='Human Resources' where dept_id=1002;
cqlsh:key_st_041> select * from dept;

 dept_id | dept_location | dept_name
---------+---------------+-----------------
    1001 |        Mumbai |        Accounts
    1002 |      Banglore | Human Resources

(2 rows)
cqlsh:key_st_041> delete from emp where emp_id=1003;
cqlsh:key_st_041> select * from emp;

 emp_id | dept_id | emp_email   | emp_name       | emp_phone
--------+---------+-------------+----------------+------------
   1001 |    1001 | s@email.com | Siddhi Thakkar | 1234567890
   1002 |    1002 | r@email.com | Riddhi Thakkar | 8796541230

(2 rows)
cqlsh:key_st_041> alter table emp add emp_location text;
cqlsh:key_st_041> select * from emp;

 emp_id | dept_id | emp_email   | emp_location | emp_name       | emp_phone
--------+---------+-------------+--------------+----------------+------------
   1001 |    1001 | s@email.com |         null | Siddhi Thakkar | 1234567890
   1002 |    1002 | r@email.com |         null | Riddhi Thakkar | 8796541230

(2 rows)
cqlsh:key_st_041> alter table emp drop emp_location text;
SyntaxException: line 1:34 mismatched input 'text' expecting EOF (alter table emp drop emp_location [text]...)
cqlsh:key_st_041> alter table emp drop emp_location;
cqlsh:key_st_041> select * from emp;

 emp_id | dept_id | emp_email   | emp_name       | emp_phone
--------+---------+-------------+----------------+------------
   1001 |    1001 | s@email.com | Siddhi Thakkar | 1234567890
   1002 |    1002 | r@email.com | Riddhi Thakkar | 8796541230

(2 rows)
cqlsh:key_st_041>
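The same model can also be read from Python. A minimal sketch, assuming the DataStax driver is installed (pip install cassandra-driver) and the server from Step 1 is listening on localhost; the keyspace and table names come from the session above:

# Hypothetical client-side check of the model built above (DataStax Python driver).
from cassandra.cluster import Cluster

cluster = Cluster(['127.0.0.1'])         # assumes a local single-node Cassandra
session = cluster.connect('key_st_041')  # keyspace created in Step 3
for row in session.execute('SELECT emp_id, emp_name FROM emp'):
    print(row.emp_id, row.emp_name)
cluster.shutdown()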
------------------------------------------------------------------------------------------
A. Text-delimited CSV to HORUS format.

Code:
import pandas as pd
InputData=pd.read_csv(r'Desktop\Country.csv',encoding="latin-1")
print('Input Data Values ===================================')
print(InputData)
ProcessData=InputData
ProcessData.rename(columns={'Year': 'year'}, inplace=True)
ProcessData.rename(columns={'ccTLD': 'cctld'}, inplace=True)
ProcessData.to_csv(r"E:\country2.csv")
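The script assumes a Country.csv on the Desktop containing at least Year and ccTLD columns. A minimal sketch that fabricates such a file for testing (the path and values are illustrative only):

import pandas as pd
sample = pd.DataFrame({'Country': ['Aruba', 'Albania'],
                       'Year': [2019, 2019],
                       'ccTLD': ['.aw', '.al']})
sample.to_csv(r'Desktop\Country.csv', index=False)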
----------------------------------------------------------------------------
B. XML to HORUS Format

Code:

# Utility Start XML to HORUS =================================
# Standard Tools
import pandas as pd
import xml.etree.ElementTree as ET

# Serialize a DataFrame into <root><entry>...</entry></root> XML
def df2xml(data):
    header = data.columns
    root = ET.Element('root')
    for row in range(data.shape[0]):
        entry = ET.SubElement(root,'entry')
        for index in range(data.shape[1]):
            schild=str(header[index])
            child = ET.SubElement(entry, schild)
            if str(data[schild][row]) != 'nan':
                child.text = str(data[schild][row])
            else:
                child.text = 'n/a'
    result = ET.tostring(root)
    return result

# Parse <root><entry>...</entry></root> XML back into a DataFrame
def xml2df(xml_data):
    root = ET.XML(xml_data)
    all_records = []
    for i, child in enumerate(root):
        record = {}
        for subchild in child:
            record[subchild.tag] = subchild.text
        all_records.append(record)
    return pd.DataFrame(all_records)

sInputFileName='C:/VKHCG/05-DS/9999-Data/Country_Code.xml'
InputData = open(sInputFileName).read()
print('=====================================================')
print('Input Data Values ===================================')
print('=====================================================')
print(InputData)
print('=====================================================')
ProcessDataXML=InputData
# XML to Data Frame
ProcessData=xml2df(ProcessDataXML)
# Remove columns ISO-2-CODE and ISO-3-Code
ProcessData.drop('ISO-2-CODE', axis=1,inplace=True)
ProcessData.drop('ISO-3-Code', axis=1,inplace=True)
# Rename Country and ISO-M49
ProcessData.rename(columns={'Country': 'CountryName'}, inplace=True)
ProcessData.rename(columns={'ISO-M49': 'CountryNumber'}, inplace=True)
# Set new Index
ProcessData.set_index('CountryNumber', inplace=True)
# Sort data by CountryName
ProcessData.sort_values('CountryName', axis=0, ascending=False, inplace=True)
print('=====================================================')
print('Process Data Values =================================')
print('=====================================================')
print(ProcessData)
print('=====================================================')
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-XML-Country.csv'
OutputData.to_csv(sOutputFileName, index = False)
print('=====================================================')
print('XML to HORUS - Done')
print('=====================================================')
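As a quick sanity check, xml2df can be exercised on an inline fragment before pointing the script at Country_Code.xml; the element names below mirror the columns the script expects, and the values are made up:

sample_xml = "<root><entry><Country>Aruba</Country>" \
             "<ISO-2-CODE>AW</ISO-2-CODE><ISO-3-Code>ABW</ISO-3-Code>" \
             "<ISO-M49>533</ISO-M49></entry></root>"
print(xml2df(sample_xml))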
-------------------------------------------------------------------
C. JSON to HORUS Format

Code:

# Utility Start JSON to HORUS =================================
# Standard Tools
#=============================================================
import pandas as pd
# Input Agreement ============================================
sInputFileName='C:/VKHCG/05-DS/9999-Data/Country_Code.json'
InputData=pd.read_json(sInputFileName, orient='index', encoding="latin-1")
print('Input Data Values ===================================')
print(InputData)
print('=====================================================')
# Processing Rules ===========================================
ProcessData=InputData
# Remove columns ISO-2-CODE and ISO-3-Code
ProcessData.drop('ISO-2-CODE', axis=1,inplace=True)
ProcessData.drop('ISO-3-Code', axis=1,inplace=True)
# Rename Country and ISO-M49
ProcessData.rename(columns={'Country': 'CountryName'}, inplace=True)
ProcessData.rename(columns={'ISO-M49': 'CountryNumber'}, inplace=True)
# Set new Index
ProcessData.set_index('CountryNumber', inplace=True)
# Sort data by CountryName
ProcessData.sort_values('CountryName', axis=0, ascending=False, inplace=True)
print('Process Data Values =================================')
print(ProcessData)
print('=====================================================')
# Output Agreement ===========================================
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-JSON-Country.csv'
OutputData.to_csv(sOutputFileName, index = False)
print('JSON to HORUS - Done')
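With orient='index', pd.read_json expects a top-level object keyed by row labels. A sketch that writes a minimal Country_Code.json in that shape for testing (the single record is illustrative, not real ISO data):

import json
sample = {"0": {"Country": "Aruba", "ISO-2-CODE": "AW",
                "ISO-3-Code": "ABW", "ISO-M49": 533}}
with open('C:/VKHCG/05-DS/9999-Data/Country_Code.json', 'w') as f:
    json.dump(sample, f)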
-------------------------------------------------------------------
D. Database (SQLite) to HORUS Format

Code:

import pandas as pd
import sqlite3 as sq

sInputFileName='C:/VKHCG/05-DS/9999-Data/utility.db'
sInputTable='Country_Code'
conn = sq.connect(sInputFileName)
sSQL='SELECT * FROM ' + sInputTable + ';'
InputData=pd.read_sql_query(sSQL, conn)
print('Input Data Values ===================================')
print(InputData)
print('=====================================================')
ProcessData=InputData

ProcessData.drop('ISO-2-CODE', axis=1,inplace=True)
ProcessData.drop('ISO-3-Code', axis=1,inplace=True)

ProcessData.rename(columns={'Country': 'CountryName'}, inplace=True)
ProcessData.rename(columns={'ISO-M49': 'CountryNumber'}, inplace=True)

ProcessData.set_index('CountryNumber', inplace=True)
ProcessData.sort_values('CountryName', axis=0, ascending=False, inplace=True)
print('Process Data Values =================================')
print(ProcessData)
print('=====================================================')

OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-CSV-Country.csv'
OutputData.to_csv(sOutputFileName, index = False)
print('Database to HORUS - Done')
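The utility.db file must already contain a Country_Code table. A sketch that builds one with a single illustrative row (the schema is assumed from the column names used above):

import sqlite3 as sq
conn = sq.connect('C:/VKHCG/05-DS/9999-Data/utility.db')
conn.execute('CREATE TABLE IF NOT EXISTS Country_Code '
             '("Country" TEXT, "ISO-2-CODE" TEXT, "ISO-3-Code" TEXT, "ISO-M49" INTEGER)')
conn.execute('INSERT INTO Country_Code VALUES (?,?,?,?)', ('Aruba','AW','ABW',533))
conn.commit()
conn.close()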
-------------------------------------------------------------------
E. Picture (JPEG) to HORUS Format (Use SPYDER to run this program)

Code:

from scipy.misc import imread
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np

sInputFileName='C:/VKHCG/05-DS/9999-Data/Angus.jpg'
InputData = imread(sInputFileName, flatten=False, mode='RGBA')
print('Input Data Values ===================================')
print('X: ',InputData.shape[0])
print('Y: ',InputData.shape[1])
print('RGBA: ', InputData.shape[2])
print('=====================================================')

ProcessRawData=InputData.flatten()
y=InputData.shape[2] + 2
x=int(ProcessRawData.shape[0]/y)
ProcessData=pd.DataFrame(np.reshape(ProcessRawData, (x, y)))
sColumns= ['XAxis','YAxis','Red', 'Green', 'Blue','Alpha']
ProcessData.columns=sColumns
ProcessData.index.names =['ID']
print('Rows: ',ProcessData.shape[0])
print('Columns :',ProcessData.shape[1])
print('=====================================================')
print('Process Data Values =================================')
print('=====================================================')
plt.imshow(InputData)
plt.show()
print('=====================================================')

OutputData=ProcessData
print('Storing File')
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Picture.csv'
OutputData.to_csv(sOutputFileName, index = False)
print('=====================================================')
print('Picture to HORUS - Done')
print('=====================================================')
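scipy.misc.imread was deprecated and removed in SciPy 1.2, so the import above fails on a current install. A drop-in sketch using imageio instead (assumes pip install imageio; the pilmode keyword is handled by imageio's Pillow plugin and forces the RGBA channels the reshape above relies on):

import imageio
InputData = imageio.imread(sInputFileName, pilmode='RGBA')  # replaces scipy.misc.imread(..., mode='RGBA')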
-------------------------------------------------------------------
F. Video to HORUS Format

Code:

Movie to Frames
import os
import shutil
import cv2

sInputFileName='C:/VKHCG/05-DS/9999-Data/dog.mp4'
sDataBaseDir='C:/VKHCG/05-DS/9999-Data/temp'
# Start with a clean frame directory
if os.path.exists(sDataBaseDir):
    shutil.rmtree(sDataBaseDir)
if not os.path.exists(sDataBaseDir):
    os.makedirs(sDataBaseDir)
print('=====================================================')
print('Start Movie to Frames')
print('=====================================================')
vidcap = cv2.VideoCapture(sInputFileName)
success,image = vidcap.read()
count = 0
while success:
    sFrame=sDataBaseDir + str('/dog-frame-' + str(format(count, '04d'))+ '.jpg')
    print('Extracted: ', sFrame)
    cv2.imwrite(sFrame, image)
    if os.path.getsize(sFrame) == 0:
        count += -1
        os.remove(sFrame)
        print('Removed: ', sFrame)
    if cv2.waitKey(10) == 27:  # exit if Escape is hit
        break
    count += 1
    success,image = vidcap.read()  # read the next frame last, so a failed read is never written
print('=====================================================')
print('Generated : ', count, ' Frames')
print('=====================================================')
print('Movie to Frames HORUS - Done')
print('=====================================================')
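On a machine without OpenCV, pip install opencv-python provides cv2. As a quick check that the capture opens and how many frames to expect, the stream metadata can be queried first; a sketch (property values depend on the container):

import cv2
vidcap = cv2.VideoCapture('C:/VKHCG/05-DS/9999-Data/dog.mp4')
print('Opened :', vidcap.isOpened())
print('Frames :', int(vidcap.get(cv2.CAP_PROP_FRAME_COUNT)))
print('FPS    :', vidcap.get(cv2.CAP_PROP_FPS))
vidcap.release()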
-------------------------------------------------------------------
Frames to HORUS (Use SPYDER to run this program)

from scipy.misc import imread
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import os
# Input Agreement ============================================
sDataBaseDir='C:/VKHCG/05-DS/9999-Data/temp'
f=0
for file in os.listdir(sDataBaseDir):
    if file.endswith(".jpg"):
        f += 1
        sInputFileName=os.path.join(sDataBaseDir, file)
        print('Process : ', sInputFileName)
        InputData = imread(sInputFileName, flatten=False, mode='RGBA')
        print('Input Data Values ===================================')
        print('X: ',InputData.shape[0])
        print('Y: ',InputData.shape[1])
        print('RGBA: ', InputData.shape[2])
        print('=====================================================')
        # Processing Rules ===========================================
        ProcessRawData=InputData.flatten()
        y=InputData.shape[2] + 2
        x=int(ProcessRawData.shape[0]/y)
        ProcessFrameData=pd.DataFrame(np.reshape(ProcessRawData, (x, y)))
        ProcessFrameData['Frame']=file
        print('=====================================================')
        print('Process Data Values =================================')
        print('=====================================================')
        plt.imshow(InputData)
        plt.show()
        if f == 1:
            ProcessData=ProcessFrameData
        else:
            # DataFrame.append was removed in pandas 2.x; concat is the replacement
            ProcessData=pd.concat([ProcessData, ProcessFrameData])
if f > 0:
    sColumns= ['XAxis','YAxis','Red', 'Green', 'Blue','Alpha','FrameName']
    ProcessData.columns=sColumns
    print('=====================================================')
    ProcessData.index.names =['ID']
    print('Rows: ',ProcessData.shape[0])
    print('Columns :',ProcessData.shape[1])
    print('=====================================================')
    # Output Agreement ===========================================
    OutputData=ProcessData
    print('Storing File')
    sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Movie-Frame.csv'
    OutputData.to_csv(sOutputFileName, index = False)
    print('=====================================================')
    print('Processed : ', f,' frames')
print('=====================================================')
print('Movie to HORUS - Done')
print('=====================================================')
-------------------------------------------------------------------
G. Audio to HORUS Format

Code:

from scipy.io import wavfile
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
#=============================================================
def show_info(aname, a, r):
    print('----------------')
    print("Audio:", aname)
    print('----------------')
    print("Rate:", r)
    print('----------------')
    print("shape:", a.shape)
    print("dtype:", a.dtype)
    print("min, max:", a.min(), a.max())
    print('----------------')
    plot_info(aname, a, r)
#=============================================================
def plot_info(aname, a, r):
    sTitle= 'Signal Wave - '+ aname + ' at ' + str(r) + 'hz'
    plt.title(sTitle)
    sLegend=[]
    for c in range(a.shape[1]):
        sLabel = 'Ch' + str(c+1)
        sLegend=sLegend+[str(c+1)]
        plt.plot(a[:,c], label=sLabel)
    plt.legend(sLegend)
    plt.show()
#=============================================================
sInputFileName='C:/VKHCG/05-DS/9999-Data/2ch-sound.wav'
print('=====================================================')
print('Processing : ', sInputFileName)
print('=====================================================')
InputRate, InputData = wavfile.read(sInputFileName)
show_info("2 channel", InputData, InputRate)
ProcessData=pd.DataFrame(InputData)
sColumns= ['Ch1','Ch2']
ProcessData.columns=sColumns
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Audio-2ch.csv'
OutputData.to_csv(sOutputFileName, index = False)
#=============================================================
sInputFileName='C:/VKHCG/05-DS/9999-Data/4ch-sound.wav'
print('=====================================================')
print('Processing : ', sInputFileName)
print('=====================================================')
InputRate, InputData = wavfile.read(sInputFileName)
show_info("4 channel", InputData, InputRate)
ProcessData=pd.DataFrame(InputData)
sColumns= ['Ch1','Ch2','Ch3', 'Ch4']
ProcessData.columns=sColumns
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Audio-4ch.csv'
OutputData.to_csv(sOutputFileName, index = False)
#=============================================================
sInputFileName='C:/VKHCG/05-DS/9999-Data/6ch-sound.wav'
print('=====================================================')
print('Processing : ', sInputFileName)
print('=====================================================')
InputRate, InputData = wavfile.read(sInputFileName)
show_info("6 channel", InputData, InputRate)
ProcessData=pd.DataFrame(InputData)
sColumns= ['Ch1','Ch2','Ch3', 'Ch4', 'Ch5','Ch6']
ProcessData.columns=sColumns
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Audio-6ch.csv'
OutputData.to_csv(sOutputFileName, index = False)
#=============================================================
sInputFileName='C:/VKHCG/05-DS/9999-Data/8ch-sound.wav'
print('=====================================================')
print('Processing : ', sInputFileName)
print('=====================================================')
InputRate, InputData = wavfile.read(sInputFileName)
show_info("8 channel", InputData, InputRate)
ProcessData=pd.DataFrame(InputData)
sColumns= ['Ch1','Ch2','Ch3', 'Ch4', 'Ch5','Ch6','Ch7','Ch8']
ProcessData.columns=sColumns
OutputData=ProcessData
sOutputFileName='C:/VKHCG/05-DS/9999-Data/HORUS-Audio-8ch.csv'
OutputData.to_csv(sOutputFileName, index = False)
print('=====================================================')
print('Audio to HORUS - Done')
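If no multichannel WAV files are at hand, a test file can be synthesized. A sketch that writes a two-channel sine-wave file matching the first block above (the rate, tones, and one-second duration are arbitrary choices):

import numpy as np
from scipy.io import wavfile
rate = 44100
t = np.linspace(0, 1, rate, endpoint=False)
left  = (0.5*np.sin(2*np.pi*440*t)*32767).astype(np.int16)  # A4 tone
right = (0.5*np.sin(2*np.pi*554*t)*32767).astype(np.int16)  # C#5 tone
wavfile.write('C:/VKHCG/05-DS/9999-Data/2ch-sound.wav', rate, np.column_stack((left, right)))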
------------------------------------------------------------------
Practical 3: Utilities and Auditing

A. Fixers Utilities:
Fixers enable your solution to take your existing data and fix a specific quality issue.

import string
import datetime as dt
# 1 Removing leading or trailing spaces from a data entry
print('#1 Removing leading or trailing spaces from a data entry')
baddata = " Data Science with too many spaces is bad!!! "
print('>',baddata,'<')
cleandata=baddata.strip()
print('>',cleandata,'<')
*******************************************************

# 2 Removing nonprintable characters from a data entry
print('#2 Removing nonprintable characters from a data entry')
printable = set(string.printable)
baddata = "Data\x00Science with\x02 funny characters is \x10bad!!!"
cleandata=''.join(filter(lambda x: x in printable, baddata))
print('Bad Data : ',baddata)
print('Clean Data : ',cleandata)
***************************************************************

# 3 Reformatting a data entry to match specific formatting criteria
# Convert YYYY-MM-DD to DD Month YYYY
print('# 3 Reformatting data entry to match specific formatting criteria.')
baddate = dt.date(2019, 10, 31)
baddata=format(baddate,'%Y-%m-%d')
gooddate = dt.datetime.strptime(baddata,'%Y-%m-%d')
gooddata=format(gooddate,'%d %B %Y')
print('Bad Data : ',baddata)
print('Good Data : ',gooddata)
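In the same spirit, further fixers are easy to add. A sketch of one more, collapsing repeated internal spaces and normalizing case (a hypothetical extra, not part of the original list):

# 4 Collapsing repeated spaces and normalizing case
baddata = "Data    SCIENCE   with   SHOUTING"
cleandata = ' '.join(baddata.split()).title()  # split() drops runs of whitespace
print('Bad Data : ', baddata)
print('Clean Data : ', cleandata)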
-------------------------------------------------------------------
B. Averaging of Data

import pandas as pd
sFileName=r"C:\Users\admin\Desktop\MSC(IT)-part1\IP_DATA_ALL.csv"
print('Loading :',sFileName)
IP_DATA_ALL=pd.read_csv(sFileName,header=0,low_memory=False,
                        usecols=['Country','Place Name','Latitude','Longitude'], encoding="latin-1")
IP_DATA_ALL.rename(columns={'Place Name': 'Place_Name'}, inplace=True)
AllData=IP_DATA_ALL[['Country', 'Place_Name','Latitude']]
print(AllData)
MeanData=AllData.groupby(['Country', 'Place_Name'])['Latitude'].mean()
print(MeanData)
---------------------------------------------------------------
C. Data Binning or Bucketing

import pandas as pd
data = pd.read_csv(r"E:\Book1.csv")
bins = [0,45,75,100]
labels = ["not_satisfy","good","Excellent"]
data1 = pd.cut(data['Marks'],bins,labels=labels)
print(data1)

# Note: Book1.csv must contain Name and Marks columns
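A sketch that writes a small Book1.csv to test with (the names and marks are made up); after running the binning above, value_counts() tallies the buckets:

import pandas as pd
sample = pd.DataFrame({'Name': ['Asha','Ravi','Meena'], 'Marks': [40, 68, 91]})
sample.to_csv(r"E:\Book1.csv", index=False)
# after the pd.cut above: print(data1.value_counts())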
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
np.random.seed(0)
# example data
mu = 90     # mean of distribution
sigma = 25  # standard deviation of distribution
x = mu + sigma * np.random.randn(5000)
num_bins = 25
fig, ax = plt.subplots()
# the histogram of the data
n, bins, patches = ax.hist(x, num_bins, density=True)
# add a 'best fit' line
y = stats.norm.pdf(bins, mu, sigma)
ax.plot(bins, y, '--')
ax.set_xlabel('Example Data')
ax.set_ylabel('Probability density')
sTitle='Histogram ' + str(len(x)) + ' entries into ' + str(num_bins) + r' Bins: $\mu=' + str(mu) + r'$, $\sigma=' + str(sigma) + '$'
ax.set_title(sTitle)
fig.tight_layout()
sPathFig='C:/VKHCG/05-DS/4000-UL/0200-DU/DU-Histogram.png'
fig.savefig(sPathFig)
plt.show()
-------------------------------------------------------------------
D. Outlier Detection

C:\VKHCG\05-DS\4000-UL\0200-DU\DU-Outliers.py
Code:
################################################################

import pandas as pd
sFileName = r"C:\practical-data-science-master\VKHCG\01-Vermeulen\00-RawData\data\IP_DATA_ALL.csv"
IP_DATA_ALL=pd.read_csv(sFileName,header=0,low_memory=False,
                        usecols=['Country','Place Name','Latitude','Longitude'], encoding="latin-1")
CityData=IP_DATA_ALL.loc[IP_DATA_ALL['Place Name']=='New York']
AllData=CityData[['Country', 'Place Name','Latitude']]
print(AllData)
MeanData=AllData.groupby(['Country', 'Place Name'])['Latitude'].mean()
StdData=AllData.groupby(['Country', 'Place Name'])['Latitude'].std()
print('Outliers')
UpperBound=float(MeanData+StdData)
print('Higher than ', UpperBound)
OutliersHigher=AllData[AllData.Latitude>UpperBound]
print(OutliersHigher)
LowerBound=float(MeanData-StdData)
print('Lower than ', LowerBound)
OutliersLower=AllData[AllData.Latitude<LowerBound]
print(OutliersLower)
print('Not Outliers')
OutliersNot=AllData[(AllData.Latitude>=LowerBound) & (AllData.Latitude<=UpperBound)]
print(OutliersNot)
-----------------------------------------
Aim: - Write a Python / R program for basic logging in data science.

import logging
import math

logging.basicConfig(level=logging.DEBUG,
                    format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                    filename=r'C:\Users\admin\Desktop\MSC(IT)-part1\abc.log',
                    filemode='w')
logger = logging.getLogger()

def equation(a,b,c):
    logger.debug("compute disc")
    disc = b**2 - 4*a*c
    logger.debug("compute roots")
    root1 = (-b + math.sqrt(disc)) / (2*a)
    root2 = (-b - math.sqrt(disc)) / (2*a)
    logger.debug("return roots")
    return root1, root2

equation(1,0,-4)
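With the corrected formula, the call returns the roots of x^2 - 4 = 0; a quick check (the debug lines land in abc.log):

r1, r2 = equation(1, 0, -4)
print(r1, r2)   # expected: 2.0 -2.0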
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
import sys
import os
import logging
import uuid
import shutil
import time
############################################################
Base='C:/VKHCG'
############################################################
sCompanies=['01-Vermeulen','02-Krennwallner','03-Hillman','04-Clark']
sLayers=['01-Retrieve','02-Assess','03-Process','04-Transform','05-Organise','06-Report']
sLevels=['debug','info','warning','error']
for sCompany in sCompanies:
    sFileDir=Base + '/' + sCompany
    if not os.path.exists(sFileDir):
        os.makedirs(sFileDir)
    for sLayer in sLayers:
        log = logging.getLogger()  # root logger
        for hdlr in log.handlers[:]:  # remove all old handlers
            log.removeHandler(hdlr)
        #----------------------------------------------------------------------------------
        sFileDir=Base + '/' + sCompany + '/' + sLayer + '/Logging'
        if os.path.exists(sFileDir):
            shutil.rmtree(sFileDir)
        time.sleep(2)
        if not os.path.exists(sFileDir):
            os.makedirs(sFileDir)
        skey=str(uuid.uuid4())
        sLogFile=Base + '/' + sCompany + '/' + sLayer + '/Logging/Logging_'+skey+'.log'
        print('Set up:',sLogFile)
        # set up logging to file - see previous section for more details
        logging.basicConfig(level=logging.DEBUG,
                            format='%(asctime)s %(name)-12s %(levelname)-8s %(message)s',
                            datefmt='%m-%d %H:%M',
                            filename=sLogFile,
                            filemode='w')
        # define a Handler which writes INFO messages or higher to sys.stderr
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)
        # set a format which is simpler for console use
        formatter = logging.Formatter('%(name)-12s: %(levelname)-8s %(message)s')
        # tell the handler to use this format
        console.setFormatter(formatter)
        # add the handler to the root logger
        logging.getLogger('').addHandler(console)
        # Now, we can log to the root logger, or any other logger. First the root...
        logging.info('Practical Data Science is fun!')
        for sLevel in sLevels:
            sApp='Application-'+ sCompany + '-' + sLayer + '-' + sLevel
            logger = logging.getLogger(sApp)
            if sLevel == 'debug':
                logger.debug('Practical Data Science logged a debugging message.')
            if sLevel == 'info':
                logger.info('Practical Data Science logged an information message.')
            if sLevel == 'warning':
                logger.warning('Practical Data Science logged a warning message.')
            if sLevel == 'error':
                logger.error('Practical Data Science logged an error message.')
-------------------------------------------------------------
Retrieve Superstep

A) Aim: - Perform the following data processing using R.
library(readr)

Doctor_Details1 <- read_csv("Doctor_Details1.csv")
View(Doctor_Details1)
spec(Doctor_Details1)

> library(readr)
> Doctor_Details1 <- read_csv("Doctor_Details1.csv")
Parsed with column specification:
cols(
  Doc_Id = col_double(),
  Doc_Name = col_character(),
  Doc_Dep = col_character(),
  Start_Time = col_time(format = ""),
  End_Time = col_time(format = ""),
  Doc_Salary = col_double(),
  No_of_patients = col_double()
)
> View(Doctor_Details1)

> spec(Doctor_Details1)
cols(
  Doc_Id = col_double(),
  Doc_Name = col_character(),
  Doc_Dep = col_character(),
  Start_Time = col_time(format = ""),
  End_Time = col_time(format = ""),
  Doc_Salary = col_double(),
  No_of_patients = col_double()
)

library(tibble)
set_tidy_names(Doctor_Details1, syntactic = TRUE, quiet = FALSE)

> library(tibble)
> set_tidy_names(Doctor_Details1, syntactic = TRUE, quiet = FALSE)
# A tibble: 79 x 7
   Doc_Id Doc_Name Doc_Dep Start_Time End_Time Doc_Salary
    <dbl> <chr>    <chr>   <time>     <time>        <dbl>
 1      1 Akhil    Dots    07:51      03:34         82148
 2      2 Nikil    Eye     07:29      02:13         72824
 3      3 Sharma   Eye     07:58      02:31         89707
 4      4 Dev      E&T     07:26      05:51         59634
 5      5 Das      Dots    07:31      05:00         68756
 6      6 Shilpa   E&T     06:04      01:33         79237
 7      7 Shetty   Dots    07:36      04:23         85601
 8      8 Darshit  General 08:29      04:56         66319
 9      9 Abhishek Eye     08:00      02:38         61841
10     10 Shrikant General 06:30      02:39         54774
# ... with 69 more rows, and 1 more variable:
#   No_of_patients <dbl>

Doctor_Details1_Fix = set_tidy_names(Doctor_Details1, syntactic = TRUE, quiet = FALSE)
sapply(Doctor_Details1_Fix,typeof)

> Doctor_Details1_Fix = set_tidy_names(Doctor_Details1, syntactic = TRUE, quiet = FALSE)
> sapply(Doctor_Details1_Fix,typeof)
        Doc_Id       Doc_Name        Doc_Dep
      "double"    "character"    "character"
    Start_Time       End_Time     Doc_Salary
      "double"       "double"       "double"
No_of_patients
      "double"

library(data.table)
hist_DocName=data.table(Doc_Name=unique(Doctor_Details1_Fix[is.na(Doctor_Details1_Fix['Doc_Name'])==0,]$Doc_Name))
View(hist_DocName)

> View(hist_DocName)


sapply(Doctor_Details1['Doc_Salary'],mean,na.rm=TRUE)
> sapply(Doctor_Details1['Doc_Salary'],mean,na.rm=TRUE)
Doc_Salary
  71990.71
-----------------------------------------------------------------
B) Aim: - Program to retrieve different attributes of data.
Code: -

import sys
import os
import pandas as pd

sFileName=r'C:\Ab\VKHCG\Doctor_Details1.csv'
IP_DATA_ALL=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
sFileDir='C:/Ab/VKHCG/'
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
print('Rows:', IP_DATA_ALL.shape[0])
print('Columns:', IP_DATA_ALL.shape[1])
print('### Raw Data Set #####################################')

for i in range(0,len(IP_DATA_ALL.columns)):
    print(IP_DATA_ALL.columns[i],type(IP_DATA_ALL.columns[i]))
print('### Fixed Data Set ###################################')
IP_DATA_ALL_FIX=IP_DATA_ALL
for i in range(0,len(IP_DATA_ALL.columns)):
    cNameOld=IP_DATA_ALL_FIX.columns[i] + ' '
    cNameNew=cNameOld.strip().replace(" ", ".")
    IP_DATA_ALL_FIX.columns.values[i] = cNameNew
    print(IP_DATA_ALL.columns[i],type(IP_DATA_ALL.columns[i]))
print('Fixed Data Set with ID')

IP_DATA_ALL_with_ID=IP_DATA_ALL_FIX
IP_DATA_ALL_with_ID.index.names = ['RowID']
#print(IP_DATA_ALL_with_ID.head())
sFileName2=sFileDir + '/Retrieve_IP_DATA.csv'
IP_DATA_ALL_with_ID.to_csv(sFileName2, index = True, encoding="latin-1")
print('### Done!! ############################################')
-------------------------------------------------------------------
C) Aim: - Data Pattern.

Code: -

library(readr)
library(data.table)
FileName=paste0('C:/Ab/VKHCG/DocDetails.csv')
IP_DATA_ALL <- read_csv(FileName)
hist_DocName=data.table(DocName=unique(IP_DATA_ALL$DocName))
pattern_DocName=data.table(DocName=hist_DocName$DocName,PatternDocName=hist_DocName$DocName)
oldchar=c(letters,LETTERS)
newchar=replicate(length(oldchar),"A")
for (r in seq(nrow(pattern_DocName))){
  s=pattern_DocName[r,]$PatternDocName;
  for (c in seq(length(oldchar))){
    s=chartr(oldchar[c],newchar[c],s)
  };
  for (n in seq(0,9,1)){
    s=chartr(as.character(n),"N",s)
  };
  s=chartr(" ","b",s)
  s=chartr(".","u",s)
  pattern_DocName[r,]$PatternDocName=s;
};
View(pattern_DocName)

Every letter is mapped to 'A', every digit to 'N', spaces to 'b' and dots to 'u', so a name such as "Akhil" yields the pattern "AAAAA".
--------------------------------------------------------------
D) Aim: - Loading IP_DATA_ALL.

Code: -

import sys
import os
import pandas as pd
sFileName=r'C:\Ab\VKHCG\Doctor_Details1.csv'
IP_DATA_ALL=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
sFileDir='C:/Ab/VKHCG/'
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
print('Rows:', IP_DATA_ALL.shape[0])
print('Columns:', IP_DATA_ALL.shape[1])
print('### Raw Data Set #####################################')
for i in range(0,len(IP_DATA_ALL.columns)):
    print(IP_DATA_ALL.columns[i],type(IP_DATA_ALL.columns[i]))
print('### Fixed Data Set ###################################')
IP_DATA_ALL_FIX= IP_DATA_ALL
for i in range(0,len(IP_DATA_ALL.columns)):
    cNameOld=IP_DATA_ALL_FIX.columns[i] + ' '
    cNameNew=cNameOld.strip().replace(" ", ".")
    IP_DATA_ALL_FIX.columns.values[i] = cNameNew
    print(IP_DATA_ALL.columns[i],type(IP_DATA_ALL.columns[i]))
print('Fixed Data Set with ID')
IP_DATA_ALL_with_ID=IP_DATA_ALL_FIX
IP_DATA_ALL_with_ID.index.names = ['RowID']
#print(IP_DATA_ALL_with_ID.head())
sFileName2=sFileDir + '/Retrieve_IP_DATA.csv'
IP_DATA_ALL_with_ID.to_csv(sFileName2, index = True, encoding="latin-1")
print('### Done!! ############################################')
--------------------------------------------------------------------------------------------
A) Aim: - Perform error management on the given data using the pandas package

i. Drop the Columns Where All Elements Are Missing Values

Code: -

import sys
import os
import pandas as pd
sInputFileName='Good-or-Bad.csv'
sOutputFileName='Good-or-Bad-01.csv'
sFileDir= 'C:/Ab/VKHCG/'
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName='C:/Ab/VKHCG/' + sInputFileName
RawData=pd.read_csv(sFileName,header=0)
print('## Raw Data Values')
print(RawData)
print('## Data Profile')
print('Rows :',RawData.shape[0])
print('Columns :',RawData.shape[1])
sFileName=sFileDir + '/' + sInputFileName
RawData.to_csv(sFileName, index = False)
TestData=RawData.dropna(axis=1, how='all')
print('## Test Data Values')
print(TestData)
print('## Data Profile')
print('Rows :',TestData.shape[0])
print('Columns :',TestData.shape[1])
sFileName=sFileDir + '/' + sOutputFileName
TestData.to_csv(sFileName, index = False)
print('### Done!! #####################')
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
ii. Drop the Columns Where Any of the Elements Is Missing Values

Code: -
import sys
import os
import pandas as pd
sInputFileName='Good-or-Bad.csv'
sOutputFileName='Good-or-Bad-02.csv'
sFileDir= 'C:/Ab/VKHCG/'
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName='C:/Ab/VKHCG/' + sInputFileName
RawData=pd.read_csv(sFileName,header=0)
print('## Raw Data Values')
print(RawData)
print('## Data Profile')
print('Rows :',RawData.shape[0])
print('Columns :',RawData.shape[1])
sFileName=sFileDir + '/' + sInputFileName
RawData.to_csv(sFileName, index = False)
TestData=RawData.dropna(axis=1, how='any')
print('## Test Data Values')
print(TestData)
print('## Data Profile')
print('Rows :',TestData.shape[0])
print('Columns :',TestData.shape[1])
sFileName=sFileDir + '/' + sOutputFileName
TestData.to_csv(sFileName, index = False)
print('### Done!! #####################')
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
iii. Keep Only the Rows That Contain a Maximum of Two Missing Values

Code: -
import sys
import os
import pandas as pd
sInputFileName='Good-or-Bad.csv'
sOutputFileName='Good-or-Bad-03.csv'
sFileDir= 'C:/Ab/VKHCG/'
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName='C:/Ab/VKHCG/' + sInputFileName
RawData=pd.read_csv(sFileName,header=0)
print('## Raw Data Values')
print(RawData)
print('## Data Profile')
print('Rows :',RawData.shape[0])
print('Columns :',RawData.shape[1])
sFileName=sFileDir + '/' + sInputFileName
RawData.to_csv(sFileName, index = False)
TestData=RawData.dropna(thresh=2)  # a row survives only with at least two non-missing values
print('## Test Data Values')
print(TestData)
print('## Data Profile')
print('Rows :',TestData.shape[0])
print('Columns :',TestData.shape[1])
sFileName=sFileDir + '/' + sOutputFileName
TestData.to_csv(sFileName, index = False)
print('### Done!! #####################')
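All three variants read the same Good-or-Bad.csv. A sketch that fabricates one with a fully empty column and a partially empty column, so each dropna call has something to act on (the values are made up):

import numpy as np
import pandas as pd
sample = pd.DataFrame({
    'ID': [1, 2, 3, 4],
    'AllGood': ['a', 'b', 'c', 'd'],
    'SomeBad': [np.nan, 'x', np.nan, 'y'],
    'AllBad': [np.nan, np.nan, np.nan, np.nan],
})
sample.to_csv('C:/Ab/VKHCG/Good-or-Bad.csv', index=False)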
---------------------------------------------------------------
B) Aim: - Write a Python program to create the network routing diagram from the given data.

Code: -

import sys
import os
import pandas as pd
pd.options.mode.chained_assignment = None

Base= 'F:/VSIT/MscIT/Part-1/SEM-I/DS/Practs'
sInputFileName1='/DocSalDetails.csv'
sInputFileName2='/DocTimeDetails.csv'
sInputFileName3='/DocDetails.csv'
sOutputFileName='/DocDetailsST.csv'
sFileName=Base + sInputFileName1
print('Loading :',sFileName)
SalData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
print('Loaded Salary Data:',SalData.columns.values)
print('################################')

print('Changed :',SalData.columns.values)
SalData.rename(columns={'DocName': 'Doc_Name'}, inplace=True)
SalData.rename(columns={'DocSalary': 'Doc_Salary'}, inplace=True)
SalData.drop('DocDep', axis=1, inplace=True)
print('To :',SalData.columns.values)
print('################################')

sFileName=Base + sInputFileName2
print('Loading :',sFileName)
TimeData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
print('Loaded Time Data:',TimeData.columns.values)
print('################################')

print('Changed :',TimeData.columns.values)
TimeData.rename(columns={'StartTime': 'Start_Time'}, inplace=True)
TimeData.rename(columns={'EndTime': 'End_Time'}, inplace=True)
print('To :',TimeData.columns.values)
print('################################')

sFileName=Base + sInputFileName3
print('Loading :',sFileName)
DocRawData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
DocNetworkData=pd.merge(
    SalData,
    TimeData,
    how='inner',
    on='DocId'
)
################################################################
print('################################')

print('Change ',DocNetworkData.columns.values)
for i in DocNetworkData.columns.values:
    j='Doc_'+i
    DocNetworkData.rename(columns={i: j}, inplace=True)
print('To ', DocNetworkData.columns.values)
print('################################')

sFileDir=Base
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName=sFileDir + sOutputFileName
print('Storing :', sFileName)
DocNetworkData.to_csv(sFileName, index = False, encoding="latin-1")
print('### Done!! #####################')
@@@@@@@@@@@@@@@@@@@@@@@@ CONTINUED @@@@@@@@@@@@@@@@@@@@@@@@
Code: -
import sys
import os
import pandas as pd
pd.options.mode.chained_assignment = None

Base= 'F:/VSIT/MscIT/Part-1/SEM-I/DS/Practs'
sInputFileName='/DocDetails.csv'
sOutputFileName='/DocDetailsNode.csv'
sFileName=Base + sInputFileName
print('Loading :',sFileName)
IPData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
print('Loaded IP :', IPData.columns.values)
print('################################')

print('Changed :',IPData.columns.values)
IPData.drop('DocId', axis=1, inplace=True)
IPData.drop('StartTime', axis=1, inplace=True)
IPData.drop('EndTime', axis=1, inplace=True)
IPData.drop('NoOfPatients', axis=1, inplace=True)
IPData.rename(columns={'DocName': 'Doc_Name'}, inplace=True)
IPData.rename(columns={'DocDept': 'Doc_Dept'}, inplace=True)
IPData.rename(columns={'DocSalary': 'Doc_Salary'}, inplace=True)
print('To :',IPData.columns.values)
print('################################')

print('Change ',IPData.columns.values)
for i in IPData.columns.values:
    j='Node_'+i
    IPData.rename(columns={i: j}, inplace=True)
print('To ', IPData.columns.values)
print('################################')

sFileDir=Base
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName=sFileDir + sOutputFileName
print('################################')

print('Storing :', sFileName)
print('################################')

IPData.to_csv(sFileName, index = False, encoding="latin-1")
-------------------------------------------------------------------
C) Aim: - Write a Python / R program to build a directed acyclic graph

Code: -

import networkx as nx
import matplotlib.pyplot as plt
import sys
import os
import pandas as pd

Base= 'F:/VSIT/MscIT/Part-1/SEM-I/DS/Practs'
sInputFileName='/DocDetails.csv'
sOutputFileName1='DocDAG-1.png'
sOutputFileName2='DocDAG-2.png'
sFileName=Base + sInputFileName
print('Loading :',sFileName)
print('################################')

DocData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
print('Loaded Doctor Data :',DocData.columns.values)
print('################################')

print(DocData)
print('################################')
print('Rows : ',DocData.shape[0])
print('################################')

G1=nx.DiGraph()
G2=nx.DiGraph()

for i in range(DocData.shape[0]):
    G1.add_node(DocData['DocDep'][i])
    sDepName= DocData['DocDep'][i] + '-' + DocData['DocDep'][i]
    G2.add_node(sDepName)
print('################################')

for n1 in G1.nodes():
    for n2 in G1.nodes():
        if n1 != n2:
            print('Link :',n1,' to ', n2)
            G1.add_edge(n1,n2)
print("Nodes of graph: ")
print(G1.nodes())
print("Edges of graph: ")
print(G1.edges())
print('################################')

sFileDir=Base
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName=sFileDir + '/' + sOutputFileName1
print('Storing :', sFileName)
nx.draw(G1,pos=nx.spectral_layout(G1),
        node_color='r',edge_color='g',
        with_labels=True,node_size=8000,
        font_size=12)
plt.savefig(sFileName) # save as png
plt.show() # display
print('################################')

for n1 in G2.nodes():
    for n2 in G2.nodes():
        if n1 != n2:
            print('Link :',n1,' to ', n2)
            G2.add_edge(n1,n2)
print("Nodes of graph: ")
print(G2.nodes())
print("Edges of graph: ")
print(G2.edges())
print('################################')

sFileDir=Base
if not os.path.exists(sFileDir):
    os.makedirs(sFileDir)
sFileName=sFileDir + '/' + sOutputFileName2
print('Storing :', sFileName)
print('################################')

nx.draw(G2,pos=nx.spectral_layout(G2),
        node_color='r',edge_color='b',
        with_labels=True,node_size=8000,
        font_size=12)
plt.savefig(sFileName)
plt.show()
-----------------------------------------------------------------
Process Superstep

Aim: - Process Location

Code: -

import sys
import os
import pandas as pd
import sqlite3 as sq
import uuid
Base='C:/VKHCG'
print('Working Base :',Base, ' using ', sys.platform)
Company='01-Vermeulen'
InputAssessGraphName='Assess_All_Animals.gml'
EDSAssessDir='02-Assess/01-EDS'
InputAssessDir=EDSAssessDir + '/02-Python'
sFileAssessDir=Base + '/' + Company + '/' + InputAssessDir
if not os.path.exists(sFileAssessDir):
    os.makedirs(sFileAssessDir)
sDataBaseDir=Base + '/' + Company + '/03-Process/SQLite'
if not os.path.exists(sDataBaseDir):
    os.makedirs(sDataBaseDir)
sDatabaseName=sDataBaseDir + '/Vermeulen.db'
conn1 = sq.connect(sDatabaseName)
sDataVaultDir=Base + '/88-DV'
if not os.path.exists(sDataVaultDir):
    os.makedirs(sDataVaultDir)
sDatabaseName=sDataVaultDir + '/datavault.db'
conn2 = sq.connect(sDatabaseName)
t=0
tMax=(360//10)*(180//10)  # the grid below steps in 10-degree increments
for Longitude in range(-180,180,10):
    for Latitude in range(-90,90,10):
        t+=1
        IDNumber=str(uuid.uuid4())
        LocationName='L'+format(round(Longitude,3)*1000, '+07d') +\
            '-'+format(round(Latitude,3)*1000, '+07d')
        #LocationName='L'+str(Longitude)+'-'+str(Latitude)
        print('Create:',t,' of ',tMax,':',LocationName)
        LocationLine=[('ObjectBaseKey', ['GPS']),
                      ('IDNumber', [IDNumber]),
                      ('LocationNumber', [str(t)]),
                      ('LocationName', [LocationName]),
                      ('Longitude', [Longitude]),
                      ('Latitude', [Latitude])]
        if t==1:
            LocationFrame = pd.DataFrame(dict(LocationLine))  # DataFrame.from_items was removed from pandas
        else:
            LocationRow = pd.DataFrame(dict(LocationLine))
            LocationFrame = pd.concat([LocationFrame, LocationRow])  # DataFrame.append was removed in pandas 2.x
LocationHubIndex=LocationFrame.set_index(['IDNumber'],inplace=False)
sTable = 'Process-Location'
print('Storing :',sDatabaseName,' Table:',sTable)
LocationHubIndex.to_sql(sTable, conn1, if_exists="replace")
sTable = 'Hub-Location'
print('Storing :',sDatabaseName,' Table:',sTable)
#LocationHubIndex.to_sql(sTable, conn2, if_exists="replace")
print('Vacuum Databases')
sSQL="VACUUM;"
conn1.execute(sSQL)  # pandas.io.sql.execute was removed; run directly on the connection
#conn2.execute(sSQL)
print('################')
print('### Done!! ############################################')
--------------------------------------------------------------------------------------
Aim: - Process Event

Code: -

import sys
import os
import pandas as pd
import sqlite3 as sq
Base='C:/VKHCG'
print('Working Base :',Base, ' using ', sys.platform)
Company='01-Vermeulen'
InputFileName='Action_Plan.csv'
sDataBaseDir=Base + '/' + Company + '/03-Process/SQLite'
if not os.path.exists(sDataBaseDir):
    os.makedirs(sDataBaseDir)
sDatabaseName=sDataBaseDir + '/Vermeulen.db'
conn1 = sq.connect(sDatabaseName)
sDataVaultDir=Base + '/88-DV'
if not os.path.exists(sDataVaultDir):
    os.makedirs(sDataVaultDir)
sDatabaseName=sDataVaultDir + '/datavault.db'
conn2 = sq.connect(sDatabaseName)
sFileName=Base + '/' + Company + '/00-RawData/' + InputFileName
print('Loading :',sFileName)
EventRawData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
EventRawData.index.names=['EventID']
EventHubIndex=EventRawData
sTable = 'Process-Event'
print('Storing :',sDatabaseName,' Table:',sTable)
EventHubIndex.to_sql(sTable, conn1, if_exists="replace")
sTable = 'Hub-Event'
print('Storing :',sDatabaseName,' Table:',sTable)
#EventHubIndex.to_sql(sTable, conn2, if_exists="replace")
print('################')
print('Vacuum Databases')
sSQL="VACUUM;"
conn1.execute(sSQL)  # pandas.io.sql.execute was removed; run directly on the connection
#conn2.execute(sSQL)
print('### Done!! ############################################')
-----------------------------------------------------------------------------------------
Aim: - Sun model

Code: -

import sys
import os
from datetime import datetime
from pytz import timezone
import pandas as pd
import sqlite3 as sq
import uuid
pd.options.mode.chained_assignment = None
sDatabaseName='E:/Data Science/Vermeulen.db'
conn1 = sq.connect(sDatabaseName)
sDatabaseName='E:/Data Science/datavault.db'
conn2 = sq.connect(sDatabaseName)
sDatabaseName='E:/Data Science/datawarehouse.db'
conn3 = sq.connect(sDatabaseName)
print('\n#################################')
sSQL=" SELECT DateTimeValue FROM [Hub-Time-Gunnarsson];"
DateDataRaw=pd.read_sql_query(sSQL, conn2)
DateData=DateDataRaw.head(1000)
print(DateData,'this is the Date data')
print('Time Dimension')
print('\n#################################')
t=0
mt=DateData.shape[0]
for i in range(mt):
    BirthZones = ('Atlantic/Reykjavik','Europe/London','UCT')
    for j in range(len(BirthZones)):
        t+=1
        print(t,mt*3)
        BirthDateZoneStr=DateData['DateTimeValue'][i]  # the hub exposes only the DateTimeValue column
        BirthDateLocal=DateData['DateTimeValue'][i]
        BirthZone=BirthZones[j]
        IDTimeNumber=str(uuid.uuid4())
        TimeLine=[('TimeID', [str(IDTimeNumber)]),
                  ('UTCDate', [str(BirthDateZoneStr)]),
                  ('LocalTime', [str(BirthDateLocal)]),
                  ('TimeZone', [str(BirthZone)])]
        if t==1:
            TimeFrame = pd.DataFrame(dict(TimeLine))  # DataFrame.from_items was removed from pandas
        else:
            TimeRow = pd.DataFrame(dict(TimeLine))
            TimeFrame = pd.concat([TimeFrame, TimeRow])  # DataFrame.append was removed in pandas 2.x
DimTime=TimeFrame
DimTimeIndex=DimTime.set_index(['TimeID'],inplace=False)
sTable = 'Dim-Time'
print('\n#################################')
print('Storing :',sDatabaseName,'\n Table:',sTable)
print('\n#################################')
DimTimeIndex.to_sql(sTable, conn1, if_exists="replace")
DimTimeIndex.to_sql(sTable, conn3, if_exists="replace")
sSQL=" SELECT " + \
    " FirstName," + \
    " SecondName," + \
    " LastName," + \
    " BirthDateKey " + \
    " FROM [Hub-Person];"
PersonDataRaw=pd.read_sql_query(sSQL, conn2)
PersonData=PersonDataRaw.head(1000)
print('\n#################################')
print('Dimension Person')
print('\n#################################')
t=0
mt=PersonData.shape[0]
for i in range(mt):
    t+=1
    print(t,mt)
    FirstName = str(PersonData['FirstName'][i])
    SecondName = str(PersonData['SecondName'][i])
    if SecondName == 'nan':  # clear missing second names
        SecondName=""
    LastName = str(PersonData['LastName'][i])
    BirthDateKey = str(PersonData['BirthDateKey'][i])
    IDPersonNumber=str(uuid.uuid4())
    PersonLine=[('PersonID', [str(IDPersonNumber)]),
                ('FirstName', [FirstName]),
                ('SecondName', [SecondName]),
                ('LastName', [LastName]),
                ('Zone', [str('UTC')]),
                ('BirthDate', [BirthDateKey])]
    if t==1:
        PersonFrame = pd.DataFrame(dict(PersonLine))
    else:
        PersonRow = pd.DataFrame(dict(PersonLine))
        PersonFrame = pd.concat([PersonFrame, PersonRow])
DimPerson=PersonFrame
print(DimPerson)
DimPersonIndex=DimPerson.set_index(['PersonID'],inplace=False)
sTable = 'Dim-Person'
print('\n#################################')
print('Storing :',sDatabaseName,'\n Table:',sTable)
DimPersonIndex.to_sql(sTable, conn1, if_exists="replace")
DimPersonIndex.to_sql(sTable, conn3, if_exists="replace")
--------------------------------------------------------------------------------------
  1340. Organize Superstep
  1341.  
  1342. A) Aim: - Organizing Data
  1343.  
  1344. Code: -
  1345.  
import sys
import os
import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt
pd.options.mode.chained_assignment = None
sFileName='C:\\VKHCG\\01-Vermeulen\\02-Assess\\01-EDS\\02-Python\\Assess-Network-Routing-Company1.csv'
print('Loading :',sFileName)
CompanyData=pd.read_csv(sFileName,header=0,low_memory=False, encoding="latin-1")
print(CompanyData.head())
print(CompanyData.shape)
G=nx.Graph()
# Connect every pair of distinct countries.
for i in range(CompanyData.shape[0]):
    for j in range(CompanyData.shape[0]):
        Node0=CompanyData['Company_Country_Name'][i]
        Node1=CompanyData['Company_Country_Name'][j]
        if Node0 != Node1:
            G.add_edge(Node0,Node1)
# Connect each place to its country.
for i in range(CompanyData.shape[0]):
    Node0=CompanyData['Company_Country_Name'][i]
    Node1=CompanyData['Company_Place_Name'][i] + '('+ CompanyData['Company_Country_Name'][i] + ')'
    if Node0 != Node1:
        G.add_edge(Node0,Node1)
print('Nodes:', G.number_of_nodes())
print('Edges:', G.number_of_edges())
# The original reused the .csv path here, which would overwrite the source data if enabled.
sFileName='C:\\VKHCG\\01-Vermeulen\\02-Assess\\01-EDS\\02-Python\\Assess-Network-Routing-Company1.gml'
print('Storing :',sFileName)
#nx.write_gml(G, sFileName)
sFileName='C:\\VKHCG\\01-Vermeulen\\02-Assess\\01-EDS\\02-Python\\Assess-Network-Routing-Company1.png'
print('Storing Graph Image:',sFileName)
plt.figure(figsize=(15, 15))
pos=nx.spectral_layout(G,dim=2)
nx.draw_networkx_nodes(G,pos, node_color='k', node_size=10, alpha=0.8)
nx.draw_networkx_edges(G, pos,edge_color='r', arrows=False, style='dashed')
nx.draw_networkx_labels(G,pos,font_size=12,font_family='sans-serif',font_color='b')
plt.axis('off')
#plt.savefig(sFileName,dpi=600)
plt.show()
print('### Done!! #####################')
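
Once the graph is built, networkx can answer routing questions directly. A minimal sketch, assuming the G built above; the two country names are hypothetical placeholders for values of Company_Country_Name:

centrality=nx.degree_centrality(G)   # rank hub countries by degree centrality
for node, score in sorted(centrality.items(), key=lambda kv: -kv[1])[:5]:
    print(node, round(score, 4))
if G.has_node('Germany') and G.has_node('Japan'):   # placeholder node names
    print(nx.shortest_path(G, 'Germany', 'Japan'))  # shortest route between the two nodes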

----------------------------------------------------------------------------------------

B) Aim: - Organizing Data Horizontally

Code: -

import sys
import os
import pandas as pd
import sqlite3 as sq
sDatabaseName1='C:/Users/exam/Downloads/datawarehouse.db'
conn1 = sq.connect(sDatabaseName1)
sDatabaseName2='C:/Users/exam/Downloads/datamart.db'
conn2 = sq.connect(sDatabaseName2)
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)
sSQL="SELECT * FROM [Dim-BMI];"
PersonFrame0=pd.read_sql_query(sSQL, conn1)
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)   # reads the warehouse; the original printed the mart's name here
sSQL="SELECT PersonID,\
 Height,\
 Weight,\
 bmi,\
 Indicator\
 FROM [Dim-BMI]\
 WHERE \
 Height > 1.5 \
 and Indicator = 1\
 ORDER BY \
 Height,\
 Weight;"
PersonFrame1=pd.read_sql_query(sSQL, conn1)
DimPerson=PersonFrame1
DimPersonIndex=DimPerson.set_index(['PersonID'],inplace=False)
sTable = 'Dim-BMI-Horizontal'
print('Storing :',sDatabaseName2,'\n Table:',sTable)
DimPersonIndex.to_sql(sTable, conn2, if_exists="replace")
sTable = 'Dim-BMI-Horizontal'
print('Loading :',sDatabaseName2,' Table:',sTable)
sSQL="SELECT * FROM [Dim-BMI-Horizontal];"
PersonFrame2=pd.read_sql_query(sSQL, conn2)
print('Full Data Set (Rows):', PersonFrame0.shape[0])
print('Full Data Set (Columns):', PersonFrame0.shape[1])
print('Horizontal Data Set (Rows):', PersonFrame2.shape[0])
print('Horizontal Data Set (Columns):', PersonFrame2.shape[1])
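
The same horizontal slice (a row subset) can be produced in pandas alone with a boolean mask. A minimal sketch, assuming the PersonFrame0 frame loaded above:

HorizontalSlice=PersonFrame0[(PersonFrame0['Height'] > 1.5) & (PersonFrame0['Indicator'] == 1)]
HorizontalSlice=HorizontalSlice.sort_values(['Height','Weight']).set_index('PersonID')
print('Horizontal slice:', HorizontalSlice.shape)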


---------------------------------------------------------------------------------------------

C) Aim: - Organizing Data Vertically

Code: -

import sys
import os
import pandas as pd
import sqlite3 as sq
sDatabaseName1='C:/Users/exam/Downloads/datawarehouse.db'
conn1 = sq.connect(sDatabaseName1)
sDatabaseName2='C:/Users/exam/Downloads/datamart.db'
conn2 = sq.connect(sDatabaseName2)
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)
sSQL="SELECT * FROM [Dim-BMI];"
PersonFrame0=pd.read_sql_query(sSQL, conn1)
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)
sSQL="SELECT \
 Height,\
 Weight,\
 Indicator\
 FROM [Dim-BMI];"
PersonFrame1=pd.read_sql_query(sSQL, conn1)
DimPerson=PersonFrame1
DimPersonIndex=DimPerson.set_index(['Indicator'],inplace=False)
sTable = 'Dim-BMI-Vertical'
print('Storing :',sDatabaseName2,'\n Table:',sTable)
DimPersonIndex.to_sql(sTable, conn2, if_exists="replace")
sTable = 'Dim-BMI-Vertical'
print('Loading :',sDatabaseName2,' Table:',sTable)
sSQL="SELECT * FROM [Dim-BMI-Vertical];"
PersonFrame2=pd.read_sql_query(sSQL, conn2)
print('Full Data Set (Rows):', PersonFrame0.shape[0])
print('Full Data Set (Columns):', PersonFrame0.shape[1])
print('Vertical Data Set (Rows):', PersonFrame2.shape[0])
print('Vertical Data Set (Columns):', PersonFrame2.shape[1])
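
Vertical organization is simply column selection, so the SQL above can be mirrored in pandas. A minimal sketch, assuming PersonFrame0 from above:

VerticalSlice=PersonFrame0[['Height','Weight','Indicator']].set_index('Indicator')
print('Vertical slice:', VerticalSlice.shape)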

----------------------------------------------------------------------------------------------

D) Aim: - Organizing Data Islands

Code: -

import sys
import os
import pandas as pd
import sqlite3 as sq
sDatabaseName1='C:/Users/exam/Downloads/datawarehouse.db'
conn1 = sq.connect(sDatabaseName1)
################################################################
sDatabaseName2='C:/Users/exam/Downloads/datamart(1).db'
conn2 = sq.connect(sDatabaseName2)
################################################################
print('################')
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)
sSQL="SELECT * FROM [Dim-BMI];"
PersonFrame0=pd.read_sql_query(sSQL, conn1)
################################################################
print('################')
sTable = 'Dim-BMI'
print('Loading :',sDatabaseName1,' Table:',sTable)
sSQL="SELECT \
 Height,\
 Weight,\
 Indicator\
 FROM [Dim-BMI]\
 WHERE Indicator > 2\
 ORDER BY \
 Height,\
 Weight;"
PersonFrame1=pd.read_sql_query(sSQL, conn1)
################################################################
DimPerson=PersonFrame1
DimPersonIndex=DimPerson.set_index(['Indicator'],inplace=False)
################################################################
sTable = 'Dim-BMI-Island'   # renamed from 'Dim-BMI-Vertical'; this section stores an island, not the vertical slice
print('\n#################################')
print('Storing :',sDatabaseName2,'\n Table:',sTable)
print('\n#################################')
DimPersonIndex.to_sql(sTable, conn2, if_exists="replace")
################################################################
print('################################')
sTable = 'Dim-BMI-Island'
print('Loading :',sDatabaseName2,' Table:',sTable)
print('################################')
sSQL="SELECT * FROM [Dim-BMI-Island];"
PersonFrame2=pd.read_sql_query(sSQL, conn2)
################################################################
print('################################')
print('Full Data Set (Rows):', PersonFrame0.shape[0])
print('Full Data Set (Columns):', PersonFrame0.shape[1])
print('################################')
print('Island Data Set (Rows):', PersonFrame2.shape[0])
print('Island Data Set (Columns):', PersonFrame2.shape[1])
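
An island is a contiguous run of rows that share a property; in pandas it can be located with the shift/cumsum trick. A minimal sketch, assuming PersonFrame0 from above:

df=PersonFrame0.sort_values(['Height','Weight']).reset_index(drop=True)
mask=df['Indicator'] > 2                     # rows that qualify for an island
island_id=(mask != mask.shift()).cumsum()    # new id each time the mask flips
for k, grp in df[mask].groupby(island_id[mask]):
    print('Island', k, 'rows:', len(grp))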


--------------------------------------------------------------------------------------------------------

Report Superstep

Aim: - Generating Report Graphics (pie, line, bar, area, scatter, and hexbin plots)

Code: -

import sys
import os
import pandas as pd
from matplotlib import pyplot as plt
data=[
    ['London', 29.2, 17.4],
    ['Glasgow', 18.8, 11.3],
    ['Cape Town', 15.3, 9.0],
    ['Houston', 22.0, 7.8],
    ['Perth', 18.0, 23.7],
    ['San Francisco', 11.4, 33.3]]
os_new=pd.DataFrame(data)
os_new.rename(columns = {0 : "Warehouse Location"}, inplace=True)
os_new.rename(columns = {1 : "Profit 2016"}, inplace=True)
os_new.rename(columns = {2 : "Profit 2017"}, inplace=True)
explode = (0, 0.2, 0, 0, 0, 0.1)   # pull the second and last wedges out of the pie
labels=os_new['Warehouse Location']
colors_mine = ['yellowgreen', 'gold', 'lightskyblue', 'lightcoral', 'lightcyan','lightblue']
os_new.plot(figsize=(10, 10),kind="pie", y="Profit 2017",autopct='%.2f%%', \
    shadow=True, explode=explode, legend = False, colors = colors_mine,\
    labels=labels, fontsize=20)
sPicNameOut1='E:/Data Science/pie_explode.png'
plt.savefig(sPicNameOut1,dpi=600)
plt.show()
os_new.iloc[:5].plot(figsize=(10, 10),kind='line',x='Warehouse Location',\
    y=['Profit 2016','Profit 2017'])
plt.show()
os_new.iloc[:5].plot(figsize=(10, 10),kind='bar',x='Warehouse Location',\
    y=['Profit 2016','Profit 2017'])
plt.show()
os_new.iloc[:5].plot(figsize=(10, 10),kind='barh',x='Warehouse Location',\
    y=['Profit 2016','Profit 2017'])
plt.show()
os_new.iloc[:5].plot(figsize=(10, 10),kind='area',x='Warehouse Location',\
    y=['Profit 2016','Profit 2017'],stacked=False)
plt.show()
os_new.iloc[:5].plot(figsize=(10, 10),kind='scatter',x='Profit 2016',\
    y='Profit 2017',color='DarkBlue',marker='D')
plt.show()
os_new.iloc[:5].plot(figsize=(13, 10),kind='hexbin',x='Profit 2016',\
    y='Profit 2017', gridsize=25)
plt.show()
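
To persist every chart instead of only the pie, the plot calls can be driven from a loop over chart kinds. A minimal sketch; the output folder is hypothetical:

sOutDir='E:/Data Science'   # hypothetical output folder
for sKind in ['line','bar','barh','area']:
    ax=os_new.plot(figsize=(10, 10), kind=sKind, x='Warehouse Location',
                   y=['Profit 2016','Profit 2017'])
    ax.figure.savefig('%s/profit_%s.png' % (sOutDir, sKind), dpi=300)
    plt.close(ax.figure)   # free the figure after saving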

-----------------------------------------------------------------------------------------------------