Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
# Cluster request-log rows into two groups with k-means.
#
# Reads the log export from 'c0.xlsx', derives per-row features (response
# success, unusual user agent, traceId/parentTraceId overlap, label-encoded
# VIN, min-max scaled processing time) and prints the size of each cluster.
import pandas as pd
import sklearn
from sklearn.cluster import KMeans
from sklearn import preprocessing

le = preprocessing.LabelEncoder()
pd.set_option('display.max_columns', None)

# Alternative input used previously:
#   'BVWEK33GZ17021703_MBB_Data Scientist Event.XLSX'
df1 = pd.read_excel('c0.xlsx')

# Map every distinct VIN to an integer id.
# NOTE(review): the ids are arbitrary nominal codes and are fed to k-means
# unscaled below, so they can dominate the Euclidean distance compared with
# the 0/1 flags and the [0, 1] scaled time -- confirm this is intended.
le.fit(df1['vin'].unique())
df1['num_vin'] = le.transform(df1['vin'])

# True if the response status is one of the success codes, False otherwise.
# NOTE(review): the codes are strings; this assumes 'responseStatus' is stored
# as text -- verify against the input file.
success_codes = ['202', '200', '200 - OK', '202 - Accepted']
df1['status_success'] = df1['responseStatus'].isin(success_codes)

# User agents treated as unusual (non-interactive clients / odd browsers).
strange_user_agents = [
    'Opera/9.80 Presto/2.10.x',
    'Apache-HttpClient/4.5.2 (Java 1.5 minimum; Java/1.8.0_152)',
    'Apache-HttpClient/4.5.2 (Java 1.5 minimum; Java/1.8.0_162)',
    'Java/1.8.0_162',
]
df1['strange_browser'] = df1['userAgent'].isin(strange_user_agents)

# Flag rows whose traceId also occurs somewhere in the parentTraceId column.
# NOTE(review): despite the column name, this marks traces that some row
# references as its parent -- confirm that is the intended meaning.
traceId_unique = df1['traceId'].unique()
parenttraceId_unique = df1['parentTraceId'].unique()
commonIdSet = set(traceId_unique) & set(parenttraceId_unique)
df1['hasParentId'] = df1['traceId'].isin(commonIdSet)
tIdsWithParent = df1[df1['hasParentId']]

# Result recorded from an earlier run: ones: 127346, zeroes: 43972
# (may no longer match now that the time scaling is corrected).

# Min-max scale the processing time into [0, 1].  The scaler needs a 2-D
# input (n_samples, n_features), hence the double brackets; the previous
# hand-written formula lacked parentheses around (max - min) and therefore
# computed ((x - min) / max) - min instead.
scaler = preprocessing.MinMaxScaler()
scaler.fit(df1[['processingTime']])
print('i fit')
df1['scaled_times'] = scaler.transform(df1[['processingTime']]).ravel()

k_means = df1[['status_success', 'strange_browser', 'hasParentId', 'num_vin', 'scaled_times']]
print(k_means.head())

# Fixed random_state keeps the cluster assignment reproducible between runs.
kmeans = KMeans(n_clusters=2, random_state=0).fit(k_means)

# Count the members of each cluster without materialising per-label lists.
ones = int((kmeans.labels_ == 1).sum())
zeroes = int((kmeans.labels_ == 0).sum())
print(f'ones: {ones}, zeroes: {zeroes}')
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement