Advertisement
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
def _pca_kmeans(frame, columns, n_clusters=3):
    """Standardise ``columns`` of ``frame``, reduce them to 2 PCA axes, cluster.

    Returns a ``(components, labels)`` pair: the array produced by
    ``PCA(n_components=2).fit_transform`` and the array produced by
    ``KMeans.fit_predict`` on those components.
    """
    scaled = StandardScaler().fit_transform(frame[columns])
    components = PCA(n_components=2).fit_transform(scaled)
    labels = KMeans(n_clusters=n_clusters).fit_predict(components)
    return components, labels


def _create_varchar_table(table_name, column_names):
    """Create ``table_name`` (if missing) with every column as ``varchar(255)``."""
    from django.db import connection

    # Quote identifiers so names that are SQL keywords ("order", "group", ...)
    # do not break the statement.
    quote = connection.ops.quote_name
    column_sql = ', '.join(quote(name) + ' varchar(255)' for name in column_names)
    statement = (
        'CREATE TABLE IF NOT EXISTS ' + quote(table_name) + ' (' + column_sql + ')'
    )
    # The context manager closes the cursor even if execute() raises.
    with connection.cursor() as cursor:
        cursor.execute(statement)


def processing(request, id=None):
    """Show the column picker for an uploaded CSV and, on POST, cluster it.

    GET renders ``kmeans/processing.html`` with the file record, its cleaned
    column names and a preview of the first rows.

    POST reads the selected columns from ``request.POST['sel']``, runs
    scaling -> PCA (2 components) -> KMeans (3 clusters), then:

    * stores the generated table name in ``request.session['fl']``,
    * creates that table and appends the CSV rows plus their cluster label,
    * stores one ``ClusterData`` row per CSV row,
    * marks the matching ``FileDetails`` rows as processed.

    Responds with 404 when the file does not belong to ``request.user`` and
    with 400 when the selection or the CSV is too small to cluster.
    """
    import datetime
    import os

    n_clusters = 3
    template = 'kmeans/processing.html'
    q = get_object_or_404(FileDetails, id=id, user=request.user)

    file_path = os.path.join(settings.MEDIA_ROOT, q.file_name)
    data = pd.read_csv(file_path)
    data['id'] = range(len(data))  # synthetic row id, stored with each result

    if request.method == 'POST':
        v = request.POST.getlist('sel')  # columns chosen for clustering

        # PCA(n_components=2) needs at least two features and KMeans needs at
        # least n_clusters samples; reject bad input instead of crashing.
        has_unknown = any(name not in data.columns for name in v)
        if len(v) < 2 or has_unknown:
            return HttpResponse(
                '<h2>Please select at least two valid columns.</h2>', status=400
            )
        if len(data) < n_clusters:
            return HttpResponse(
                '<h2>The file does not contain enough rows to cluster.</h2>',
                status=400,
            )

        x_pca, y_predicted = _pca_kmeans(data, v, n_clusters=n_clusters)
        data['cluster'] = y_predicted

        # Table name: file name plus the current time, stripped of non-word
        # characters; kept in the session under 'fl'.
        stamp = datetime.datetime.now().strftime("%X")
        fl = re.sub(r'\W+', '', q.file_name + stamp)
        request.session['fl'] = fl

        clms = [re.sub(r'\W+', '', name) for name in data.columns]
        _create_varchar_table(fl, clms)

        # NOTE(review): credentials are hard-coded here; they should come from
        # settings or the environment.
        database_url = 'mysql+pymysql://{user}:{password}@127.0.0.1/{database_name}'.format(
            user='root',
            password='user1234',
            database_name="clustered",
        )
        engine = create_engine(database_url, echo=False)
        try:
            # NOTE(review): the table was created with cleaned column names
            # while the frame keeps the original ones; the append presumably
            # fails when a CSV header contains non-word characters - confirm.
            data.to_sql(name=fl, con=engine, if_exists='append', index=False)
        finally:
            # Release the per-request connection pool.
            engine.dispose()

        # One INSERT batch instead of one query per row.
        # NOTE: bulk_create() bypasses Model.save() and the save signals; go
        # back to per-row create() if ClusterData relies on them.
        rows = zip(
            data['id'].tolist(),
            x_pca[:, 0].tolist(),
            x_pca[:, 1].tolist(),
            y_predicted.tolist(),
        )
        ClusterData.objects.bulk_create([
            ClusterData(file_id=q, u_id=uid, pca1=pca1, pca2=pca2, cluster=cluster)
            for uid, pca1, pca2, cluster in rows
        ])

        # NOTE(review): this updates every record of the user with the same
        # file_name, not only ``q`` - confirm that is intended.
        FileDetails.objects.filter(user=request.user).filter(
            file_name=q.file_name
        ).update(status=True, sel_columns=v, table_name=fl)
        return HttpResponse('<h2>Your file has been successfully proceed.</h2>')

    # data_columns is stored as the text of a list; strip the list punctuation
    # and split on whitespace to recover the names.
    c = (
        q.data_columns.replace(',', '\n')
        .replace('[', '')
        .replace(']', '')
        .replace("'", '')
    )
    context = {'q': q, 'columns': c.split(), 'head': data.head().to_html}
    return render(request, template, context)
# dashboard function
def dashboard(request, id=None):
    """Render ``kmeans/dashboard.html`` for the ``FileDetails`` row ``id``.

    Stores ``id`` in ``request.session['fid']`` and passes the template the
    matching queryset (``q``) and the list of selected column names (``col``)
    parsed from the record's ``sel_columns`` text.

    Responds with 404 when no record has that id.
    """
    template = 'kmeans/dashboard.html'
    request.session['fid'] = id

    # NOTE(review): unlike ``processing``, this lookup is not restricted to
    # ``request.user`` - confirm whether dashboards are meant to be shared.
    q = FileDetails.objects.filter(id=id)

    # A missing record used to leave the column list unbound and crash with a
    # 500; answer 404 instead.
    record = get_object_or_404(q)

    # sel_columns may be empty for a file that has not been processed yet.
    raw_columns = record.sel_columns or ''
    cleaned = (
        raw_columns.replace(',', '\n')
        .replace('[', '')
        .replace(']', '')
        .replace("'", '')
    )
    context = {'q': q, 'col': cleaned.split()}
    return render(request, template, context)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement