# coding=utf-8

# Untitled

# Module metadata: title, release state, authorship and licensing.
__title__ = "TP Big Data en python"
__version__ = "1.0.0"
__status__ = "Production"

__author__ = "Thomas & Amin"
__build__ = "Thomas & Amin"
__maintainer__ = "Thomas & Amin"
__credits__ = "LOUNIS, BOUMEDIENE"
__email__ = "[email protected]"

__copyright__ = "Copyleft 2018 - SSI Efrei"
__license__ = "GPL"


import pandas as pd


# Load the 'Essais' sheet of the workbook into a DataFrame. skiprows=1 makes
# pandas skip the first spreadsheet row before it reads the header row.
df = pd.read_excel('test.xlsx', sheet_name='Essais', skiprows=1)

# Remove the row labelled 0, i.e. the first row that follows the header.
# NOTE(review): presumably a sub-header / units row -- confirm against the
# workbook, since every statistic below is computed without it.
df.drop(0, axis=0, inplace=True)

# Banner. A parenthesised single-argument print emits the same text whether
# it is parsed as a Python 2 print statement or a Python 3 print() call.
print("\n\nProjet de Big Data en Python\nEfrei Promo 2020 M1 SSI 1\nRealiser par Thomas Lounis et Amin Boumediene\n04/03/2019\nVersion: Finale\n\n\n")


print("Question 1\n")

# Count the distinct STUDENT values inside each GROUP.
students_per_group = df.groupby('GROUP')['STUDENT'].nunique()

# Series.items() yields (group label, count) pairs; it replaces
# Series.iteritems(), which no longer exists in pandas 2.x.
for group, student_count in students_per_group.items():
    print("there is {} students in the group {} \n".format(student_count, group))


print("\n\nQuestion 2\n")

# Sum the TESTS column inside each GROUP.
attempts_per_group = df.groupby('GROUP')['TESTS'].sum()

# Series.items() yields (group label, total) pairs; it replaces
# Series.iteritems(), which no longer exists in pandas 2.x.
for group, attempts in attempts_per_group.items():
    print("The group {} has made {} attempts \n".format(group, attempts))


print("\n\nQuestion 3\n")

# Sum the TESTS column inside each GROUP (same aggregation as Question 2).
attempts_per_group = df.groupby('GROUP')['TESTS'].sum()

# Pick the (group, total) pair with the largest total. max() keeps the first
# pair it meets among equal totals, so ties resolve to the first group in
# groupby order. Raises ValueError when there is no group at all.
best_group, best_total = max(attempts_per_group.items(), key=lambda pair: pair[1])

print("The group that has done the most attempts is the group {} with {} attempts\n".format(best_group, best_total))


# NOTE(review): this header repeats "Question 1" although it follows
# Question 3 -- looks like a copy/paste slip; text left unchanged, confirm
# the intended number.
print("Question 1\n")

# Grand total of the TESTS column over every remaining row.
total = df['TESTS'].sum()

# This section originally read the student column as u'ÉTUDIANT' while
# Question 1 reads 'STUDENT'. Keep the original name when the sheet has it,
# otherwise fall back to the name used by Question 1.
# NOTE(review): confirm the real header in the workbook and keep only one.
student_col = u'ÉTUDIANT' if u'ÉTUDIANT' in df.columns else 'STUDENT'

# The first entry of the column is skipped, as before.
# NOTE(review): row 0 was already dropped after loading, so this skips one
# more row while `total` still includes it -- confirm this is intended.
students = df[student_col].tolist()[1:]
students_unique = list(set(students))

# float() forces true division: under Python 2 an integer total divided by
# an integer count would silently floor the average. With no student at all
# the average is reported as 0.0 instead of raising ZeroDivisionError.
moyenne = float(total) / len(students_unique) if students_unique else 0.0
print("The average number of attempts per student is {} tests ".format(moyenne))


# NOTE(review): this header also reads "Question 1" -- looks like a
# copy/paste slip; text left unchanged, confirm the intended number.
print("\n\nQuestion 1")

# This section originally grouped by 'GROUPE' while Questions 1-3 group by
# 'GROUP'. Keep the original name when the sheet has it, otherwise fall back
# to the name used by the earlier questions.
# NOTE(review): confirm the real header in the workbook and keep only one.
group_col = 'GROUPE' if 'GROUPE' in df.columns else 'GROUP'

# For each group, list the distinct values of the EXO column.
# Series.items() replaces Series.iteritems(), which no longer exists in
# pandas 2.x.
for group, exercises in df.groupby(group_col)['EXO'].unique().items():
    print("\n\nthe group {} has made the following exercises: \n".format(group))
    print(exercises)