Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # The script MUST contain a function named azureml_main
- # which is the entry point for this module.
- # Imports placed up here can be used inside azureml_main.
- import pandas as pd
- # The entry point function can contain up to two input arguments:
- # Param<dataframe1>: a pandas.DataFrame
- # Param<dataframe2>: a pandas.DataFrame
# Default analysis window (inclusive), expressed as month counters
# ``year * 12 + month``: July 1996 .. May 1998. This is the span the
# original script hard-coded.
_DEFAULT_FIRST_PERIOD = 1996 * 12 + 7
_DEFAULT_LAST_PERIOD = 1998 * 12 + 5


def _month_index(date):
    """Return ``year * 12 + month`` for a 'DD.MM.YYYY' string or a date-like object."""
    if isinstance(date, str):
        parts = date.split('.')
        return int(parts[2]) * 12 + int(parts[1])
    # Anything else is expected to expose .year / .month (datetime, Timestamp).
    return date.year * 12 + date.month


def _xyz_class(income_by_month, periods):
    """Return 'X', 'Y' or 'Z' from the coefficient of variation of monthly income.

    Months in ``periods`` that are missing from ``income_by_month`` count as
    zero income. The variation is the population standard deviation divided by
    the mean: up to 85 % is 'X', up to 100 % is 'Y', anything else is 'Z'.
    """
    incomes = [income_by_month.get(period, 0) for period in periods]
    count = len(periods)
    mean = sum(incomes, 0.0) / count
    if mean == 0:
        # The ratio is undefined; NaN/inf fell through to 'Z' before, so keep
        # that result without dividing by zero.
        return 'Z'
    variance = sum(((income - mean) ** 2 for income in incomes), 0.0) / count
    variation = variance ** 0.5 / mean
    if 100 * variation <= 85:
        return 'X'
    if 100 * variation <= 100:
        return 'Y'
    return 'Z'


def _abc_class(cumulative, total):
    """Return 'A', 'B' or 'C' for a cumulative income against the 70 % / 90 % cut-offs."""
    if cumulative <= 0.7 * total:
        return 'A'
    if cumulative <= 0.9 * total:
        return 'B'
    return 'C'


def azureml_main(dataframe1):
    """Run an ABC/XYZ analysis of income per product.

    Args:
        dataframe1: pandas.DataFrame with the columns 'Product', 'Date' and
            'Income'. 'Date' holds 'DD.MM.YYYY' strings or objects exposing
            ``.year`` and ``.month``. Any index is accepted.

    Returns:
        pandas.DataFrame with one row per product, sorted by descending total
        income, with the columns 'Product', 'Income', 'ABC' and 'XYZ'.

    The monthly statistics use the window July 1996 .. May 1998 whenever all
    rows fall inside it. If any row lies outside that window (which used to
    raise KeyError), the window becomes the span from the earliest to the
    latest month present in the data.
    """
    # Execution logic goes here
    print('Input pandas.DataFrame #1:\r\n\r\n{0}'.format(dataframe1))

    # If a zip file is connected to the third input port, it is unzipped
    # under ".\Script Bundle". This directory is added to sys.path.
    # Therefore, if your zip file contains a Python file mymodule.py you
    # can import it using:
    # import mymodule

    totals = {}         # product -> total income, in order of first appearance
    monthly = {}        # product -> {month counter -> income in that month}
    months_seen = []
    # Iterate the columns by position so that any DataFrame index works.
    rows = zip(dataframe1['Product'], dataframe1['Date'], dataframe1['Income'])
    for name, date, income in rows:
        month = _month_index(date)
        months_seen.append(month)
        totals[name] = totals.get(name, 0) + income
        by_month = monthly.setdefault(name, {})
        by_month[month] = by_month.get(month, 0) + income

    first, last = _DEFAULT_FIRST_PERIOD, _DEFAULT_LAST_PERIOD
    if months_seen and (min(months_seen) < first or max(months_seen) > last):
        first, last = min(months_seen), max(months_seen)
    periods = range(first, last + 1)

    result = pd.DataFrame({
        'Product': list(totals.keys()),
        'Income': list(totals.values()),
    })
    # Assign whole columns instead of writing cell by cell through chained
    # indexing, which does not update the frame under pandas Copy-on-Write.
    result['XYZ'] = [_xyz_class(monthly[name], periods) for name in totals]

    result = result.sort_values(by=['Income'], ascending=False)
    result = result.reset_index(drop=True)
    total_income = result['Income'].sum()

    running = 0
    abc = []
    for income in result['Income']:
        running += income
        abc.append(_abc_class(running, total_income))
    result['ABC'] = abc

    # Keep the column order the script has always produced.
    result = result[['Product', 'Income', 'ABC', 'XYZ']]

    # NOTE(review): the module template says the return value must be a
    # sequence of pandas.DataFrame (e.g. ``return result,``). The bare frame
    # is returned here exactly as before; confirm what the caller expects.
    return result
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement