Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
def missing_values(dataFrame):
    """Return the percentage of null values in each column of a DataFrame.

    Args:
        dataFrame: pandas DataFrame to inspect.

    Returns:
        A new DataFrame indexed by the column names of ``dataFrame`` with a
        single column ``'missing_rate'`` holding the percentage (0-100) of
        null entries per column, sorted from highest to lowest.
    """
    # mean() of the boolean null mask is the fraction of nulls per column;
    # sum() would only give a raw count, not a rate.
    missing_rate = dataFrame.isnull().mean() * 100
    missing_rate = missing_rate.sort_values(ascending=False)
    return pd.DataFrame({'missing_rate': missing_rate})
def get_ticks(DataFrame, labels):
    """Return, for each label, the distinct occurrence counts of its values.

    For every column named in ``labels`` the number of occurrences of each
    distinct value is computed; the unique counts are collected so they can
    be used as axis ticks for a count plot.

    Args:
        DataFrame: pandas DataFrame containing the columns in ``labels``.
        labels: iterable of column names to process.

    Returns:
        A list with one entry per label, in the order of ``labels``. Each
        entry is an ascending list of the unique occurrence counts (ints)
        found in that column. Missing values (NaN/None) are not counted.
    """
    xticks = []
    # Iterate the ``labels`` argument itself rather than any module-level name.
    for label in labels:
        # value_counts() tallies every distinct value in a single pass.
        counts = DataFrame[label].value_counts()
        xticks.append(sorted({int(count) for count in counts}))
    return xticks
import dateutil.parser


def insert_data_columns(dataFrame, nameOfDataColumn):
    """Add day/month/year/day_of_week columns derived from a date column.

    Each value of ``dataFrame[nameOfDataColumn]`` is parsed with
    ``dateutil.parser.parse`` and the resulting calendar date is split into
    four new integer columns, written to ``dataFrame`` in place:
    ``'day'``, ``'month'``, ``'year'`` and ``'day_of_week'``
    (``date.weekday()``, i.e. Monday is 0).

    Args:
        dataFrame: pandas DataFrame to modify in place.
        nameOfDataColumn: name of the column holding date/time strings.

    Returns:
        None; the new columns are added to ``dataFrame``.
    """
    parsed_dates = [
        dateutil.parser.parse(raw).date() for raw in dataFrame[nameOfDataColumn]
    ]
    # Assign plain lists so values are placed positionally. Wrapping them in
    # a fresh pd.Series would align on a 0..n-1 index and produce NaN or
    # misplaced values for frames whose index is not the default RangeIndex.
    dataFrame['day'] = [d.day for d in parsed_dates]
    dataFrame['month'] = [d.month for d in parsed_dates]
    dataFrame['year'] = [d.year for d in parsed_dates]
    dataFrame['day_of_week'] = [d.weekday() for d in parsed_dates]
Add Comment
Please, Sign In to add comment