Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from great_expectations.render.renderer import (
- ExpectationSuitePageRenderer
- )
- from great_expectations.render.types import (
- RenderedDocumentContent,
- RenderedSectionContent,
- RenderedComponentContent,
- )
- import pandas as pd
class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
    """Expectation-suite page renderer that prepends a "Table Summary" section.

    The extra section contains a header and a data-dictionary table with one
    row per column declared under ``meta["columns"]`` of the suite. Each row
    is derived from the expectations whose ``kwargs["column"]`` names that
    column: data type, nullity, allowed values, and min/max bounds.
    """

    # Data-type categories whose columns can carry min/max bounds.
    _NUMERIC_DATA_TYPES = ("integer", "float")

    def __init__(self, *args, **kwargs):
        """Initialise the parent renderer and the type-name lookup table.

        Arguments are forwarded untouched to the parent initializer. The
        parent must be initialised because ``render`` delegates to
        ``super().render``.
        NOTE(review): confirm the parent signature against the installed
        great_expectations version.
        """
        super().__init__(*args, **kwargs)
        # Maps a display category to the backend type names that belong to it.
        self.data_types = {
            "integer": {
                "INTEGER", "int", "INT", "TINYINT", "BYTEINT", "SMALLINT",
                "BIGINT", "IntegerType", "LongType", "DECIMAL",
            },
            "float": {
                "FLOAT", "FLOAT4", "FLOAT8", "DOUBLE_PRECISION", "NUMERIC",
                "FloatType", "DoubleType", "float",
            },
            "string": {
                "CHAR", "VARCHAR", "TEXT", "StringType", "string", "str",
            },
            "boolean": {"BOOLEAN", "BOOL", "bool", "BooleanType"},
            "datetime": {
                "DATETIME", "DATE", "TIMESTAMP", "DateType", "TimestampType",
                "datetime64", "Timestamp",
            },
        }

    def render(self, expectations):
        """Render the suite and put the "Table Summary" section first.

        Args:
            expectations: The expectation suite; accessed here as a mapping
                with ``"meta"`` and ``"expectations"`` keys.

        Returns:
            The document content produced by the parent renderer, with the
            summary section inserted ahead of the parent's sections.
        """
        rendered_document_content = super().render(expectations)
        summary_section = RenderedSectionContent(**{
            "section_name": "Table Summary",
            "content_blocks": [
                self._render_data_dictionary_header(expectations),
                self._render_data_dictionary(expectations),
            ],
        })
        rendered_document_content["sections"] = (
            [summary_section] + rendered_document_content["sections"]
        )
        return rendered_document_content

    def _render_data_dictionary_header(self, expectations):
        """Return the header component of the summary section.

        ``expectations`` is accepted for signature symmetry and is not read.
        """
        return RenderedComponentContent(**{
            "content_block_type": "header",
            "header": "Table Summary",
            "styling": {
                "classes": ["col-12"],
                "header": {
                    "classes": ["alert", "alert-secondary"],
                },
            },
        })

    def _render_data_dictionary(self, expectations):
        """Return the data-dictionary table component.

        Rows are built as plain lists rather than through a DataFrame, so
        cell values reach the template exactly as they appear in the suite
        (no dtype coercion of ``None`` or integers) and ``table`` is a list
        of rows.
        """
        table_columns = self._get_table_columns(expectations)
        expectations_by_column = self._sort_expectations_by_column(expectations)

        data_types = self._get_column_data_types(expectations_by_column)
        nullity = self._get_column_nullity(expectations_by_column)
        value_sets = self._get_column_set_values(expectations_by_column)
        minimums = self._get_column_min(expectations_by_column)
        maximums = self._get_column_max(expectations_by_column)

        rows = [
            [
                name,
                # A column entry without a description yields an empty cell.
                (details or {}).get("description"),
                data_types.get(name),
                nullity.get(name),
                value_sets.get(name),
                minimums.get(name),
                maximums.get(name),
            ]
            for name, details in table_columns.items()
        ]

        return RenderedComponentContent(**{
            "content_block_type": "table",
            "header_row": [
                "Column Name", "Description", "Data Type", "Nullity",
                "Possible Values", "Min", "Max",
            ],
            "header": "Data Dictionary",
            "table": rows,
            "styling": {
                "classes": ["col-12", "table-responsive"],
                "styles": {
                    "margin-top": "20px",
                    "margin-bottom": "20px",
                },
                "body": {
                    "classes": ["table", "table-sm"],
                },
            },
        })

    def _sort_expectations_by_column(self, expectations):
        """Group the suite's expectations by the column they target.

        Returns:
            A dict with one key per column from ``meta["columns"]`` (in that
            order); each value is the list of expectations whose
            ``kwargs["column"]`` equals the key. Columns with no
            expectations map to an empty list.
        """
        all_expectations = expectations.get("expectations") or []
        return {
            column: [
                expectation
                for expectation in all_expectations
                if (expectation.get("kwargs") or {}).get("column") == column
            ]
            for column in self._get_table_columns(expectations)
        }

    @staticmethod
    def _first_expectation_kwargs(column_expectations, expectation_type):
        """Return the kwargs of the first expectation of the given type.

        Returns ``None`` when the column has no such expectation, and an
        empty dict when the expectation exists but carries no kwargs.
        """
        for expectation in column_expectations:
            if expectation["expectation_type"] == expectation_type:
                return expectation.get("kwargs") or {}
        return None

    def _lookup_data_type(self, type_list):
        """Return the category of the first recognised name in ``type_list``.

        Returns ``None`` when no listed name appears in ``self.data_types``.
        """
        for type_name in type_list:
            for category, known_names in self.data_types.items():
                if type_name in known_names:
                    return category
        return None

    def _get_column_data_types(self, expectations_by_column):
        """Map every column to its data-type category, or ``None``.

        The category comes from the column's
        ``expect_column_values_to_be_in_type_list`` expectation. Every column
        gets an entry, including columns with no such expectation, an empty
        type list, or only unrecognised type names, so that dependent
        lookups always cover the full column set.
        """
        column_data_types = {}
        for column, column_expectations in expectations_by_column.items():
            kwargs = self._first_expectation_kwargs(
                column_expectations,
                "expect_column_values_to_be_in_type_list",
            )
            type_list = kwargs.get("type_list") if kwargs is not None else None
            column_data_types[column] = self._lookup_data_type(type_list or [])
        return column_data_types

    def _get_column_bound(self, expectations_by_column, expectation_type,
                          kwarg_name):
        """Shared implementation of the min/max lookups.

        For integer/float columns, returns ``kwargs[kwarg_name]`` of the
        first ``expectation_type`` expectation (``None`` if absent); every
        other column maps to ``"N/A"``.
        """
        bounds = {}
        data_types = self._get_column_data_types(expectations_by_column)
        for column, data_type in data_types.items():
            if data_type not in self._NUMERIC_DATA_TYPES:
                bounds[column] = "N/A"
                continue
            kwargs = self._first_expectation_kwargs(
                expectations_by_column[column], expectation_type
            )
            bounds[column] = (
                kwargs.get(kwarg_name) if kwargs is not None else None
            )
        return bounds

    def _get_column_min(self, expectations_by_column):
        """Map each column to the ``min_value`` of its min expectation.

        Non-numeric columns map to ``"N/A"``; numeric columns without an
        ``expect_column_min_to_be_between`` expectation map to ``None``.
        """
        return self._get_column_bound(
            expectations_by_column,
            "expect_column_min_to_be_between",
            "min_value",
        )

    def _get_column_max(self, expectations_by_column):
        """Map each column to the ``max_value`` of its max expectation.

        Non-numeric columns map to ``"N/A"``; numeric columns without an
        ``expect_column_max_to_be_between`` expectation map to ``None``.
        """
        return self._get_column_bound(
            expectations_by_column,
            "expect_column_max_to_be_between",
            "max_value",
        )

    def _get_column_nullity(self, expectations_by_column):
        """Map each column to a human-readable nullity description.

        ``None`` when the column has no
        ``expect_column_values_to_not_be_null`` expectation, ``"never"`` when
        that expectation has no ``mostly``, otherwise the tolerated missing
        percentage.
        """
        column_nullity = {}
        for column, column_expectations in expectations_by_column.items():
            kwargs = self._first_expectation_kwargs(
                column_expectations,
                "expect_column_values_to_not_be_null",
            )
            if kwargs is None:
                column_nullity[column] = None
                continue
            mostly = kwargs.get("mostly")
            if mostly is None:
                column_nullity[column] = "never"
            else:
                # ":g" trims binary floating-point noise, e.g.
                # (1 - 0.95) * 100 == 5.000000000000004 is shown as "5".
                missing_percent = (1 - mostly) * 100
                column_nullity[column] = (
                    f"At most {missing_percent:g}% missing"
                )
        return column_nullity

    def _get_column_set_values(self, expectations_by_column):
        """Map each column to its allowed values as display strings.

        The value is ``None`` when the column has no
        ``expect_column_values_to_be_in_set`` expectation (or no
        ``value_set``); otherwise a list of strings in which every element
        but the last carries a trailing ``", "`` separator.
        """
        column_value_sets = {}
        for column, column_expectations in expectations_by_column.items():
            kwargs = self._first_expectation_kwargs(
                column_expectations,
                "expect_column_values_to_be_in_set",
            )
            value_set = kwargs.get("value_set") if kwargs is not None else None
            if value_set is None:
                column_value_sets[column] = None
                continue
            # str() lets numeric or boolean members be concatenated with the
            # separator instead of raising TypeError.
            labels = [str(value) for value in value_set]
            column_value_sets[column] = (
                [label + ", " for label in labels[:-1]] + labels[-1:]
            )
        return column_value_sets

    def _get_table_columns(self, expectations):
        """Return the ``meta["columns"]`` mapping of the suite.

        Returns an empty dict when ``meta`` or ``columns`` is missing, so
        callers render an empty table instead of failing on ``None``.
        """
        meta = expectations.get("meta") or {}
        return meta.get("columns") or {}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement