SHARE
TWEET

Untitled

a guest Oct 21st, 2019 90 Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. from great_expectations.render.renderer import (
  2.     ExpectationSuitePageRenderer
  3. )
  4. from great_expectations.render.types import (
  5.     RenderedDocumentContent,
  6.     RenderedSectionContent,
  7.     RenderedComponentContent,
  8. )
  9. import pandas as pd
  10.  
  class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
      """Suite page renderer that prepends a "Table Summary" section.

      The extra section holds a header block and a "Data Dictionary" table
      with one row per column listed under ``expectations["meta"]["columns"]``.
      Each row shows the column's description, data-type category, nullity,
      allowed value set and min/max values, all read from the suite's
      expectations for that column.
      """

      def __init__(self):
          """Initialise the base renderer and the type-name lookup table."""
          # Let the base class set up any state its ``render`` depends on.
          # NOTE(review): assumes the base ``__init__`` takes no required
          # arguments -- confirm against the installed great_expectations.
          super().__init__()
          # Category name -> backend type names that belong to that category.
          self.data_types = {
              "integer": {
                  "INTEGER", "int", "INT", "TINYINT", "BYTEINT", "SMALLINT",
                  "BIGINT", "IntegerType", "LongType", "DECIMAL",
              },
              "float": {
                  "FLOAT", "FLOAT4", "FLOAT8", "DOUBLE_PRECISION", "NUMERIC",
                  "FloatType", "DoubleType", "float",
              },
              "string": {"CHAR", "VARCHAR", "TEXT", "StringType", "string", "str"},
              "boolean": {"BOOLEAN", "BOOL", "bool", "BooleanType"},
              "datetime": {
                  "DATETIME", "DATE", "TIMESTAMP", "DateType", "TimestampType",
                  "datetime64", "Timestamp",
              },
          }

      def render(self, expectations):
          """Render the suite and put the "Table Summary" section first.

          Args:
              expectations: the expectation suite; read with ``.get("meta")``
                  and ``.get("expectations")``.

          Returns:
              The document produced by the base ``render`` with its
              ``"sections"`` list prefixed by the summary section.
          """
          rendered_document_content = super().render(expectations)

          summary_section = RenderedSectionContent(**{
              "section_name": "Table Summary",
              "content_blocks": [
                  self._render_data_dictionary_header(expectations),
                  self._render_data_dictionary(expectations),
              ],
          })
          rendered_document_content["sections"] = (
              [summary_section] + rendered_document_content["sections"]
          )
          return rendered_document_content

      def _render_data_dictionary_header(self, expectations):
          """Return the "Table Summary" header block (``expectations`` is unused)."""
          return RenderedComponentContent(**{
              "content_block_type": "header",
              "header": "Table Summary",
              "styling": {
                  "classes": ["col-12"],
                  "header": {
                      "classes": ["alert", "alert-secondary"]
                  }
              }
          })

      def _render_data_dictionary(self, expectations):
          """Return the "Data Dictionary" table block, one row per column.

          Rows are built as plain Python lists rather than through a pandas
          DataFrame: pandas type inference may coerce ``None``/integer cells
          to ``nan``/floats, and a list of lists needs no ndarray handling
          downstream.
          """
          columns = self._get_table_columns(expectations)
          expectations_by_column = self._sort_expectations_by_column(expectations)

          data_types = self._get_column_data_types(expectations_by_column)
          nullity = self._get_column_nullity(expectations_by_column)
          value_sets = self._get_column_set_values(expectations_by_column)
          minimums = self._get_column_min(expectations_by_column)
          maximums = self._get_column_max(expectations_by_column)

          table = [
              [
                  name,
                  # Tolerate columns whose metadata has no description.
                  (info or {}).get("description"),
                  data_types[name],
                  nullity[name],
                  value_sets[name],
                  minimums[name],
                  maximums[name],
              ]
              for name, info in columns.items()
          ]

          return RenderedComponentContent(**{
              "content_block_type": "table",
              "header_row": ["Column Name", "Description","Data Type", "Nullity", "Possible Values", "Min","Max"],
              "header": "Data Dictionary",
              "table": table,
              "styling": {
                  "classes": ["col-12", "table-responsive"],
                  "styles": {
                      "margin-top": "20px",
                      "margin-bottom": "20px"
                  },
                  "body": {
                      "classes": ["table", "table-sm"]
                  }
              },
          })

      def _sort_expectations_by_column(self, expectations):
          """Group the suite's expectations by the column they target.

          Returns:
              dict mapping every column name from the suite metadata to the
              list (possibly empty, in suite order) of expectations whose
              ``kwargs["column"]`` equals that name. Expectations without a
              ``column`` kwarg, or naming an unlisted column, are left out.
          """
          expectations_by_column = {
              column: [] for column in self._get_table_columns(expectations)
          }
          # Single pass over the suite instead of one filter per column.
          for expectation in expectations.get("expectations") or []:
              column = self._expectation_kwarg(expectation, "column")
              if column in expectations_by_column:
                  expectations_by_column[column].append(expectation)
          return expectations_by_column

      @staticmethod
      def _first_expectation_of_type(column_expectations, expectation_type):
          """Return the first expectation with ``expectation_type``, else None."""
          for expectation in column_expectations:
              if expectation["expectation_type"] == expectation_type:
                  return expectation
          return None

      @staticmethod
      def _expectation_kwarg(expectation, name):
          """Return ``expectation["kwargs"][name]``, or None if either is absent."""
          kwargs = expectation.get("kwargs") or {}
          return kwargs.get(name)

      def _lookup_data_type(self, type_name):
          """Return the ``self.data_types`` category containing ``type_name``.

          Returns None when the name belongs to no category, so an unknown
          backend type yields an empty cell instead of an unpacking error.
          """
          for category, type_names in self.data_types.items():
              if type_name in type_names:
                  return category
          return None

      def _get_column_data_types(self,expectations_by_column):
          """Map each column to its data-type category (or None).

          The category is looked up from the first entry of the ``type_list``
          kwarg of the column's first
          ``expect_column_values_to_be_in_type_list`` expectation. Columns
          without that expectation, with a missing/empty ``type_list``, or
          with an unrecognised type name map to None.
          """
          column_data_types = {}
          for column, column_expectations in expectations_by_column.items():
              expectation = self._first_expectation_of_type(
                  column_expectations, "expect_column_values_to_be_in_type_list"
              )
              type_list = None
              if expectation is not None:
                  type_list = self._expectation_kwarg(expectation, "type_list")
              column_data_types[column] = (
                  self._lookup_data_type(type_list[0]) if type_list else None
              )
          return column_data_types

      def _get_numeric_bound(self, expectations_by_column, expectation_type, kwarg_name):
          """Shared implementation of ``_get_column_min`` / ``_get_column_max``.

          For "integer"/"float" columns, returns ``kwarg_name`` from the first
          expectation of ``expectation_type`` (None when there is none); every
          other column maps to the string "N/A".
          """
          bounds = {}
          data_types = self._get_column_data_types(expectations_by_column)
          for column, data_type in data_types.items():
              if data_type not in ("integer", "float"):
                  bounds[column] = "N/A"
                  continue
              expectation = self._first_expectation_of_type(
                  expectations_by_column[column], expectation_type
              )
              if expectation is None:
                  bounds[column] = None
              else:
                  bounds[column] = self._expectation_kwarg(expectation, kwarg_name)
          return bounds

      def _get_column_min(self, expectations_by_column):
          """Map each column to the ``min_value`` of its
          ``expect_column_min_to_be_between`` expectation.

          Non-numeric columns map to "N/A"; numeric columns without the
          expectation map to None.
          """
          return self._get_numeric_bound(
              expectations_by_column, "expect_column_min_to_be_between", "min_value"
          )

      def _get_column_max(self, expectations_by_column):
          """Map each column to the ``max_value`` of its
          ``expect_column_max_to_be_between`` expectation.

          Non-numeric columns map to "N/A"; numeric columns without the
          expectation map to None.
          """
          return self._get_numeric_bound(
              expectations_by_column, "expect_column_max_to_be_between", "max_value"
          )

      def _get_column_nullity(self, expectations_by_column):
          """Map each column to a description of how many values may be missing.

          Uses the column's first ``expect_column_values_to_not_be_null``
          expectation: "never" when it has no ``mostly`` kwarg, otherwise
          "At most X% missing". Columns without the expectation map to None.
          """
          column_null_expectations = {}
          for column, column_expectations in expectations_by_column.items():
              expectation = self._first_expectation_of_type(
                  column_expectations, "expect_column_values_to_not_be_null"
              )
              if expectation is None:
                  column_null_expectations[column] = None
                  continue
              mostly = self._expectation_kwarg(expectation, "mostly")
              if mostly is None:
                  column_null_expectations[column] = "never"
              else:
                  # ``:g`` hides float noise such as 5.000000000000004.
                  column_null_expectations[column] = (
                      f"At most {(1 - mostly) * 100:g}% missing"
                  )
          return column_null_expectations

      def _get_column_set_values(self, expectations_by_column):
          """Map each column to its allowed values, ready for display.

          Uses the ``value_set`` kwarg of the column's first
          ``expect_column_values_to_be_in_set`` expectation. The result is a
          list of strings in which every item but the last ends with ", ".
          Columns without the expectation (or without ``value_set``) map to
          None.
          """
          column_value_set_expectations = {}
          for column, column_expectations in expectations_by_column.items():
              expectation = self._first_expectation_of_type(
                  column_expectations, "expect_column_values_to_be_in_set"
              )
              value_set = None
              if expectation is not None:
                  value_set = self._expectation_kwarg(expectation, "value_set")
              if value_set is None:
                  column_value_set_expectations[column] = None
                  continue
              last_index = len(value_set) - 1
              # str() lets numeric or other non-string members be displayed.
              column_value_set_expectations[column] = [
                  str(value) + ", " if index < last_index else str(value)
                  for index, value in enumerate(value_set)
              ]
          return column_value_set_expectations

      def _get_table_columns(self,expectations):
          """Return the ``meta["columns"]`` mapping of the suite.

          Returns an empty dict when ``meta`` or ``columns`` is missing, so
          callers can iterate the result unconditionally.
          """
          meta = expectations.get("meta") or {}
          return meta.get("columns") or {}
RAW Paste Data
We use cookies for various purposes including analytics. By continuing to use Pastebin, you agree to our use of cookies as described in the Cookies Policy. OK, I Understand
 
Top