Advertisement
Guest User

Untitled

a guest
Oct 21st, 2019
139
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.91 KB | None | 0 0
  1. from great_expectations.render.renderer import (
  2. ExpectationSuitePageRenderer
  3. )
  4. from great_expectations.render.types import (
  5. RenderedDocumentContent,
  6. RenderedSectionContent,
  7. RenderedComponentContent,
  8. )
  9. import pandas as pd
  10.  
  11. class MyCustomExpectationSuitePageRenderer(ExpectationSuitePageRenderer):
  12. def __init__(self):
  13. self.data_types = {
  14. "integer": set(["INTEGER", "int", "INT", "TINYINT", "BYTEINT", "SMALLINT", "BIGINT", "IntegerType", "LongType", "DECIMAL"]),
  15. "float": set(["FLOAT", "FLOAT4", "FLOAT8", "DOUBLE_PRECISION", "NUMERIC", "FloatType", "DoubleType", "float"]),
  16. "string": set(["CHAR", "VARCHAR", "TEXT", "StringType", "string", "str"]),
  17. "boolean": set(["BOOLEAN", "BOOL", "bool", "BooleanType"]),
  18. "datetime": set(["DATETIME", "DATE", "TIMESTAMP", "DateType", "TimestampType", "datetime64", "Timestamp"]),
  19. }
  20.  
  21. def render(self, expectations):
  22. rendered_document_content = super().render(expectations)
  23.  
  24. content_blocks = [
  25. self._render_data_dictionary_header(expectations),
  26. self._render_data_dictionary(expectations)
  27. ]
  28.  
  29. rendered_document_content["sections"] = [
  30. RenderedSectionContent(**{
  31. "section_name": "Table Summary",
  32. "content_blocks": content_blocks
  33. })
  34. ] + rendered_document_content["sections"]
  35.  
  36. return rendered_document_content
  37.  
  38. def _render_data_dictionary_header(self, expectations):
  39. return RenderedComponentContent(**{
  40. "content_block_type": "header",
  41. "header": "Table Summary",
  42. "styling": {
  43. "classes": ["col-12"],
  44. "header": {
  45. "classes": ["alert", "alert-secondary"]
  46. }
  47. }
  48. })
  49. def _render_data_dictionary(self, expectations):
  50. data_dictionary_df = pd.DataFrame()
  51. expectations_by_column = self._sort_expectations_by_column(expectations)
  52. data_dictionary_df["Column Name"] = [i for i in self._get_table_columns(expectations)]
  53. data_dictionary_df["Description"] = [i["description"] for i in self._get_table_columns(expectations).values()]
  54. data_dictionary_df["Data Type"] = [i for i in self._get_column_data_types(expectations_by_column).values()]
  55. data_dictionary_df["Nullity"] = [i for i in self._get_column_nullity(expectations_by_column).values()]
  56. data_dictionary_df["Possible Values"] = [i for i in self._get_column_set_values(expectations_by_column).values()]
  57. data_dictionary_df["Column Min"] = [i for i in self._get_column_min(expectations_by_column).values()]
  58. data_dictionary_df["Column Max"] = [i for i in self._get_column_max(expectations_by_column).values()]
  59.  
  60. return RenderedComponentContent(**{
  61. "content_block_type": "table",
  62. "header_row": ["Column Name", "Description","Data Type", "Nullity", "Possible Values", "Min","Max"],
  63. "header": "Data Dictionary",
  64. "table": data_dictionary_df.values,
  65. "styling": {
  66. "classes": ["col-12", "table-responsive"],
  67. "styles": {
  68. "margin-top": "20px",
  69. "margin-bottom": "20px"
  70. },
  71. "body": {
  72. "classes": ["table", "table-sm"]
  73. }
  74. },
  75. })
  76. def _sort_expectations_by_column(self, expectations):
  77. expectations_by_column = {}
  78. expectations_dictionary = expectations.get("expectations")
  79. column_names = list(self._get_table_columns(expectations).keys())
  80. for column in column_names:
  81. expectations_by_column[column] = list(filter(
  82. lambda x: x.get("kwargs").get("column")==column,
  83. expectations_dictionary))
  84. return expectations_by_column
  85.  
  86. def _get_column_data_types(self,expectations_by_column):
  87. column_data_type_expectations = {}
  88. for k,v in expectations_by_column.items():
  89. expectation = [i for i in v if i["expectation_type"] == "expect_column_values_to_be_in_type_list"]
  90. if len(expectation)>0:
  91. type_list = expectation[0].get("kwargs").get("type_list")
  92. if len(type_list) > 0:
  93. type_lookup = [k for k,v in self.data_types.items() if type_list[0] in v]
  94. column_data_type_expectations[k], = type_lookup
  95. else:
  96. column_data_type_expectations[k] = None
  97. return column_data_type_expectations
  98.  
  99. def _get_column_min(self, expectations_by_column):
  100. column_min_expectations = {}
  101. expectation_types = self._get_column_data_types(expectations_by_column)
  102. for k,v in expectation_types.items():
  103. if (v == "integer" or v == "float"):
  104. column_expectations = expectations_by_column[k]
  105. min_expectation = [i for i in column_expectations if i["expectation_type"] == "expect_column_min_to_be_between"]
  106. if len(min_expectation) > 0:
  107. min_value = min_expectation[0].get("kwargs").get("min_value")
  108. column_min_expectations[k] = min_value
  109. else:
  110. column_min_expectations[k] = None
  111. else:
  112. column_min_expectations[k] = "N/A"
  113. return column_min_expectations
  114.  
  115. def _get_column_max(self, expectations_by_column):
  116. column_max_expectations = {}
  117. expectation_types = self._get_column_data_types(expectations_by_column)
  118. for k,v in expectation_types.items():
  119. if (v == "integer" or v == "float"):
  120. column_expectations = expectations_by_column[k]
  121. max_expectation = [i for i in column_expectations if i["expectation_type"] == "expect_column_max_to_be_between"]
  122. if len(max_expectation) > 0:
  123. max_value = max_expectation[0].get("kwargs").get("max_value")
  124. column_max_expectations[k] = max_value
  125. else:
  126. column_max_expectations[k] = None
  127. else:
  128. column_max_expectations[k] = "N/A"
  129. return column_max_expectations
  130.  
  131. def _get_column_nullity(self, expectations_by_column):
  132. column_null_expectations = {}
  133. expectation_types = self._get_column_data_types(expectations_by_column)
  134. for k,v in expectation_types.items():
  135. column_expectations = expectations_by_column[k]
  136. null_expectation = [i for i in column_expectations if i["expectation_type"] == "expect_column_values_to_not_be_null"]
  137. if len(null_expectation) > 0:
  138. nullity = null_expectation[0].get("kwargs").get("mostly")
  139. if nullity is None:
  140. column_null_expectations[k] = "never"
  141. else:
  142. column_null_expectations[k] = f"At most {(1-nullity)*100}% missing"
  143. else:
  144. column_null_expectations[k] = None
  145. return column_null_expectations
  146.  
  147. def _get_column_set_values(self, expectations_by_column):
  148. column_value_set_expectations = {}
  149. expectation_types = self._get_column_data_types(expectations_by_column)
  150. for k,v in expectation_types.items():
  151. column_expectations = expectations_by_column[k]
  152. value_set_expectation = [i for i in column_expectations if i["expectation_type"] == "expect_column_values_to_be_in_set"]
  153. if len(value_set_expectation) > 0:
  154. expectation_set = value_set_expectation[0].get("kwargs").get("value_set")
  155. expectation_set = [i + ", " if index < len(expectation_set)-1 else i for index, i in enumerate(expectation_set) ]
  156. column_value_set_expectations[k] = expectation_set
  157. else:
  158. column_value_set_expectations[k] = None
  159. return column_value_set_expectations
  160.  
  161. def _get_table_columns(self,expectations):
  162. return expectations.get("meta").get("columns")
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement