Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- # file name: dataParser.py
- import pgConnector
- import attribute as attr
- # This class hides the functionality of reading the data from a table and arranging it in ways that are useful
- # to the main program, such as a list of lists of attributes, a list counting how many times a certain attribute
- # appears in the table, among others.
class DataParser:
    """Read a table through a database connector and arrange it for the caller.

    The connector only needs a ``do_query(sql)`` method returning an iterable
    of row tuples. Depending on the method called, the table is returned as
    rows of attribute objects, or as per-class occurrence counts for single
    attributes or for pairs of attributes.

    NOTE(review): every query is built by plain string concatenation and the
    table/field names are inserted verbatim, so they must come from trusted
    code, never from end-user input.
    """

    def __init__(self, db_connector):
        """Keep the connector; field names are loaded by the parse methods.

        db_connector -- object exposing ``do_query(sql)``.
        """
        # List of (field_name, included) tuples in column order, rebuilt by
        # every call to initialize_field_names().
        self.__field_names = []
        self.__db_connector = db_connector

    @staticmethod
    def __with_class_excluded(excluded_fields, class_field):
        """Return a new exclusion list that is guaranteed to hold class_field.

        The caller's list is copied, never modified. ``None`` or an empty
        value yields ``[class_field]``.
        """
        excluded = list(excluded_fields) if excluded_fields else []
        if class_field not in excluded:
            excluded.append(class_field)
        return excluded

    def amt_fields(self):
        """Return how many of the currently loaded fields are marked included."""
        return sum(1 for (_, included) in self.__field_names if included)

    def initialize_field_names(self, table_name, excluded_fields=None):
        """Load the column names of table_name and flag the excluded ones.

        table_name      -- name of a table visible in the current search path.
        excluded_fields -- iterable of column names to flag as not included;
                           compared case-insensitively. ``None`` means none.

        The result replaces the stored list of (field_name, included) tuples,
        ordered by column number. Nothing is returned.
        """
        # Each returned row is (column number, column name).
        attribs_raw = self.__db_connector.do_query(
            "SELECT DISTINCT\n"
            "a.attnum as num,\n"
            "a.attname as name\n"
            "FROM pg_attribute a \n"
            "JOIN pg_class pgc ON pgc.oid = a.attrelid\n"
            "LEFT JOIN pg_index i ON \n"
            "(pgc.oid = i.indrelid AND i.indkey[0] = a.attnum)\n"
            "LEFT JOIN pg_description com on \n"
            "(pgc.oid = com.objoid AND a.attnum = com.objsubid)\n"
            "LEFT JOIN pg_attrdef def ON \n"
            "(a.attrelid = def.adrelid AND a.attnum = def.adnum)\n"
            "WHERE a.attnum > 0 AND pgc.oid = a.attrelid\n"
            "AND pg_table_is_visible(pgc.oid)\n"
            "AND NOT a.attisdropped\n"
            "AND pgc.relname = '" + table_name + "'\n"
            "ORDER BY a.attnum;"
        )
        # A set gives constant-time membership tests for the filter below.
        excluded_lower = set(field.lower() for field in (excluded_fields or ()))
        # Keep only the name of each column, paired with its inclusion flag.
        self.__field_names = [
            (row[1], row[1].lower() not in excluded_lower) for row in attribs_raw
        ]

    def parse_objects(self, table_name, excluded_fields=None):
        """Return every row of table_name as a list of attribute objects.

        Returns a tuple ``(field_names, rows)`` where field_names is the list
        of (field_name, included) tuples and rows holds, for each table row,
        one ``attr.Attribute(name, value)`` per included column.

        Values are matched to names by position, so this assumes ``select *``
        yields columns in the same order as the column-number ordering used
        by initialize_field_names() -- TODO confirm for the tables in use.
        """
        self.initialize_field_names(table_name, excluded_fields)
        raw_data = self.__db_connector.do_query("select * from " + table_name)
        parsed_data = []
        for row in raw_data:
            parsed_data.append([
                attr.Attribute(self.__field_names[index][0], value)
                for (index, value) in enumerate(row)
                if self.__field_names[index][1]
            ])
        return (self.__field_names, parsed_data)

    def __count_field_data(self, field_name, table_name, class_field, as_dict):
        """Count, for one attribute, its occurrences per value and per class.

        Returns a list of ``(attribute, class, class_count, value_count)``
        tuples, or when as_dict is true a dict mapping ``(attribute, class)``
        to ``(class_count, value_count)``. The dict form presumably relies on
        attr.Attribute being hashable -- verify against its definition.
        """
        names = {"field": field_name, "cls": class_field, "table": table_name}
        # For each value of the attribute: how often it appears in each class.
        q_attr_class = (
            "(select {field}, {cls}, count(*) as class_count from {table}"
            " group by {field}, {cls} order by {field}) as t1 "
        ).format(**names)
        # For each value of the attribute: how often it appears overall.
        q_attr_amt = (
            "(select {field}, count(*) as attr_count from {table}"
            " group by {field}) as t2 "
        ).format(**names)
        query_lines = (
            "select t1.{field}, t1.{cls}, t1.class_count, t2.attr_count from ".format(**names)
            + q_attr_class
            + "inner join "
            + q_attr_amt
            + "on t1.{field} = t2.{field}".format(**names)
        )
        field_values = self.__db_connector.do_query(query_lines)
        if as_dict:
            parsed_dict = {}
            for (value, c, class_count, value_count) in field_values:
                key = (attr.Attribute(field_name, value), c)
                parsed_dict[key] = (class_count, value_count)
            return parsed_dict
        return [
            (attr.Attribute(field_name, value), c, class_count, value_count)
            for (value, c, class_count, value_count) in field_values
        ]

    def parse_count(self, table_name, class_field, excluded_fields=None, as_dict=False):
        """Collect per-class value counts for every included attribute.

        table_name      -- table to read.
        class_field     -- column holding the class label.
        excluded_fields -- extra columns to leave out; the caller's list is
                           not modified.
        as_dict         -- selects the dict result form over the list form
                           (see __count_field_data).

        The class column is always excluded from the counted attributes,
        whether or not the caller listed it, matching parse_count_tan().

        Returns ``(field_names, parsed_data)``.
        """
        excluded = self.__with_class_excluded(excluded_fields, class_field)
        self.initialize_field_names(table_name, excluded)
        parsed_data = {} if as_dict else []
        for (field, included) in self.__field_names:
            if not included:
                continue
            counts = self.__count_field_data(field, table_name, class_field, as_dict)
            if as_dict:
                parsed_data.update(counts)
            else:
                # extend() grows the list in place instead of copying it on
                # every field.
                parsed_data.extend(counts)
        return (self.__field_names, parsed_data)

    def __count_field_data_tan(self, field_i, field_j, table_name, class_field):
        """Count occurrences of each value pair of two attributes, per class.

        Returns a list of
        ``(attribute_i, attribute_j, class, class_count, pair_count)`` tuples.
        """
        names = {"fi": field_i, "fj": field_j, "cls": class_field, "table": table_name}
        # How many times each pair (attr_i, attr_j) shows up in each class.
        q_attr_class = (
            "(select {fi}, {fj}, {cls}, count(*) as class_count from {table}"
            " group by {fi}, {fj}, {cls} order by {fi}, {fj}) as t1 "
        ).format(**names)
        # How many times each pair (attr_i, attr_j) shows up overall.
        q_attr_amt = (
            "(select {fi}, {fj}, count(*) as attr_count from {table}"
            " group by {fi}, {fj}) as t2 "
        ).format(**names)
        query_lines = (
            "select t1.{fi}, t1.{fj}, t1.{cls}, t1.class_count, t2.attr_count from ".format(**names)
            + q_attr_class
            + "inner join "
            + q_attr_amt
            + "on t1.{fi} = t2.{fi} and t1.{fj} = t2.{fj}".format(**names)
        )
        field_values = self.__db_connector.do_query(query_lines)
        return [
            (
                attr.Attribute(field_i, value_i),
                attr.Attribute(field_j, value_j),
                c,
                class_count,
                value_count,
            )
            for (value_i, value_j, c, class_count, value_count) in field_values
        ]

    def parse_count_tan(self, table_name, class_field, excluded_fields=None):
        """Collect per-class pair counts for every pair of included attributes.

        Each unordered pair of included fields is queried once, in column
        order. The class column is always excluded, and the caller's
        excluded_fields list is not modified.

        Returns the concatenated list produced by __count_field_data_tan().
        """
        excluded = self.__with_class_excluded(excluded_fields, class_field)
        self.initialize_field_names(table_name, excluded)
        # Dropping excluded fields before pairing yields the same pairs, in
        # the same order, as pairing everything and skipping afterwards.
        included_names = [name for (name, included) in self.__field_names if included]
        parsed_data = []
        for (position, first) in enumerate(included_names):
            for second in included_names[position + 1:]:
                parsed_data.extend(
                    self.__count_field_data_tan(first, second, table_name, class_field)
                )
        return parsed_data
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement