Advertisement
Fabio_LaF

Classe DataParser

Aug 17th, 2022 (edited)
514
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.63 KB | None | 0 0
  1. # file name: dataParser.py
  2.  
  3. import pgConnector
  4. import attribute as attr
  5.  
  6. # This class hides the functionality of reading the data from a table and arranging it on ways that are useful
  7. # to the main program, such as a list of lists of atributes, a list counting how many times a certain attribute
  8. # appears on the table, among others.
  9. class DataParser:
  10.   def __init__(self, db_connector):
  11.     self.__field_names = []
  12.     self.__db_connector = db_connector
  13.  
  14.   def amt_fields(self):
  15.     sum = 0
  16.     for (_, included) in self.__field_names:
  17.       sum += 1 if included else 0
  18.  
  19.     return sum
  20.    
  21.   def initialize_field_names(self, table_name, excluded_fields = []):
  22.     # List of tuples where the second value of the tuple is the
  23.     # field name and the first is the field number
  24.     attribs_raw = self.__db_connector.do_query(
  25.       "SELECT DISTINCT\n"+
  26.           "a.attnum as num,\n"+
  27.           "a.attname as name\n"+
  28.       "FROM pg_attribute a \n"+
  29.       "JOIN pg_class pgc ON pgc.oid = a.attrelid\n"+
  30.       "LEFT JOIN pg_index i ON \n"+
  31.           "(pgc.oid = i.indrelid AND i.indkey[0] = a.attnum)\n"+
  32.       "LEFT JOIN pg_description com on \n"+
  33.           "(pgc.oid = com.objoid AND a.attnum = com.objsubid)\n"+
  34.       "LEFT JOIN pg_attrdef def ON \n"+
  35.           "(a.attrelid = def.adrelid AND a.attnum = def.adnum)\n"+
  36.       "WHERE a.attnum > 0 AND pgc.oid = a.attrelid\n"+
  37.       "AND pg_table_is_visible(pgc.oid)\n"+
  38.       "AND NOT a.attisdropped\n"+
  39.       "AND pgc.relname = \'" + table_name + "\'\n"+
  40.       "ORDER BY a.attnum;"
  41.     )
  42.  
  43.     self.__field_names = []
  44.     excluded_fields_lower = [field.lower() for field in excluded_fields]
  45.  
  46.     # Filtering only the field names
  47.     for i in attribs_raw:
  48.       included = not (i[1].lower() in excluded_fields_lower)
  49.       self.__field_names.append((i[1], included))
  50.  
  51.   def parse_objects(self, table_name, excluded_fields = []):
  52.     self.initialize_field_names(table_name, excluded_fields)
  53.     parsed_data = []
  54.  
  55.     raw_data = self.__db_connector.do_query("select * from " + table_name)
  56.  
  57.     for (i, row) in enumerate(raw_data):
  58.       parsed_data.append([])
  59.       for (index, value) in enumerate(row):
  60.         if (self.__field_names[index][1]):
  61.           parsed_data[i].append(attr.Attribute(self.__field_names[index][0], value))
  62.  
  63.     return (self.__field_names, parsed_data)
  64.  
  65.   def __count_field_data(self, field_name, table_name, class_field, as_dict):
  66.     parsed_data = []
  67.     # this query counts, for each value of an attribute, how many times it appears in each class
  68.     q_attr_class = ("(select " + field_name + ", " + class_field + ", count(*) as class_count from " + table_name +
  69.                     " group by " + field_name + ", " + class_field + " order by " + field_name + ") as t1 ")
  70.  
  71.     # this query counts, for each value of an attribute, how many times it shows up
  72.     q_attr_amt = "(select " + field_name + ", count(*) as attr_count from " + table_name + " group by " + field_name + ") as t2 "
  73.  
  74.     query_lines = ("select t1." + field_name + ", t1." + class_field + ", t1.class_count, t2.attr_count from " +
  75.                    q_attr_class +
  76.                    "inner join " +
  77.                    q_attr_amt +
  78.                    "on t1." + field_name + " = t2." + field_name
  79.                   )
  80.  
  81.     field_values = self.__db_connector.do_query(query_lines)
  82.    
  83.     if as_dict:
  84.       parsed_data = {}
  85.  
  86.       for (value, c, class_count, value_count) in field_values:
  87.         attr_obj = attr.Attribute(field_name, value)
  88.         parsed_data[(attr_obj, c)] = (class_count, value_count)
  89.     else:
  90.       for (value, c, class_count, value_count) in field_values:
  91.         attr_obj = attr.Attribute(field_name, value)
  92.         parsed_data.append((attr_obj, c, class_count, value_count))
  93.  
  94.     return parsed_data
  95.  
  96.   def parse_count(self, table_name, class_field, excluded_fields = [], as_dict = False):
  97.     if excluded_fields == []:
  98.       excluded_fields = [class_field]
  99.  
  100.     parsed_data = None
  101.  
  102.     if as_dict:
  103.       parsed_data = {}
  104.     else:
  105.       parsed_data = []
  106.     self.initialize_field_names(table_name, excluded_fields)
  107.    
  108.     for (field, included) in self.__field_names:
  109.       if not included:
  110.         continue
  111.  
  112.       if not as_dict:
  113.         parsed_data = parsed_data + self.__count_field_data(field, table_name, class_field, as_dict)
  114.       else:
  115.         parsed_data.update(self.__count_field_data(field, table_name, class_field, as_dict))
  116.  
  117.     return (self.__field_names, parsed_data)
  118.  
  119.   def __count_field_data_tan(self, field_i, field_j, table_name, class_field):
  120.     parsed_data = []
  121.  
  122.     # this query counts how many times each pair (attr_i, attr_j) shows up in each class
  123.     q_attr_class = ("(select " + field_i + ", " + field_j + ", " + class_field + ", count(*) as class_count from " + table_name +
  124.                     " group by " + field_i + ", " + field_j + ", " + class_field + " order by " + field_i + ", " + field_j + ") as t1 ")
  125.  
  126.     # this query counts how many times each pair (attr_i, attr_j) shows up
  127.     q_attr_amt = "(select " + field_i + ", " + field_j + ", count(*) as attr_count from " + table_name + " group by " + field_i + ", " + field_j + ") as t2 "
  128.  
  129.     query_lines = ("select t1." + field_i + ", t1." + field_j + ", t1." + class_field + ", t1.class_count, t2.attr_count from " +
  130.                    q_attr_class +
  131.                    "inner join " +
  132.                    q_attr_amt +
  133.                    "on t1." + field_i + " = t2." + field_i +
  134.                    " and t1." + field_j + " = t2." + field_j
  135.                   )
  136.  
  137.     field_values = self.__db_connector.do_query(query_lines)
  138.  
  139.     for (value_i, value_j, c, class_count, value_count) in field_values:
  140.       attr_obj_i = attr.Attribute(field_i, value_i)
  141.       attr_obj_j = attr.Attribute(field_j, value_j)
  142.       parsed_data.append((attr_obj_i, attr_obj_j, c, class_count, value_count))
  143.  
  144.     return parsed_data
  145.  
  146.   def parse_count_tan(self, table_name, class_field, excluded_fields = []):
  147.     excluded = []
  148.     if class_field not in excluded_fields:
  149.       excluded = excluded_fields + [class_field]
  150.     else:
  151.       excluded = excluded_fields
  152.  
  153.     self.initialize_field_names(table_name, excluded)
  154.     parsed_data = []
  155.     field_pairs = []
  156.  
  157.     for i in range(len(self.__field_names)):
  158.       field = self.__field_names[i]
  159.       other_fields = self.__field_names[i+1:]
  160.  
  161.       for other_field in other_fields:
  162.         field_pairs.append((field, other_field))
  163.  
  164.     for pair in field_pairs:
  165.       if (pair[0][1] and pair[1][1]):
  166.         parsed_data = parsed_data + self.__count_field_data_tan(pair[0][0], pair[1][0], table_name, class_field)
  167.  
  168.     return parsed_data
  169.      
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement