Advertisement
Jakzon123

Mod 14 Adv Column Store DB

May 3rd, 2023 (edited)
965
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 10.31 KB | None | 0 0
  1. """
  2. Name: Jackson McAfee
  3. Date: 04/24/2023
  4. Assignment: Module 14: Adv Column Store Querying
  5. Due Date: 04/23/2023
  6. About this project: Sets up main column store db for the amazon.mdbe database
  7. Assumptions:NA
  8. All work below was performed by Jackson McAfee
  9. """
  10.  
  11. # Using monetDb
  12. import monetdbe
  13.  
  14. # Path to the database file you want to create
  15. database = 'amazon.mdbe'
  16.  
  17. try:
  18.     with monetdbe.connect(database) as conn:
  19.         # Drops the table you are going to create if it exists
  20.         conn.set_autocommit(True)
  21.  
  22.         conn.execute("""DROP TABLE IF EXISTS AMAZON""")
  23.  
  24.         # Creates a table that contains all the attributes in your data file
  25.         # attributes are Brand (string), Total (int), Releases (int), #1 Release (int),  Lifetime Gross (int)
  26.         conn.execute("""CREATE TABLE AMAZON
  27.        (
  28.            Name TEXT,
  29.            Main_category TEXT,
  30.            Sub_category TEXT,
  31.            Image TEXT,
  32.            Link TEXT,
  33.            Ratings DOUBLE,
  34.            No_of_ratings INTEGER,
  35.            Discounted_price TEXT,
  36.            Actual_price TEXT)""")
  37.  
  38.         # Load the table created with the data from the data file you selected
  39.         # This adds data from Watches.csv to the table
  40.         conn.execute(f"""
  41.        COPY OFFSET 2 INTO AMAZON
  42.        FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Watches.csv'
  43.        USING DELIMITERS ',', E'\n', '"'
  44.        NULL AS ''
  45.        BEST EFFORT""")
  46.  
  47.         # This adds data from Wallets.csv to the table
  48.         conn.execute(f"""
  49.        COPY OFFSET 2 INTO AMAZON
  50.        FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Wallets.csv'
  51.        USING DELIMITERS ',', E'\n', '"'
  52.        NULL AS ''
  53.        BEST EFFORT""")
  54.  
  55.         # This adds data from Sunglasses.csv to the table
  56.         conn.execute(f"""
  57.        COPY OFFSET 2 INTO AMAZON
  58.        FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Sunglasses.csv'
  59.        USING DELIMITERS ',', E'\n', '"'
  60.        NULL AS ''
  61.        BEST EFFORT""")
  62.  
  63.     with monetdbe.connect(database) as conn:
  64.         # Print the results
  65.         print("Amazon Load Errors")
  66.         results = conn.execute("""SELECT * FROM sys.rejects""")
  67.         for r in results.fetchall():
  68.             print(r)
  69.  
  70.     with monetdbe.connect(database) as conn:
  71.         print("Number of records")
  72.         results = conn.execute("""SELECT COUNT(*) FROM AMAZON""")
  73.         for r in results.fetchall():
  74.             print(r)
  75.  
  76.     conn.close()
  77.  
  78. except Exception as inst:
  79.     print(type(inst))
  80.     print(inst.args)
  81.    
  82. #####################################
  83.  
  84. """
  85. Name: Jackson McAfee
  86. Date: 04/24/2023
  87. Assignment: Module 14: Adv Column Store Querying
  88. Due Date: 04/23/2023
  89. About this project: Creates AMAZON merge table and partitions it into AMAZON_WATCHES, AMAZON_WALLETS, and AMAZON_SUNGLASSES
  90. Assumptions:NA
  91. All work below was performed by Jackson McAfee
  92. """
  93.  
  94. # Using monetDb
  95. import monetdbe
  96.  
  97. # Path to the database file you want to create
  98. database = 'amazon.mdbe'
  99.  
  100. try:
  101.     with monetdbe.connect(database) as conn:
  102.         # Drops the table you are going to create if it exists
  103.         conn.set_autocommit(True)
  104.  
  105.         conn.execute("""DROP TABLE IF EXISTS AMAZON""")
  106.  
  107.         # Creates a table that contains all the attributes in your data file
  108.         # attributes are Brand (string), Total (int), Releases (int), #1 Release (int),  Lifetime Gross (int)
  109.         conn.execute("""CREATE MERGE TABLE AMAZON
  110.        (
  111.        Name TEXT,
  112.        Main_category TEXT,
  113.        Sub_category TEXT,
  114.        Image TEXT,
  115.        Link TEXT,
  116.        Ratings REAL,
  117.        No_of_ratings INTEGER,
  118.        Discounted_price TEXT,
  119.        Actual_price TEXT)
  120.        PARTITION BY VALUES ON (Sub_category)
  121.        ; """)
  122.        
  123.         conn.execute(
  124.          """ALTER TABLE AMAZON
  125.         ADD TABLE AMAZON_WATCHES
  126.         AS PARTITION IN (Watches)""")
  127.  
  128.         conn.execute(
  129.          """ALTER TABLE AMAZON
  130.         ADD TABLE AMAZON_WALLETS
  131.         AS PARTITION IN (Wallets)""")
  132.  
  133.         conn.execute(
  134.          """ALTER TABLE AMAZON
  135.         ADD TABLE AMAZON_SUNGLASSES
  136.         AS PARTITION IN (Sunglasses)""")
  137.  
  138.  
  139.     with monetdbe.connect(database) as conn:
  140.         results = conn.execute("""SELECT Count(*) FROM AMAZON""")
  141.         for r in results.fetchall():
  142.             print(r)
  143.  
  144.     with monetdbe.connect(database) as conn:
  145.         print("Load Errors")
  146.         results = conn.execute("""SELECT * FROM sys.rejects""")
  147.         for r in results.fetchall():
  148.             print(r)
  149.  
  150.     with monetdbe.connect(database) as conn:
  151.         results = conn.execute("""SELECT Count(*) FROM AMAZON_WATCHES""")
  152.         for r in results.fetchall():
  153.             print(r)
  154.  
  155.     conn.close()
  156.  
  157. except Exception as inst:
  158.     print(type(inst))  # the exception instance
  159.     print(inst.args)  # arguments stored in .args
  160.     print(inst)
  161.  
  162. #####################################
  163.  
  164. """
  165. Name: Jackson McAfee
  166. Date: 04/24/2023
  167. Assignment: Module 14: Adv Column Store Querying
  168. Due Date: 04/23/2023
  169. About this project: Creates a summary table for the amazon.mdbe database
  170. Assumptions:NA
  171. All work below was performed by Jackson McAfee
  172. """
  173.  
  174. import monetdbe
  175. import numpy
  176.  
  177. database = 'amazon.mdbe'
  178.  
  179. try:
  180.     with monetdbe.connect(database) as conn:
  181.         # Here we create a primary key, and use "NOT NULL" to prevent inserting invalid data
  182.         conn.set_autocommit(True)
  183.  
  184.         conn.execute(
  185.             """DROP TABLE IF EXISTS AMAZON_SUMMARY""")
  186.  
  187.         conn.execute(
  188.             """CREATE TABLE AMAZON_SUMMARY
  189.            (
  190.            CummSummNumRatings INTEGER,
  191.            Ratings FLOAT,
  192.            ActualPrice STRING)""")
  193.  
  194.         conn.execute(
  195.             """Delete from AMAZON_SUMMARY
  196.               """)
  197.  
  198.         # I do not know why this does not work, documentation did not help me at all.
  199.         # I assume it wants ints as a range instead of floats, but I just don't have
  200.         # the time to figure it out and I do not want to find a completely new database
  201.  
  202.         # I tried changing to ints  and running on CummSummNumRatings but it still did
  203.         # not work. I cannot tell how this is substantially different than the code in
  204.         # the other two files, but it is not doing anything for me.
  205.  
  206.         for ratings in numpy.arange(1, 5.1, 0.1):
  207.             conn.execute(
  208.                 """Insert into AMAZON_SUMMARY
  209.               (CummSumNumRatings,
  210.                Ratings,
  211.                ActualPrice) Values (0,?,0)""",float(ratings))
  212.  
  213.  
  214.         conn.execute(
  215.             """Update AMAZON_SUMMARY
  216.               Set CummSumNumRatings =
  217.               (Select SUM(No_of_Ratings)
  218.                from AMAZON
  219.                where AMAZON.No_of_Ratings =
  220.                AMAZON_SUMMARY.No_of_Ratings
  221.                """)
  222.  
  223.         conn.execute(
  224.             """Delete from AMAZON_SUMMARY
  225.               where CummSumNumRatings = 0 """)
  226.  
  227.  
  228.     with monetdbe.connect(database) as conn:
  229.         print("Load Errors")
  230.         results = conn.execute("""SELECT * FROM sys.rejects""")
  231.         for r in results.fetchall():
  232.             print(r)
  233.  
  234.     with monetdbe.connect(database) as conn:
  235.          print("Num Records")
  236.          results = conn.execute("""SELECT Count(*) FROM AMAZON_SUMMARY""")
  237.          for r in results.fetchall():
  238.              print(r)
  239.  
  240.     conn.close()
  241.  
  242. except Exception as inst:
  243.     print(type(inst))    # the exception instance
  244.     print(inst.args)     # arguments stored in .args
  245.     print(inst)
  246.  
  247.  
  248. #####################################
  249.  
  250. """
  251. Name: Jackson McAfee
  252. Date: 04/24/2023
  253. Assignment: Module 14: Adv Column Store Querying
  254. Due Date: 04/23/2023
  255. About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_WATCHES and AMAZON tables)
  256. Assumptions:NA
  257. All work below was performed by Jackson McAfee
  258. """
  259.  
  260. import monetdbe
  261. import time
  262. database = 'amazon.mdbe'
  263.  
  264. try:
  265.  
  266.     with monetdbe.connect(database) as conn1:
  267.         start = time.time()
  268.         results1 = conn1.execute("""
  269.         SELECT Name, Ratings, No_of_ratings
  270.         FROM AMAZON_WATCHES
  271.         Order by Ratings DESC
  272.         """)
  273.  
  274.         for r in results1.fetchall():
  275.             print(r)
  276.  
  277.         end = time.time()
  278.         executionTime = end-start
  279.         print("Execution Time:", executionTime)
  280.  
  281.         start = time.time()
  282.         results2 = conn1.execute("""
  283.         SELECT Name, Ratings, No_of_ratings
  284.         FROM AMAZON
  285.         Order by Ratings DESC
  286.         """)
  287.  
  288.         for r in results2.fetchall():
  289.             print(r)
  290.            
  291.         end = time.time()
  292.         executionTime = end-start
  293.         print("Execution Time:", executionTime)
  294.  
  295.     conn1.close()
  296. except Exception as inst:
  297.     print(type(inst))    # the exception instance
  298.     print(inst.args)     # arguments stored in .args
  299.     print(inst)
  300.  
  301. ##################################
  302.  
  303. """
  304. Name: Jackson McAfee
  305. Date: 04/24/2023
  306. Assignment: Module 14: Adv Column Store Querying
  307. Due Date: 04/23/2023
  308. About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_SUMMARY table)
  309. Assumptions:NA
  310. All work below was performed by Jackson McAfee
  311. """
  312.  
  313.  
  314. import monetdbe
  315. import time
  316.  
  317. database = 'amazon.mdbe'
  318.  
  319. try:
  320.     with monetdbe.connect(database) as conn:
  321.          start = time.time()
  322.          results = conn.execute("""
  323.         SELECT CummSummNumRatings,Ratings,ActualPrice
  324.         FROM AMAZON_SUMMARY
  325.         Where CummSummNumRatings < 100
  326.         Order by Ratings DESC
  327.         """)
  328.          for r in results.fetchall():
  329.              print(r)
  330.          end = time.time()
  331.          executionTime = end-start
  332.          print("Execution Time:", executionTime)
  333.  
  334.          start = time.time()
  335.          results2 = conn.execute("""
  336.         SELECT CummSummNumRatings,Ratings,ActualPrice
  337.         FROM AMAZON_SUMMARY
  338.         Where CummSummNumRatings < 100
  339.         Order by Ratings DESC
  340.         """)
  341.  
  342.          for r in results2.fetchall():
  343.             print(r)
  344.            
  345.          end = time.time()
  346.          executionTime = end-start
  347.          print("Execution Time:", executionTime)
  348.  
  349.     conn.close()
  350.  
  351. except Exception as inst:
  352.     print(type(inst))    # the exception instance
  353.     print(inst.args)     # arguments stored in .args
  354.     print(inst)
  355.  
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement