Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- """
- Name: Jackson McAfee
- Date: 04/24/2023
- Assignment: Module 14: Adv Column Store Querying
- Due Date: 04/23/2023
- About this project: Sets up main column store db for the amazon.mdbe database
- Assumptions:NA
- All work below was performed by Jackson McAfee
- """
- # Using monetDb
- import monetdbe
- # Path to the database file you want to create
- database = 'amazon.mdbe'
- try:
- with monetdbe.connect(database) as conn:
- # Drops the table you are going to create if it exists
- conn.set_autocommit(True)
- conn.execute("""DROP TABLE IF EXISTS AMAZON""")
- # Creates a table that contains all the attributes in your data file
- # attributes are Brand (string), Total (int), Releases (int), #1 Release (int), Lifetime Gross (int)
- conn.execute("""CREATE TABLE AMAZON
- (
- Name TEXT,
- Main_category TEXT,
- Sub_category TEXT,
- Image TEXT,
- Link TEXT,
- Ratings DOUBLE,
- No_of_ratings INTEGER,
- Discounted_price TEXT,
- Actual_price TEXT)""")
- # Load the table created with the data from the data file you selected
- # This adds data from Watches.csv to the table
- conn.execute(f"""
- COPY OFFSET 2 INTO AMAZON
- FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Watches.csv'
- USING DELIMITERS ',', E'\n', '"'
- NULL AS ''
- BEST EFFORT""")
- # This adds data from Wallets.csv to the table
- conn.execute(f"""
- COPY OFFSET 2 INTO AMAZON
- FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Wallets.csv'
- USING DELIMITERS ',', E'\n', '"'
- NULL AS ''
- BEST EFFORT""")
- # This adds data from Sunglasses.csv to the table
- conn.execute(f"""
- COPY OFFSET 2 INTO AMAZON
- FROM R'C:\\Users\Jackson McAfee\\Documents\\Programming\\mod14\\data\\Sunglasses.csv'
- USING DELIMITERS ',', E'\n', '"'
- NULL AS ''
- BEST EFFORT""")
- with monetdbe.connect(database) as conn:
- # Print the results
- print("Amazon Load Errors")
- results = conn.execute("""SELECT * FROM sys.rejects""")
- for r in results.fetchall():
- print(r)
- with monetdbe.connect(database) as conn:
- print("Number of records")
- results = conn.execute("""SELECT COUNT(*) FROM AMAZON""")
- for r in results.fetchall():
- print(r)
- conn.close()
- except Exception as inst:
- print(type(inst))
- print(inst.args)
- #####################################
- """
- Name: Jackson McAfee
- Date: 04/24/2023
- Assignment: Module 14: Adv Column Store Querying
- Due Date: 04/23/2023
- About this project: Creates AMAZON merge table and partitions it into AMAZON_WATCHES, AMAZON_WALLETS, and AMAZON_SUNGLASSES
- Assumptions:NA
- All work below was performed by Jackson McAfee
- """
- # Using monetDb
- import monetdbe
- # Path to the database file you want to create
- database = 'amazon.mdbe'
- try:
- with monetdbe.connect(database) as conn:
- # Drops the table you are going to create if it exists
- conn.set_autocommit(True)
- conn.execute("""DROP TABLE IF EXISTS AMAZON""")
- # Creates a table that contains all the attributes in your data file
- # attributes are Brand (string), Total (int), Releases (int), #1 Release (int), Lifetime Gross (int)
- conn.execute("""CREATE MERGE TABLE AMAZON
- (
- Name TEXT,
- Main_category TEXT,
- Sub_category TEXT,
- Image TEXT,
- Link TEXT,
- Ratings REAL,
- No_of_ratings INTEGER,
- Discounted_price TEXT,
- Actual_price TEXT)
- PARTITION BY VALUES ON (Sub_category)
- ; """)
- conn.execute(
- """ALTER TABLE AMAZON
- ADD TABLE AMAZON_WATCHES
- AS PARTITION IN (Watches)""")
- conn.execute(
- """ALTER TABLE AMAZON
- ADD TABLE AMAZON_WALLETS
- AS PARTITION IN (Wallets)""")
- conn.execute(
- """ALTER TABLE AMAZON
- ADD TABLE AMAZON_SUNGLASSES
- AS PARTITION IN (Sunglasses)""")
- with monetdbe.connect(database) as conn:
- results = conn.execute("""SELECT Count(*) FROM AMAZON""")
- for r in results.fetchall():
- print(r)
- with monetdbe.connect(database) as conn:
- print("Load Errors")
- results = conn.execute("""SELECT * FROM sys.rejects""")
- for r in results.fetchall():
- print(r)
- with monetdbe.connect(database) as conn:
- results = conn.execute("""SELECT Count(*) FROM AMAZON_WATCHES""")
- for r in results.fetchall():
- print(r)
- conn.close()
- except Exception as inst:
- print(type(inst)) # the exception instance
- print(inst.args) # arguments stored in .args
- print(inst)
- #####################################
- """
- Name: Jackson McAfee
- Date: 04/24/2023
- Assignment: Module 14: Adv Column Store Querying
- Due Date: 04/23/2023
- About this project: Creates a summary table for the amazon.mdbe database
- Assumptions:NA
- All work below was performed by Jackson McAfee
- """
- import monetdbe
- import numpy
- database = 'amazon.mdbe'
- try:
- with monetdbe.connect(database) as conn:
- # Here we create a primary key, and use "NOT NULL" to prevent inserting invalid data
- conn.set_autocommit(True)
- conn.execute(
- """DROP TABLE IF EXISTS AMAZON_SUMMARY""")
- conn.execute(
- """CREATE TABLE AMAZON_SUMMARY
- (
- CummSummNumRatings INTEGER,
- Ratings FLOAT,
- ActualPrice STRING)""")
- conn.execute(
- """Delete from AMAZON_SUMMARY
- """)
- # I do not know why this does not work, documentation did not help me at all.
- # I assume it wants ints as a range instead of floats, but I just don't have
- # the time to figure it out and I do not want to find a completely new database
- # I tried changing to ints and running on CummSummNumRatings but it still did
- # not work. I cannot tell how this is substantially different than the code in
- # the other two files, but it is not doing anything for me.
- for ratings in numpy.arange(1, 5.1, 0.1):
- conn.execute(
- """Insert into AMAZON_SUMMARY
- (CummSumNumRatings,
- Ratings,
- ActualPrice) Values (0,?,0)""",float(ratings))
- conn.execute(
- """Update AMAZON_SUMMARY
- Set CummSumNumRatings =
- (Select SUM(No_of_Ratings)
- from AMAZON
- where AMAZON.No_of_Ratings =
- AMAZON_SUMMARY.No_of_Ratings
- """)
- conn.execute(
- """Delete from AMAZON_SUMMARY
- where CummSumNumRatings = 0 """)
- with monetdbe.connect(database) as conn:
- print("Load Errors")
- results = conn.execute("""SELECT * FROM sys.rejects""")
- for r in results.fetchall():
- print(r)
- with monetdbe.connect(database) as conn:
- print("Num Records")
- results = conn.execute("""SELECT Count(*) FROM AMAZON_SUMMARY""")
- for r in results.fetchall():
- print(r)
- conn.close()
- except Exception as inst:
- print(type(inst)) # the exception instance
- print(inst.args) # arguments stored in .args
- print(inst)
- #####################################
- """
- Name: Jackson McAfee
- Date: 04/24/2023
- Assignment: Module 14: Adv Column Store Querying
- Due Date: 04/23/2023
- About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_WATCHES and AMAZON tables)
- Assumptions:NA
- All work below was performed by Jackson McAfee
- """
- import monetdbe
- import time
- database = 'amazon.mdbe'
- try:
- with monetdbe.connect(database) as conn1:
- start = time.time()
- results1 = conn1.execute("""
- SELECT Name, Ratings, No_of_ratings
- FROM AMAZON_WATCHES
- Order by Ratings DESC
- """)
- for r in results1.fetchall():
- print(r)
- end = time.time()
- executionTime = end-start
- print("Execution Time:", executionTime)
- start = time.time()
- results2 = conn1.execute("""
- SELECT Name, Ratings, No_of_ratings
- FROM AMAZON
- Order by Ratings DESC
- """)
- for r in results2.fetchall():
- print(r)
- end = time.time()
- executionTime = end-start
- print("Execution Time:", executionTime)
- conn1.close()
- except Exception as inst:
- print(type(inst)) # the exception instance
- print(inst.args) # arguments stored in .args
- print(inst)
- ##################################
- """
- Name: Jackson McAfee
- Date: 04/24/2023
- Assignment: Module 14: Adv Column Store Querying
- Due Date: 04/23/2023
- About this project: Performs a query on the amazon.mdbe database (specifically the AMAZON_SUMMARY table)
- Assumptions:NA
- All work below was performed by Jackson McAfee
- """
- import monetdbe
- import time
- database = 'amazon.mdbe'
- try:
- with monetdbe.connect(database) as conn:
- start = time.time()
- results = conn.execute("""
- SELECT CummSummNumRatings,Ratings,ActualPrice
- FROM AMAZON_SUMMARY
- Where CummSummNumRatings < 100
- Order by Ratings DESC
- """)
- for r in results.fetchall():
- print(r)
- end = time.time()
- executionTime = end-start
- print("Execution Time:", executionTime)
- start = time.time()
- results2 = conn.execute("""
- SELECT CummSummNumRatings,Ratings,ActualPrice
- FROM AMAZON_SUMMARY
- Where CummSummNumRatings < 100
- Order by Ratings DESC
- """)
- for r in results2.fetchall():
- print(r)
- end = time.time()
- executionTime = end-start
- print("Execution Time:", executionTime)
- conn.close()
- except Exception as inst:
- print(type(inst)) # the exception instance
- print(inst.args) # arguments stored in .args
- print(inst)
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement