pmkhlv

Untitled

Jun 10th, 2022 (edited)
1,894
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 4.17 KB | None | 0 0
  1.  
  2. import psycopg2
  3. import pandas as pd
  4. import numpy as np
  5.  
  6. files = ('customer_research.csv', 'user_orders_log.csv', 'user_activity_log.csv')
  7.  
  8. # df_order_log = pd.read_csv("https://storage.yandexcloud.net/s3-sprint3/cohort_1/aa-tolmachev/TWpBeU1pMHdOQzB5T1ZReE1Eb3lNam93TmdsaFlTMTBiMnh0WVdOb1pYWT0=/user_order_log.csv") )
  9.  
  10. for FILE_NAME in files:
  11.     storage_url = f"https://storage.yandexcloud.net/s3-sprint3/cohort_2/pmkhlv/TWpBeU1pMHdOaTB4TUZReE56b3hNem8wT1Fsd2JXdG9iSFk9/{FILE_NAME}"
  12.     df = pd.read_csv(storage_url)
  13.     df.to_csv("/lessons/2. Анализ вводных по задаче/7. Использование файлов и подключение к БД/Задание 1/stage/" + FILE_NAME)
  14.  
  15.  
  16.  
  17.  
  18.  
  19. conn = psycopg2.connect("host='localhost' port='5432' dbname='de' user='jovyan' password='jovyan' ")
  20. cur = conn.cursor()
  21.  
  22. customer_research = pd.read_csv('/lessons/2. Анализ вводных по задаче/7. Использование файлов и подключение к БД/Задание 1/stage/customer_research.csv').iloc[: , 1:]
  23. user_activity_log = pd.read_csv('/lessons/2. Анализ вводных по задаче/7. Использование файлов и подключение к БД/Задание 1/stage/user_activity_log.csv').iloc[: , 1:]
  24. user_order_log = pd.read_csv('/lessons/2. Анализ вводных по задаче/7. Использование файлов и подключение к БД/Задание 1/stage/user_order_log.csv').iloc[: , 1:]
  25.  
  26. print(customer_research.shape)
  27.  
  28. #insert_uol = "insert into staging.user_order_log (id, date_time, city_id, city_name, customer_id, first_name, last_name, item_id, item_name, quantity, payment_amount) VALUES {s_val};"
  29. insert_cr = "insert into staging.customer_research (id, date_id, category_id, geo_id, sales_qty, sales_amt) VALUES {s_val};"
  30. customer_research.insert(0, 'id', range(1, len(customer_research)+1))
  31.  
  32. #insert_ual = "insert into staging.user_activity_log (id, date_time, action_id, customer_id, quantity) VALUES {s_val};"
  33.  
  34.  
  35. step = int(user_orders_log.shape[0] / 100)
  36. while i <= user_orders_log.shape[0]:
  37.     print(i, end='\r')
  38.     s_val = str([tuple(x) for x in user_orders_log.loc[i:i + step].to_numpy()])[1:-1]
  39.     cur.execute(insert_s.replace('{s_val}',s_val))
  40.     conn.commit()
  41.     i += step+1
  42.  
  43. CREATE TABLE staging.customer_research (
  44.     id serial4 NOT NULL,
  45.     date_id timestamp NULL,
  46.     category_id int4 NULL,
  47.     geo_id int4 NULL,
  48.     sales_qty int4 NULL,
  49.     sales_amt numeric(14, 2) NULL,
  50.     PRIMARY KEY (id)
  51. );-- staging.user_activity_log definition
  52.  
  53. -- Drop table
  54.  
  55. -- DROP TABLE staging.user_activity_log;
  56.  
  57. CREATE TABLE staging.user_activity_log (
  58.     id serial4 NOT NULL,
  59.     date_time timestamp NULL,
  60.     action_id int8 NULL,
  61.     customer_id int8 NULL,
  62.     quantity int8 NULL,
  63.     PRIMARY KEY (id)
  64. );-- staging.user_order_log definition
  65.  
  66. -- Drop table
  67.  
  68. -- DROP TABLE staging.user_order_log;
  69.  
  70. CREATE TABLE staging.user_order_log (
  71.     id serial4 NOT NULL,
  72.     date_time timestamp NULL,
  73.     city_id int4 NULL,
  74.     city_name varchar(100) NULL,
  75.     customer_id int8 NULL,
  76.     first_name varchar(100) NULL,
  77.     last_name varchar(100) NULL,
  78.     item_id int4 NULL,
  79.     item_name varchar(100) NULL,
  80.     quantity int8 NULL,
  81.     payment_amount numeric(14, 2) NULL,
  82.     PRIMARY KEY (id)
  83. );
  84.  
  85. CREATE TABLE prod.custom_research (
  86.   date_id timestamp not null,
  87.   geo_id bigint not null,
  88.   sales_qty bigint not null,
  89.   sales_amt bigint not null);
  90.  
  91. insert into prod.custom_research (date_id, geo_id, sales_qty, sales_amt)
  92. select date_id, geo_id, sales_qty, sales_amt from staging.customer_research;
  93. ALTER TABLE prod.custom_research RENAME TO customer_research;
  94.  
  95. CREATE TABLE prod.user_activity_log (
  96.   Date_time timestamp not null,
  97.   Customer_id bigint not null);
  98.  
  99. insert into prod.user_activity_log (Date_time, Customer_id)
  100. select Date_time, Customer_id from staging.user_activity_log;
  101.  
  102. CREATE TABLE prod.user_order_log (
  103.   Date_time timestamp not null,
  104.   Customer_id bigint not null,
  105.   Quantity bigint not null,
  106.   Payment_amount numeric(14, 2) not null);
  107.  
  108. insert into prod.user_order_log (Date_time, Customer_id, Quantity, Payment_amount)
  109. select Date_time, Customer_id, Quantity, Payment_amount from staging.user_order_log;
Advertisement
Add Comment
Please, Sign In to add comment