Lesnic

Final version

May 3rd, 2021 (edited)
408
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
Python 6.02 KB | None | 0 0
  1. #0.10612400000000001 sec
  2. from clickhouse_driver import Client
  3. import subprocess
  4. import time
  5.  
  6.  
  7. def temp(arr):
  8.     res = 'CREATE TABLE IF NOT EXISTS ' + arr[0] + '\n(\n'
  9.     for i in range(1, len(arr), 2):
  10.         if i != len(arr) - 2:
  11.             res += arr[i] + ' ' + arr[i + 1] + ',\n'
  12.         else:
  13.             res += arr[i] + ' ' + arr[i + 1] + '\n'
  14.     res += ') ENGINE = MergeTree ORDER BY tuple()'
  15.     return res
  16.  
  17.  
  18. tables = [
  19.  ['actor',
  20.   'actor_id', 'Int32',
  21.   'first_name', 'String',
  22.   'last_name', 'String',
  23.   'last_update', 'DateTime64'
  24.   ],
  25.  ['actor_info',
  26.   'actor_id', 'Int32',
  27.   'first_name', 'String',
  28.   'last_name', 'String',
  29.   'film_info', 'String'
  30.   ],
  31.  ['customer_list',
  32.   'id', 'Int32',
  33.   'name', 'String',
  34.   'address', 'String',
  35.   'zipcode', 'String',
  36.   'phone', 'String',
  37.   'city', 'String',
  38.   'country', 'String',
  39.   'notes', 'String',
  40.   'sid', 'Int16'
  41.   ],
  42.  ['film_list',
  43.   'fid', 'Int32',
  44.   'title', 'String',
  45.   'description', 'String',
  46.   'category', 'String',
  47.   'price', 'Decimal(4, 2)',
  48.   'length', 'Int16',
  49.   'rating', 'String',
  50.   'actors', 'String'
  51.   ],
  52.  ['nicer_but_slower_film_list',
  53.   'fid', 'Int32',
  54.   'title', 'String',
  55.   'description', 'String',
  56.   'category', 'String',
  57.   'price', 'Decimal(4, 2)',
  58.   'length', 'Int16',
  59.   'rating', 'String',
  60.   'actors', 'String'
  61.   ],
  62.  ['sales_by_film_category',
  63.   'category', 'String',
  64.   'total_sales', 'Decimal(6, 2)'
  65.   ],
  66.  ['store',
  67.   'store_id', 'Int32',
  68.   'manager_staff_id', 'Int16',
  69.   'address_id', 'Int16',
  70.   'last_update', 'DateTime'
  71.   ],
  72.  ['sales_by_store',
  73.   'store', 'String',
  74.   'manager', 'String',
  75.   'total_sales', 'Decimal(7, 2)'
  76.   ],
  77.  ['staff_list',
  78.   'id', 'Int32',
  79.   'name', 'String',
  80.   'address', 'String',
  81.   'zipcode', 'String',
  82.   'phone', 'String',
  83.   'city', 'String',
  84.   'country', 'String',
  85.   'sid', 'Int16'
  86.   ],
  87.  ['address',
  88.   'address_id', 'Int32',
  89.   'address', 'String',
  90.   'address2', 'String',
  91.   'district', 'String',
  92.   'city_id', 'Int16',
  93.   'postal_code', 'String',
  94.   'phone', 'String',
  95.   'last_update', 'DateTime'
  96.   ],
  97.  ['category',
  98.   'category_id', 'Int32',
  99.   'name', 'String',
  100.   'last_update', 'DateTime'
  101.   ],
  102.  ['city',
  103.   'city_id', 'Int32',
  104.   'city', 'String',
  105.   'country_id', 'Int16',
  106.   'last_update', 'DateTime'
  107.   ],
  108.  ['country',
  109.   'country_id', 'Int32',
  110.   'country', 'String',
  111.   'last_update', 'DateTime'
  112.   ],
  113.  ['customer',
  114.   'customer_id', 'Int32',
  115.   'store_id', 'Int16',
  116.   'first_name', 'String',
  117.   'last_name', 'String',
  118.   'email', 'String',
  119.   'address_id', 'Int16',
  120.   'activebool', 'UInt8',
  121.   'create_date', 'Date',
  122.   'last_update', 'DateTime64',
  123.   'active', 'Int32'
  124.   ],
  125.  ['film_actor',
  126.   'actor_id', 'Int16',
  127.   'film_id', 'Int16',
  128.   'last_update', 'DateTime'
  129.   ],
  130.  ['film_category',
  131.   'film_id', 'Int16',
  132.   'category_id', 'Int16',
  133.   'last_update', 'DateTime'
  134.   ],
  135.  ['inventory',
  136.   'inventory_id', 'Int32',
  137.   'film_id', 'Int16',
  138.   'store_id', 'Int16',
  139.   'last_update', 'DateTime'
  140.   ],
  141.  ['language',
  142.   'language_id', 'Int32',
  143.   'name', 'String',
  144.   'last_update', 'DateTime'
  145.   ],
  146.  ['rental',
  147.   'rental_id', 'Int32',
  148.   'rental_date', 'DateTime',
  149.   'inventory_id', 'Int32',
  150.   'customer_id', 'Int16',
  151.   'return_date', 'DateTime',
  152.   'staff_id', 'Int16',
  153.   'last_update', 'DateTime'
  154.   ],
  155.  ['staff',
  156.   'staff_id', 'Int32',
  157.   'first_name', 'String',
  158.   'last_name', 'String',
  159.   'address_id', 'Int16',
  160.   'email', 'String',
  161.   'store_id', 'Int16',
  162.   'active', 'UInt8',
  163.   'username', 'String',
  164.   'password', 'String',
  165.   'last_update', 'DateTime64',
  166.   'picture', 'String'
  167.   ],
  168.  ['payment',
  169.   'payment_id', 'Int32',
  170.   'customer_id', 'Int16',
  171.   'staff_id', 'Int16',
  172.   'rental_id', 'Int32',
  173.   'amount', 'Decimal(5, 2)',
  174.   'payment_date', 'DateTime64'
  175.   ],
  176.  ['film',
  177.   'film_id', 'Int32',
  178.   'title', 'String',
  179.   'description', 'String',
  180.   'release_year', 'Int32',
  181.   'language_id', 'Int16',
  182.   'rental_duration', 'Int16',
  183.   'rental_rate', 'Decimal(4, 2)',
  184.   'length', 'Int16',
  185.   'replacement_cost', 'Decimal(5, 2)',
  186.   'rating', 'String',
  187.   'last_update', 'DateTime64',
  188.   'special_features', 'String',
  189.   'fulltext', 'String',
  190.   ]
  191. ]
  192.  
  193. client = Client('localhost')
  194. #client.execute('DROP DATABASE default')
  195. t = time.clock()
  196. for i in tables:
  197.     client.execute(temp(i))
  198.  
  199. for i in tables:
  200.     if i[0] == 'customer' or i[0] == 'staff':
  201.         f = open('/media/sf_im/' + i[0] + '.csv', 'r')
  202.         temp = ''
  203.         for now in f:
  204.             temp += now.replace(',t,', ',1,').replace(',f,', ',0,').replace('\\', '')
  205.         f.close()
  206.         f = open('/media/sf_im/' + i[0] + '.csv', 'w')
  207.         f.write(temp)
  208.         f.close()
  209.     elif i[0] == 'film':
  210.         f = open('/media/sf_im/' + i[0] + '.csv', 'r')
  211.         temp = ''
  212.         for now in f:
  213.             temp += now.replace('{', '').replace('}', '')
  214.         f.close()
  215.         f = open('/media/sf_im/' + i[0] + '.csv', 'w')
  216.         temp = temp.replace('\'', ' ')
  217.         f.write(temp)
  218.         f.close()
  219.  
  220.     res = "clickhouse-client --query=\"INSERT INTO " + i[0] + " FORMAT CSV\" < /media/sf_im/" + i[0] + ".csv"
  221. #    print(res)
  222.     subprocess.run(res, shell=True)
  223. print(time.clock() - t)
  224.  
  225. '''
  226. import psycopg2
  227.  
  228. path = 'C:/im/'
  229. fields = open(path + 'field.txt', 'w')
  230. con = psycopg2.connect(
  231.    database="dvd",
  232.    user="postgres",
  233.    password="postgres",
  234.    host="127.0.0.1",
  235.    port="5432"
  236. )
  237. cursor = con.cursor()
  238. cursor.execute('SELECT table_name FROM information_schema.tables \
  239. WHERE table_schema NOT IN (\'information_schema\',\'pg_catalog\');')
  240. tables = []
  241. for row in cursor:
  242.    tables += [row[0]]
  243. for row in tables:
  244.    new_file = open(path + row + '.csv', 'w')
  245.    new_file.close()
  246.  
  247.    cursor.execute('SELECT column_name, data_type \
  248. FROM INFORMATION_SCHEMA.COLUMNS \
  249. WHERE table_name = \'' + row + '\';')
  250.    temp = [row]
  251.    for i in cursor:
  252.        temp += [i[0].replace(' ', ''), i[1].replace(' ', '')]
  253.    fields.write(' '.join(temp) + '\n')
  254.  
  255.    cursor.execute('COPY (SELECT * FROM "' + row + '") TO \'' + path + row + '.csv\' CSV;')
  256. fields.close()
  257. '''
Add Comment
Please, Sign In to add comment