Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- from sqlalchemy import create_engine
- import cx_Oracle
- dsn_tns = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=<host>)(PORT=1521))
- (CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=<servicename>)))"
- pwd = input('Please type in password:')
- engine = create_engine('oracle+cx_oracle://myusername:' + pwd + '@%s' % dsn_tns)
- df.to_sql('test_table', engine.connect(), if_exists='replace')
- id name premium created_date init_p term_number uprate value score group action_reason
- 160442353 LDP: Review 1295.619617 2014-01-20 1130.75 1 7 -42 236.328243 6 pass
- 164623435 TRU: Referral 453.224880 2014-05-20 0.00 11 NaN -55 38.783290 1 suppress
- id int64
- name object
- premium float64
- created_date object
- init_p float64
- term_number float64
- uprate float64
- value float64
- score float64
- group int64
- action_reason object
- import pandas as pd
- import cx_Oracle
- from sqlalchemy import types, create_engine
- #######################################################
- ### DB connection strings config
- #######################################################
- tns = """
- (DESCRIPTION =
- (ADDRESS = (PROTOCOL = TCP)(HOST = my-db-scan)(PORT = 1521))
- (CONNECT_DATA =
- (SERVER = DEDICATED)
- (SERVICE_NAME = my_service_name)
- )
- )
- """
- usr = "test"
- pwd = "my_oracle_password"
- engine = create_engine('oracle+cx_oracle://%s:%s@%s' % (usr, pwd, tns))
- # sample DF [shape: `(2000, 11)`]
- # i took your 2 rows DF and replicated it: `df = pd.concat([df]* 10**3, ignore_index=True)`
- df = pd.read_csv('/path/to/file.csv')
- In [61]: df.shape
- Out[61]: (2000, 11)
- In [62]: df.info()
- <class 'pandas.core.frame.DataFrame'>
- RangeIndex: 2000 entries, 0 to 1999
- Data columns (total 11 columns):
- id 2000 non-null int64
- name 2000 non-null object
- premium 2000 non-null float64
- created_date 2000 non-null datetime64[ns]
- init_p 2000 non-null float64
- term_number 2000 non-null int64
- uprate 1000 non-null float64
- value 2000 non-null int64
- score 2000 non-null float64
- group 2000 non-null int64
- action_reason 2000 non-null object
- dtypes: datetime64[ns](1), float64(4), int64(4), object(2)
- memory usage: 172.0+ KB
- In [57]: df.shape
- Out[57]: (2000, 11)
- In [58]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace')
- 1 loop, best of 1: 16 s per loop
- AAA> desc test.test_table
- Name Null? Type
- ------------------------------- -------- ------------------
- ID NUMBER(19)
- NAME CLOB # !!!
- PREMIUM FLOAT(126)
- CREATED_DATE DATE
- INIT_P FLOAT(126)
- TERM_NUMBER NUMBER(19)
- UPRATE FLOAT(126)
- VALUE NUMBER(19)
- SCORE FLOAT(126)
- group NUMBER(19)
- ACTION_REASON CLOB # !!!
- In [59]: dtyp = {c:types.VARCHAR(df[c].str.len().max())
- ...: for c in df.columns[df.dtypes == 'object'].tolist()}
- ...:
- In [60]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace', dtype=dtyp)
- 1 loop, best of 1: 335 ms per loop
- AAA> desc test.test_table
- Name Null? Type
- ----------------------------- -------- ---------------------
- ID NUMBER(19)
- NAME VARCHAR2(13 CHAR) # !!!
- PREMIUM FLOAT(126)
- CREATED_DATE DATE
- INIT_P FLOAT(126)
- TERM_NUMBER NUMBER(19)
- UPRATE FLOAT(126)
- VALUE NUMBER(19)
- SCORE FLOAT(126)
- group NUMBER(19)
- ACTION_REASON VARCHAR2(8 CHAR) # !!!
- In [69]: df.shape
- Out[69]: (200000, 11)
- In [70]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace', dtype=dtyp, chunksize=10**4)
- 1 loop, best of 1: 4.68 s per loop
- dtyp = {c:types.VARCHAR(df[c].str.len().max())
- for c in df.columns[df.dtypes == 'object'].tolist()}
- df.to_sql(..., dtype=dtyp)
- from sqlalchemy import types, create_engine
Add Comment
Please, Sign In to add comment