Guest User

Untitled

a guest
Feb 15th, 2019
252
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 4.59 KB | None | 0 0
  1. from sqlalchemy import create_engine
  2. import cx_Oracle
  3. dsn_tns = "(DESCRIPTION=(ADDRESS=(PROTOCOL=TCP)(HOST=<host>)(PORT=1521))
  4. (CONNECT_DATA=(SERVER=DEDICATED)(SERVICE_NAME=<servicename>)))"
  5. pwd = input('Please type in password:')
  6. engine = create_engine('oracle+cx_oracle://myusername:' + pwd + '@%s' % dsn_tns)
  7. df.to_sql('test_table', engine.connect(), if_exists='replace')
  8.  
  9. id name premium created_date init_p term_number uprate value score group action_reason
  10. 160442353 LDP: Review 1295.619617 2014-01-20 1130.75 1 7 -42 236.328243 6 pass
  11. 164623435 TRU: Referral 453.224880 2014-05-20 0.00 11 NaN -55 38.783290 1 suppress
  12.  
  13. id int64
  14. name object
  15. premium float64
  16. created_date object
  17. init_p float64
  18. term_number float64
  19. uprate float64
  20. value float64
  21. score float64
  22. group int64
  23. action_reason object
  24.  
  25. import pandas as pd
  26. import cx_Oracle
  27. from sqlalchemy import types, create_engine
  28.  
  29. #######################################################
  30. ### DB connection strings config
  31. #######################################################
  32. tns = """
  33. (DESCRIPTION =
  34. (ADDRESS = (PROTOCOL = TCP)(HOST = my-db-scan)(PORT = 1521))
  35. (CONNECT_DATA =
  36. (SERVER = DEDICATED)
  37. (SERVICE_NAME = my_service_name)
  38. )
  39. )
  40. """
  41.  
  42. usr = "test"
  43. pwd = "my_oracle_password"
  44.  
  45. engine = create_engine('oracle+cx_oracle://%s:%s@%s' % (usr, pwd, tns))
  46.  
  47. # sample DF [shape: `(2000, 11)`]
  48. # i took your 2 rows DF and replicated it: `df = pd.concat([df]* 10**3, ignore_index=True)`
  49. df = pd.read_csv('/path/to/file.csv')
  50.  
  51. In [61]: df.shape
  52. Out[61]: (2000, 11)
  53.  
  54. In [62]: df.info()
  55. <class 'pandas.core.frame.DataFrame'>
  56. RangeIndex: 2000 entries, 0 to 1999
  57. Data columns (total 11 columns):
  58. id 2000 non-null int64
  59. name 2000 non-null object
  60. premium 2000 non-null float64
  61. created_date 2000 non-null datetime64[ns]
  62. init_p 2000 non-null float64
  63. term_number 2000 non-null int64
  64. uprate 1000 non-null float64
  65. value 2000 non-null int64
  66. score 2000 non-null float64
  67. group 2000 non-null int64
  68. action_reason 2000 non-null object
  69. dtypes: datetime64[ns](1), float64(4), int64(4), object(2)
  70. memory usage: 172.0+ KB
  71.  
  72. In [57]: df.shape
  73. Out[57]: (2000, 11)
  74.  
  75. In [58]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace')
  76. 1 loop, best of 1: 16 s per loop
  77.  
  78. AAA> desc test.test_table
  79. Name Null? Type
  80. ------------------------------- -------- ------------------
  81. ID NUMBER(19)
  82. NAME CLOB # !!!
  83. PREMIUM FLOAT(126)
  84. CREATED_DATE DATE
  85. INIT_P FLOAT(126)
  86. TERM_NUMBER NUMBER(19)
  87. UPRATE FLOAT(126)
  88. VALUE NUMBER(19)
  89. SCORE FLOAT(126)
  90. group NUMBER(19)
  91. ACTION_REASON CLOB # !!!
  92.  
  93. In [59]: dtyp = {c:types.VARCHAR(df[c].str.len().max())
  94. ...: for c in df.columns[df.dtypes == 'object'].tolist()}
  95. ...:
  96.  
  97. In [60]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace', dtype=dtyp)
  98. 1 loop, best of 1: 335 ms per loop
  99.  
  100. AAA> desc test.test_table
  101. Name Null? Type
  102. ----------------------------- -------- ---------------------
  103. ID NUMBER(19)
  104. NAME VARCHAR2(13 CHAR) # !!!
  105. PREMIUM FLOAT(126)
  106. CREATED_DATE DATE
  107. INIT_P FLOAT(126)
  108. TERM_NUMBER NUMBER(19)
  109. UPRATE FLOAT(126)
  110. VALUE NUMBER(19)
  111. SCORE FLOAT(126)
  112. group NUMBER(19)
  113. ACTION_REASON VARCHAR2(8 CHAR) # !!!
  114.  
  115. In [69]: df.shape
  116. Out[69]: (200000, 11)
  117.  
  118. In [70]: %timeit -n 1 -r 1 df.to_sql('test_table', engine, index=False, if_exists='replace', dtype=dtyp, chunksize=10**4)
  119. 1 loop, best of 1: 4.68 s per loop
  120.  
  121. dtyp = {c:types.VARCHAR(df[c].str.len().max())
  122. for c in df.columns[df.dtypes == 'object'].tolist()}
  123.  
  124. df.to_sql(..., dtype=dtyp)
  125.  
  126. from sqlalchemy import types, create_engine
Add Comment
Please, Sign In to add comment