Advertisement
Guest User

Untitled

a guest
Jul 22nd, 2019
182
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 7.03 KB | None | 0 0
  1. import numpy as np
  2. import pandas as pd # Importamos pandas bajo el pseudonimo pd
  3.  
  4. # Rango de fechas 8 dias
  5. fechas = pd.date_range('27/2/2019', periods=8)
  6. # Creo un dataframe
  7. df = pd.DataFrame(np.random.random((8, 4)), index=fechas,
  8. columns=['A', 'B', 'C', 'D'])
  9.  
  10. # Extraigo una Serie del dataframe
  11. s = df['A']
  12. # Selecciono una de las fechas de mi lista
  13. # Que defini como indice
  14. print(s[fechas[1]])
  15. # 0.9948947196784879
  16.  
  17. # Si queremos intercambiar columnas
  18. df[['B', 'A']] = df[['A', 'B']] # No se debe hacer aunque funcione
  19. # Forma correcta
  20. df.loc[:, ['B', 'A']] = df[['A', 'B']].to_numpy()
  21. print(df[['A', 'B']])
  22. # A B
  23. # 2019-02-27 0.319319 0.671448
  24. # 2019-02-28 0.405093 0.960463
  25. # 2019-03-01 0.542734 0.261184
  26. # 2019-03-02 0.172890 0.700732
  27. # 2019-03-03 0.665918 0.585571
  28. # 2019-03-04 0.459141 0.068089
  29. # 2019-03-05 0.447317 0.493405
  30. # 2019-03-06 0.661500 0.877922
  31. # Anotacion Location Choices
  32.  
  33. # Series
  34. sa = pd.Series([1, 2, 3], index=list('abc'))
  35. # Seleccionar Indices Rapido
  36. print(sa.b)
  37. # 2
  38.  
  39. # Seleccionar columnas de un dataset rapido
  40. print(df.A)
  41. # 2019-02-27 0.319319
  42. # 2019-02-28 0.405093
  43. # 2019-03-01 0.542734
  44. # 2019-03-02 0.172890
  45. # 2019-03-03 0.665918
  46. # 2019-03-04 0.459141
  47. # 2019-03-05 0.447317
  48. # 2019-03-06 0.661500
  49. # Freq: D, Name: A, dtype: float64
  50.  
  51. # Reasignar un indice
  52. sa.a = 5
  53. print(sa)
  54. # a 5
  55. # b 2
  56. # c 3
  57. # dtype: int64
  58.  
  59. # Solo si existe
  60. df.A = list(range(len(df)))
  61. print(df)
  62. # A B C D
  63. # 2019-02-27 0 0.671448 0.161066 0.059666
  64. # 2019-02-28 1 0.960463 0.822715 0.814657
  65. # 2019-03-01 2 0.261184 0.885155 0.660747
  66. # 2019-03-02 3 0.700732 0.935873 0.901624
  67. # 2019-03-03 4 0.585571 0.983047 0.062070
  68. # 2019-03-04 5 0.068089 0.737237 0.233875
  69. # 2019-03-05 6 0.493405 0.226257 0.809233
  70. # 2019-03-06 7 0.877922 0.786466 0.182065
  71.  
  72. # Si no existe
  73. df.X = list(range(len(df))) # No crea la columna: solo emite un UserWarning y define un atributo
  74. # Pandas doesn't allow columns to be created via a new attribute name
  75. df['X'] = list(range(len(df))) # Forma correcta
  76. df
  77. # Asignar una fila
  78. df.iloc[1] = [1, 2, 3, 4]
  79. print(df)
  80. # A B C D
  81. # 2019-02-27 0 0.671448 0.161066 0.0596664
  82. # 2019-02-28 1 2 3 4
  83. # 2019-03-01 2 0.261184 0.885155 0.660747
  84. # 2019-03-02 3 0.700732 0.935873 0.901624
  85. # 2019-03-03 4 0.585571 0.983047 0.0620701
  86. # 2019-03-04 5 0.0680889 0.737237 0.233875
  87. # 2019-03-05 6 0.493405 0.226257 0.809233
  88. # 2019-03-06 7 0.877922 0.786466 0.182065
  89.  
  90. # Slices
  91. # Series
  92. s[:2] # Primeros 2 Elementos
  93. # 2019-02-27 0.686824
  94. # 2019-02-28 0.868448
  95. # Freq: D, Name: A, dtype: float64
  96.  
  97. s[::2] # Todos los elementos con paso 2
  98. # 2019-02-27 0.686824
  99. # 2019-03-01 0.199276
  100. # 2019-03-03 0.805328
  101. # 2019-03-05 0.006065
  102. # Freq: 2D, Name: A, dtype: float64
  103.  
  104. s[::-1] # Reversa de indices
  105. # 2019-03-06 0.764515
  106. # 2019-03-05 0.006065
  107. # 2019-03-04 0.120355
  108. # 2019-03-03 0.805328
  109. # 2019-03-02 0.966972
  110. # 2019-03-01 0.199276
  111. # 2019-02-28 0.868448
  112. # 2019-02-27 0.686824
  113. # Freq: -1D, Name: A, dtype: float64
  114.  
  115. s[5::-1] # Primeros 6 elementos al reves (posiciones 5 a 0)
  116. # 2019-03-04 0.120355
  117. # 2019-03-03 0.805328
  118. # 2019-03-02 0.966972
  119. # 2019-03-01 0.199276
  120. # 2019-02-28 0.868448
  121. # 2019-02-27 0.686824
  122. # Freq: -1D, Name: A, dtype: float64
  123.  
  124. s2 = s.copy() # Pasa lo mismo que numpy con la memoria
  125. s2[:5] = 0
  126. print(s2)
  127. # 2019-02-27 0.000000
  128. # 2019-02-28 0.000000
  129. # 2019-03-01 0.000000
  130. # 2019-03-02 0.000000
  131. # 2019-03-03 0.000000
  132. # 2019-03-04 0.120355
  133. # 2019-03-05 0.006065
  134. # 2019-03-06 0.764515
  135. # Freq: D, Name: A, dtype: float64
  136.  
  137. # Dataframe
  138. df[:3] # Primeros 3 elementos
  139. # A B C D
  140. # 2019-02-27 0 0.671448 0.161066 0.0596664
  141. # 2019-02-28 1 2 3 4
  142. # 2019-03-01 2 0.261184 0.885155 0.660747
  143.  
  144. df[2::-1] # Primeros 3 elementos al reves (posiciones 2 a 0)
  145. # A B C D
  146. # 2019-03-01 2 0.261184 0.885155 0.660747
  147. # 2019-02-28 1 2 3 4
  148. # 2019-02-27 0 0.671448 0.161066 0.0596664
  149.  
  150. # Advertencia
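  151. df.loc[2:3] # loc usa etiquetas: no acepta slices de posiciones enteras sobre un indice de fechas (usar iloc)
  152. # TypeError: cannot do slice indexing on DatetimeIndex with these indexers [2] of type int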
  153.  
  154. # Anot3
  155. s1 = pd.Series(np.random.randn(6), index=list('abcdef'))
  156. print(s1[3:])
  157. # d -0.047107
  158. # e -0.180345
  159. # f 0.334778
  160. # dtype: float64
  161.  
  162. s1.loc['d':]
  163. # d -0.047107
  164. # e -0.180345
  165. # f 0.334778
  166. # dtype: float64
  167.  
  168. s1.loc['b']
  169. # 0.8256215434880477
  170.  
  171. # Seleccionar filas del dataframe
  172. df.index = list('abcedfgh') # Reasignar indice (ojo: 'e' queda antes de 'd')
  173. df.loc[['a', 'e', 'f'], :] # Todas las columnas de los indices a e f
  174. # A B C D
  175. # a 0 0.671448 0.161066 0.0596664
  176. # e 3 0.700732 0.935873 0.901624
  177. # f 5 0.0680889 0.737237 0.233875
  178.  
  179. df.loc['a':'c', 'A':'B'] # Filas: a hasta c, columnas: 'A' hasta 'B'
  180. # A B
  181. # a 0 0.671448
  182. # b 1 2
  183. # c 2 0.261184
  184.  
  185. df.loc['a']
  186. # A 0
  187. # B 0.671448
  188. # C 0.161066
  189. # D 0.0596664
  190. # Name: a, dtype: object
  191.  
  192. # Arrays de booleanos
  193. df.loc['a'] > 0.1
  194. # Selecciona todas las columnas donde a sea mayor a 0.1
  195. # A False
  196. # B True
  197. # C True
  198. # D False
  199. # Name: a, dtype: bool
  200.  
  201. df.loc[:, df.loc['a'] > 0.5] # Columnas cuyo valor en la fila 'a' es mayor a 0.5
  202. # a 0.671448
  203. # b 2
  204.  
  205. df.loc['a', 'A'] # Indice a de la columna A
  206. # 0
  207.  
  208. # Con indices
  209. s1 = pd.Series(np.random.randint(5), index=list(range(0, 10, 2)))
  210. print(s1)
  211. # 0 2
  212. # 2 2
  213. # 4 2
  214. # 6 2
  215. # 8 2
  216. # dtype: int64
  217.  
  218. s1.iloc[:3] # Primeros 3 elementos de una serie
  219. # 0 2
  220. # 2 2
  221. # 4 2
  222. # dtype: int64
  223.  
  224. df.iloc[:3] # Primeros 3 elementos de un dataframe
  225. # A B C D
  226. # a 0 0.671448 0.161066 0.0596664
  227. # b 1 2 3 4
  228. # c 2 0.261184 0.885155 0.660747
  229.  
  230. df.iloc[:3, :2] # Primeras 3 filas y primeras 2 columnas
  231. # A B
  232. # a 0 0.671448
  233. # b 1 2
  234. # c 2 0.261184
  235.  
  236. df.iloc[[1, 4, 5], [1, 2]] # Segundo, Quinto, Sexto elemento
  237. # De segunda y tercera columna
  238.  
  239. # B C
  240. # b 2 3
  241. # d 0.585571 0.983047
  242. # f 0.0680889 0.737237
  243.  
  244. df.iloc[1:3, :] # Todas las columnas de la segunda y tercera fila
  245. # A B C D
  246. # b 1 2 3 4
  247. # c 2 0.261184 0.885155 0.660747
  248.  
  249. df.head().iloc[:, 1:3] # Primeras 5 filas (head)
  250. # Segunda y tercera columna
  251.  
  252. # B C
  253. # a 0.671448 0.161066
  254. # b 2 3
  255. # c 0.261184 0.885155
  256. # e 0.700732 0.935873
  257. # d 0.585571 0.983047
  258.  
  259. # Querys
  260. # SQL
  261. # SELECT * FROM df WHERE B > 1
  262. df.loc[lambda df: df.B > 1, :]
  263. # A B C D
  264. # b 1 2 3 4
  265.  
  266. # SQL
  267. # SELECT A, B FROM df
  268. df.loc[:, lambda df: ['A', 'B']]
  269. # A B
  270. # a 0 0.671448
  271. # b 1 2
  272. # c 2 0.261184
  273. # e 3 0.700732
  274. # d 4 0.585571
  275. # f 5 0.0680889
  276. # g 6 0.493405
  277. # h 7 0.877922
  278.  
  279. # SELECT A, B FROM df
  280. df.iloc[:, lambda df: [0, 1]]
  281. # A B
  282. # a 0 0.671448
  283. # b 1 2
  284. # c 2 0.261184
  285. # e 3 0.700732
  286. # d 4 0.585571
  287. # f 5 0.0680889
  288. # g 6 0.493405
  289. # h 7 0.877922
  290.  
  291. # SELECT A FROM df
  292. df[lambda df: df.columns[0]]
  293. # a 0
  294. # b 1
  295. # c 2
  296. # e 3
  297. # d 4
  298. # f 5
  299. # g 6
  300. # h 7
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement