Advertisement
Guest User

Untitled

a guest
Apr 23rd, 2014
33
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 5.55 KB | None | 0 0
  1. >>> df['profile']
  2. date
  3. 2015-01-01 00:00:00 3.000000
  4. 2015-01-01 01:00:00 3.000143
  5. 2015-01-01 02:00:00 3.000287
  6. 2015-01-01 03:00:00 3.000430
  7. 2015-01-01 04:00:00 3.000574
  8. ...
  9. 2015-12-31 20:00:00 2.999426
  10. 2015-12-31 21:00:00 2.999570
  11. 2015-12-31 22:00:00 2.999713
  12. 2015-12-31 23:00:00 2.999857
  13. Freq: H, Name: profile, Length: 8760
  14.  
  15. ### Deviation on monthly basis
  16. >>> dev_monthly = np.random.uniform(0.5, 1.5, len(df['profile'].groupby(df.index.month).aggregate(np.sum)))
  17.  
  18.  
  19. >>> df['profile_monthly'] = (df['profile'].groupby(df.index.month).aggregate(np.sum) * dev_monthly).reindex(df)
  20.  
  21. >>> df['profile_monthly']
  22. date
  23. 2015-01-01 00:00:00 NaN
  24. 2015-01-01 01:00:00 NaN
  25. 2015-01-01 02:00:00 NaN
  26. ...
  27. 2015-12-31 22:00:00 NaN
  28. 2015-12-31 23:00:00 NaN
  29. Freq: H, Name: profile_monthly, Length: 8760
  30.  
  31. In [105]: df = DataFrame({'profile': normal(3, 0.1, size=10000)},
  32.                  pd.date_range(start='2015-01-01', freq='H', periods=10000))
  33.  
  34. In [106]: df['profile_monthly'] = df.profile.resample('M', how='sum')
  35.  
  36. In [107]: df
  37. Out[107]:
  38. profile profile_monthly
  39. 2015-01-01 00:00:00 2.8328 NaN
  40. 2015-01-01 01:00:00 3.0607 NaN
  41. 2015-01-01 02:00:00 3.0138 NaN
  42. 2015-01-01 03:00:00 3.0402 NaN
  43. 2015-01-01 04:00:00 3.0335 NaN
  44. 2015-01-01 05:00:00 3.0087 NaN
  45. 2015-01-01 06:00:00 3.0557 NaN
  46. 2015-01-01 07:00:00 2.9280 NaN
  47. 2015-01-01 08:00:00 3.1359 NaN
  48. 2015-01-01 09:00:00 2.9681 NaN
  49. 2015-01-01 10:00:00 3.1240 NaN
  50. 2015-01-01 11:00:00 3.0635 NaN
  51. 2015-01-01 12:00:00 2.9206 NaN
  52. 2015-01-01 13:00:00 3.0714 NaN
  53. 2015-01-01 14:00:00 3.0688 NaN
  54. 2015-01-01 15:00:00 3.0703 NaN
  55. 2015-01-01 16:00:00 2.9102 NaN
  56. 2015-01-01 17:00:00 2.9368 NaN
  57. 2015-01-01 18:00:00 3.0864 NaN
  58. 2015-01-01 19:00:00 3.2124 NaN
  59. 2015-01-01 20:00:00 2.8988 NaN
  60. 2015-01-01 21:00:00 3.0659 NaN
  61. 2015-01-01 22:00:00 2.7973 NaN
  62. 2015-01-01 23:00:00 3.0824 NaN
  63. 2015-01-02 00:00:00 3.0199 NaN
  64. ... ...
  65.  
  66. [10000 rows x 2 columns]
  67.  
  68. In [108]: df.dropna()
  69. Out[108]:
  70. profile profile_monthly
  71. 2015-01-31 2.9769 2230.9931
  72. 2015-02-28 2.9930 2016.1045
  73. 2015-03-31 2.7817 2232.4096
  74. 2015-04-30 3.1695 2158.7834
  75. 2015-05-31 2.9040 2236.5962
  76. 2015-06-30 2.8697 2162.7784
  77. 2015-07-31 2.9278 2231.7232
  78. 2015-08-31 2.8289 2236.4603
  79. 2015-09-30 3.0368 2163.5916
  80. 2015-10-31 3.1517 2233.2285
  81. 2015-11-30 3.0450 2158.6998
  82. 2015-12-31 2.8261 2228.5550
  83. 2016-01-31 3.0264 2229.2221
  84.  
  85. [13 rows x 2 columns]
  86.  
  87. In [110]: df.fillna(method='bfill')
  88. Out[110]:
  89. profile profile_monthly
  90. 2015-01-01 00:00:00 2.8328 2230.9931
  91. 2015-01-01 01:00:00 3.0607 2230.9931
  92. 2015-01-01 02:00:00 3.0138 2230.9931
  93. 2015-01-01 03:00:00 3.0402 2230.9931
  94. 2015-01-01 04:00:00 3.0335 2230.9931
  95. 2015-01-01 05:00:00 3.0087 2230.9931
  96. 2015-01-01 06:00:00 3.0557 2230.9931
  97. 2015-01-01 07:00:00 2.9280 2230.9931
  98. 2015-01-01 08:00:00 3.1359 2230.9931
  99. 2015-01-01 09:00:00 2.9681 2230.9931
  100. 2015-01-01 10:00:00 3.1240 2230.9931
  101. 2015-01-01 11:00:00 3.0635 2230.9931
  102. 2015-01-01 12:00:00 2.9206 2230.9931
  103. 2015-01-01 13:00:00 3.0714 2230.9931
  104. 2015-01-01 14:00:00 3.0688 2230.9931
  105. 2015-01-01 15:00:00 3.0703 2230.9931
  106. 2015-01-01 16:00:00 2.9102 2230.9931
  107. 2015-01-01 17:00:00 2.9368 2230.9931
  108. 2015-01-01 18:00:00 3.0864 2230.9931
  109. 2015-01-01 19:00:00 3.2124 2230.9931
  110. 2015-01-01 20:00:00 2.8988 2230.9931
  111. 2015-01-01 21:00:00 3.0659 2230.9931
  112. 2015-01-01 22:00:00 2.7973 2230.9931
  113. 2015-01-01 23:00:00 3.0824 2230.9931
  114. 2015-01-02 00:00:00 3.0199 2230.9931
  115. ... ...
  116.  
  117. [10000 rows x 2 columns]
  118.  
  119. >>> df.fillna(method='bfill')[np.logical_and(df.index.month==12, df.index.day==31)]
  120. profile profile_monthly
  121. 2015-12-31 00:00:00 2.926504 2232.288997
  122. 2015-12-31 01:00:00 3.008543 2234.470731
  123. 2015-12-31 02:00:00 2.930133 2234.470731
  124. 2015-12-31 03:00:00 3.078552 2234.470731
  125. 2015-12-31 04:00:00 3.141578 2234.470731
  126. 2015-12-31 05:00:00 3.061820 2234.470731
  127. 2015-12-31 06:00:00 2.981626 2234.470731
  128. 2015-12-31 07:00:00 3.010749 2234.470731
  129. 2015-12-31 08:00:00 2.878577 2234.470731
  130. 2015-12-31 09:00:00 2.915487 2234.470731
  131. 2015-12-31 10:00:00 3.072721 2234.470731
  132. 2015-12-31 11:00:00 3.087866 2234.470731
  133. 2015-12-31 12:00:00 3.089208 2234.470731
  134. 2015-12-31 13:00:00 2.957047 2234.470731
  135. 2015-12-31 14:00:00 3.002072 2234.470731
  136. 2015-12-31 15:00:00 3.106656 2234.470731
  137. 2015-12-31 16:00:00 3.100891 2234.470731
  138. 2015-12-31 17:00:00 3.077835 2234.470731
  139. 2015-12-31 18:00:00 3.032497 2234.470731
  140. 2015-12-31 19:00:00 2.959838 2234.470731
  141. 2015-12-31 20:00:00 2.878819 2234.470731
  142. 2015-12-31 21:00:00 3.041171 2234.470731
  143. 2015-12-31 22:00:00 3.061970 2234.470731
  144. 2015-12-31 23:00:00 3.019011 2234.470731
  145.  
  146. [24 rows x 2 columns]
  147.  
  148. >>> AA = df.groupby((df.index.year, df.index.month)).aggregate(np.mean)
  149. >>> AA['dev'] = np.random.normal(0, 1, len(AA))
  150. >>> df['dev'] = AA.ix[zip(df.index.year, df.index.month)]['dev'].values
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement