anchormodeling

Data Condensation

Aug 8th, 2019
829
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. -- 45 years of daily prices
  2. -- https://www.macrotrends.net/2535/coffee-prices-historical-chart-data  (KC)
  3. -- https://www.macrotrends.net/2531/soybean-prices-historical-chart-data (ZS)
  4.  
  5. drop table if exists #timeseries;
  6.  
  7. create table #timeseries (
  8.     Classification char(2),
  9.     Timepoint date,
  10.     Measure money,
  11.     primary key (
  12.         Classification,
  13.         Timepoint desc
  14.     )
  15. );
  16.  
  17. insert into #timeseries (
  18.     Classification,
  19.     Timepoint,
  20.     Measure
  21. )
  22. select 'KC',[date], [price] from Price_KC
  23. union all
  24. select 'ZS',[date], [price] from Price_ZS
  25.  
  26. declare @windowSize int = 3;
  27.  
  28. -- select * from #timeseries;
  29.  
  30. drop table if exists #timeseries_with_mm;
  31.  
  32. select distinct
  33.     series.Classification,
  34.     series.Timepoint,
  35.     series.Measure,
  36.     percentile_cont(0.5) within group (
  37.         order by windowed_measures.Measure
  38.     ) over (
  39.         partition by series.Classification, series.Timepoint
  40.     ) as MovingMedian
  41. into
  42.     #timeseries_with_mm
  43. from
  44.     #timeseries series
  45. cross apply (
  46.     select
  47.         Measure
  48.     from
  49.         #timeseries window
  50.     where
  51.         window.Classification = series.Classification
  52.     and
  53.         window.Timepoint <= series.Timepoint
  54.     order by
  55.         Classification, Timepoint desc
  56.     offset 0 rows
  57.     fetch next @windowSize rows only
  58. ) windowed_measures;
  59.  
  60. alter table #timeseries_with_mm
  61. add primary key (
  62.     Classification,
  63.     Timepoint desc
  64. );
  65.  
  66. -- select * from #timeseries_with_mm where Classification = 'KC' order by Timepoint asc;
  67.  
  68. declare @trendPoints int = 3;
  69.  
  70. drop table if exists #timeseries_with_mm_ma_md;
  71.  
  72. select
  73.     series.Classification,
  74.     series.Timepoint,
  75.     series.Measure,
  76.     series.MovingMedian,
  77.     avg(windowed_measures.MovingMedian) as MovingAverageMovingMedian,
  78.     stdevp(windowed_measures.MovingMedian) as MovingDeviationMovingMedian
  79. into
  80.     #timeseries_with_mm_ma_md
  81. from
  82.     #timeseries_with_mm series
  83. outer apply (
  84.     select
  85.         MovingMedian
  86.     from
  87.         #timeseries_with_mm window
  88.     where
  89.         window.Classification = series.Classification
  90.     and
  91.         window.Timepoint <= series.Timepoint
  92.     order by
  93.         Classification, Timepoint desc
  94.     offset 1 rows -- 3 moving medians offset by 1 requires 6 measures
  95.     fetch next @trendPoints rows only
  96. ) windowed_measures
  97. group by
  98.     series.Classification,
  99.     series.Timepoint,
  100.     series.Measure,
  101.     series.MovingMedian;
  102.  
  103. alter table #timeseries_with_mm_ma_md
  104. add primary key (
  105.     Classification,
  106.     Timepoint desc
  107. );
  108.  
  109. -- accept fluctuations within 3% of the average value
  110. declare @averageComponent float = 0.03;
  111. -- accept fluctuations up to three standard deviations
  112. declare @deviationComponent float = 3.0;
  113.  
  114. drop table if exists Measure_Analysis;
  115.  
  116. select
  117.     Classification,
  118.     Timepoint,
  119.     Measure,
  120.     Trend,
  121.     case
  122.         when outlier.Trend is not null
  123.         then (Measure - MovingMedian) / (Measure + MovingMedian)
  124.     end as Significance,
  125.     margin.Tolerance,
  126.     MovingMedian
  127. into
  128.     Measure_Analysis
  129. from
  130.     #timeseries_with_mm_ma_md
  131. cross apply (
  132.     values (
  133.         @averageComponent * MovingAverageMovingMedian + @deviationComponent * MovingDeviationMovingMedian
  134.     )
  135. ) margin (Tolerance)
  136. cross apply (
  137.     values (
  138.         case
  139.             when Measure < MovingMedian - margin.Tolerance then '-'
  140.             when Measure > MovingMedian + margin.Tolerance then '+'
  141.         end    
  142.     )
  143. ) outlier (Trend)
  144. order by
  145.     Classification,
  146.     Timepoint desc;
  147.  
  148. alter table Measure_Analysis
  149. add primary key (
  150.     Classification,
  151.     Timepoint desc
  152. );
  153.  
  154. drop table if exists Measure_Condensed;
  155.  
  156. select
  157.     Classification,
  158.     Timepoint,
  159.     Measure,
  160.     Trend,
  161.     Significance
  162. into
  163.     Measure_Condensed
  164. from (
  165.     select
  166.         trending_and_following_rows.Classification,
  167.         trending_and_following_rows.Timepoint,
  168.         trending_and_following_rows.Measure,
  169.         trending_and_following_rows.Trend,
  170.         trending_and_following_rows.Significance
  171.     from
  172.         Measure_Analysis analysis
  173.     cross apply (
  174.         select
  175.             Classification,
  176.             Timepoint,
  177.             Measure,
  178.             Trend,
  179.             Significance
  180.         from
  181.             Measure_Analysis window
  182.         where
  183.             window.Classification = analysis.Classification
  184.         and
  185.             window.Timepoint >= analysis.Timepoint
  186.         order by
  187.             Classification,
  188.             Timepoint asc
  189.         offset 0 rows
  190.         fetch next 2 rows only
  191.     ) trending_and_following_rows
  192.     where
  193.         analysis.Trend is not null
  194.     union
  195.     select
  196.         trending_and_preceding_rows.Classification,
  197.         trending_and_preceding_rows.Timepoint,
  198.         trending_and_preceding_rows.Measure,
  199.         trending_and_preceding_rows.Trend,
  200.         trending_and_preceding_rows.Significance
  201.     from
  202.         Measure_Analysis analysis
  203.     cross apply (
  204.         select
  205.             Classification,
  206.             Timepoint,
  207.             Measure,
  208.             Trend,
  209.             Significance
  210.         from
  211.             Measure_Analysis window
  212.         where
  213.             window.Classification = analysis.Classification
  214.         and
  215.             window.Timepoint <= analysis.Timepoint
  216.         order by
  217.             Classification,
  218.             Timepoint desc
  219.         offset 0 rows
  220.         fetch next 2 rows only
  221.     ) trending_and_preceding_rows
  222.     where
  223.         analysis.Trend is not null
  224.     union
  225.     select
  226.         analysis.Classification,
  227.         analysis.Timepoint,
  228.         analysis.Measure,
  229.         analysis.Trend,
  230.         analysis.Significance
  231.     from (
  232.         select
  233.             Classification,
  234.             min(Timepoint) as FirstTimepoint,
  235.             max(Timepoint) as LastTimepoint
  236.         from
  237.             Measure_Analysis
  238.         group by
  239.             Classification     
  240.     ) first_and_last
  241.     join
  242.         Measure_Analysis analysis
  243.     on
  244.         analysis.Classification = first_and_last.Classification
  245.     and
  246.         analysis.Timepoint in (first_and_last.FirstTimepoint, first_and_last.LastTimepoint)
  247. ) condensed;
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×