Advertisement
anchormodeling

Data Condensation

Aug 8th, 2019
1,198
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
T-SQL 5.30 KB | None | 0 0
  1. -- 45 years of daily prices
  2. -- https://www.macrotrends.net/2535/coffee-prices-historical-chart-data  (KC)
  3. -- https://www.macrotrends.net/2531/soybean-prices-historical-chart-data (ZS)
  4.  
  5. drop table if exists #timeseries;
  6.  
  7. create table #timeseries (
  8.     Classification char(2),
  9.     Timepoint date,
  10.     Measure money,
  11.     primary key (
  12.         Classification,
  13.         Timepoint desc
  14.     )
  15. );
  16.  
  17. insert into #timeseries (
  18.     Classification,
  19.     Timepoint,
  20.     Measure
  21. )
  22. select 'KC',[date], [price] from Price_KC
  23. union all
  24. select 'ZS',[date], [price] from Price_ZS
  25.  
  26. declare @windowSize int = 3;
  27.  
  28. -- select * from #timeseries;
  29.  
  30. drop table if exists #timeseries_with_mm;
  31.  
  32. select distinct
  33.     series.Classification,
  34.     series.Timepoint,
  35.     series.Measure,
  36.     percentile_cont(0.5) within group (
  37.         order by windowed_measures.Measure
  38.     ) over (
  39.         partition by series.Classification, series.Timepoint
  40.     ) as MovingMedian
  41. into
  42.     #timeseries_with_mm
  43. from
  44.     #timeseries series
  45. cross apply (
  46.     select
  47.         Measure
  48.     from
  49.         #timeseries window
  50.     where
  51.         window.Classification = series.Classification
  52.     and
  53.         window.Timepoint <= series.Timepoint
  54.     order by
  55.         Classification, Timepoint desc
  56.     offset 0 rows
  57.     fetch next @windowSize rows only
  58. ) windowed_measures;
  59.  
  60. alter table #timeseries_with_mm
  61. add primary key (
  62.     Classification,
  63.     Timepoint desc
  64. );
  65.  
  66. -- select * from #timeseries_with_mm where Classification = 'KC' order by Timepoint asc;
  67.  
  68. declare @trendPoints int = 3;
  69.  
  70. drop table if exists #timeseries_with_mm_ma_md;
  71.  
  72. select
  73.     series.Classification,
  74.     series.Timepoint,
  75.     series.Measure,
  76.     series.MovingMedian,
  77.     avg(windowed_measures.MovingMedian) as MovingAverageMovingMedian,
  78.     stdevp(windowed_measures.MovingMedian) as MovingDeviationMovingMedian
  79. into
  80.     #timeseries_with_mm_ma_md
  81. from
  82.     #timeseries_with_mm series
  83. outer apply (
  84.     select
  85.         MovingMedian
  86.     from
  87.         #timeseries_with_mm window
  88.     where
  89.         window.Classification = series.Classification
  90.     and
  91.         window.Timepoint <= series.Timepoint
  92.     order by
  93.         Classification, Timepoint desc
  94.     offset 1 rows -- 3 moving medians offset by 1 requires 6 measures
  95.     fetch next @trendPoints rows only
  96. ) windowed_measures
  97. group by
  98.     series.Classification,
  99.     series.Timepoint,
  100.     series.Measure,
  101.     series.MovingMedian;
  102.  
  103. alter table #timeseries_with_mm_ma_md
  104. add primary key (
  105.     Classification,
  106.     Timepoint desc
  107. );
  108.  
  109. -- accept fluctuations within 3% of the average value
  110. declare @averageComponent float = 0.03;
  111. -- accept fluctuations up to three standard deviations
  112. declare @deviationComponent float = 3.0;
  113.  
  114. drop table if exists Measure_Analysis;
  115.  
  116. select
  117.     Classification,
  118.     Timepoint,
  119.     Measure,
  120.     Trend,
  121.     case
  122.         when outlier.Trend is not null
  123.         then (Measure - MovingMedian) / (Measure + MovingMedian)
  124.     end as Significance,
  125.     margin.Tolerance,
  126.     MovingMedian
  127. into
  128.     Measure_Analysis
  129. from
  130.     #timeseries_with_mm_ma_md
  131. cross apply (
  132.     values (
  133.         @averageComponent * MovingAverageMovingMedian + @deviationComponent * MovingDeviationMovingMedian
  134.     )
  135. ) margin (Tolerance)
  136. cross apply (
  137.     values (
  138.         case
  139.             when Measure < MovingMedian - margin.Tolerance then '-'
  140.             when Measure > MovingMedian + margin.Tolerance then '+'
  141.         end    
  142.     )
  143. ) outlier (Trend)
  144. order by
  145.     Classification,
  146.     Timepoint desc;
  147.  
  148. alter table Measure_Analysis
  149. add primary key (
  150.     Classification,
  151.     Timepoint desc
  152. );
  153.  
  154. drop table if exists Measure_Condensed;
  155.  
  156. select
  157.     Classification,
  158.     Timepoint,
  159.     Measure,
  160.     Trend,
  161.     Significance
  162. into
  163.     Measure_Condensed
  164. from (
  165.     select
  166.         trending_and_following_rows.Classification,
  167.         trending_and_following_rows.Timepoint,
  168.         trending_and_following_rows.Measure,
  169.         trending_and_following_rows.Trend,
  170.         trending_and_following_rows.Significance
  171.     from
  172.         Measure_Analysis analysis
  173.     cross apply (
  174.         select
  175.             Classification,
  176.             Timepoint,
  177.             Measure,
  178.             Trend,
  179.             Significance
  180.         from
  181.             Measure_Analysis window
  182.         where
  183.             window.Classification = analysis.Classification
  184.         and
  185.             window.Timepoint >= analysis.Timepoint
  186.         order by
  187.             Classification,
  188.             Timepoint asc
  189.         offset 0 rows
  190.         fetch next 2 rows only
  191.     ) trending_and_following_rows
  192.     where
  193.         analysis.Trend is not null
  194.     union
  195.     select
  196.         trending_and_preceding_rows.Classification,
  197.         trending_and_preceding_rows.Timepoint,
  198.         trending_and_preceding_rows.Measure,
  199.         trending_and_preceding_rows.Trend,
  200.         trending_and_preceding_rows.Significance
  201.     from
  202.         Measure_Analysis analysis
  203.     cross apply (
  204.         select
  205.             Classification,
  206.             Timepoint,
  207.             Measure,
  208.             Trend,
  209.             Significance
  210.         from
  211.             Measure_Analysis window
  212.         where
  213.             window.Classification = analysis.Classification
  214.         and
  215.             window.Timepoint <= analysis.Timepoint
  216.         order by
  217.             Classification,
  218.             Timepoint desc
  219.         offset 0 rows
  220.         fetch next 2 rows only
  221.     ) trending_and_preceding_rows
  222.     where
  223.         analysis.Trend is not null
  224.     union
  225.     select
  226.         analysis.Classification,
  227.         analysis.Timepoint,
  228.         analysis.Measure,
  229.         analysis.Trend,
  230.         analysis.Significance
  231.     from (
  232.         select
  233.             Classification,
  234.             min(Timepoint) as FirstTimepoint,
  235.             max(Timepoint) as LastTimepoint
  236.         from
  237.             Measure_Analysis
  238.         group by
  239.             Classification     
  240.     ) first_and_last
  241.     join
  242.         Measure_Analysis analysis
  243.     on
  244.         analysis.Classification = first_and_last.Classification
  245.     and
  246.         analysis.Timepoint in (first_and_last.FirstTimepoint, first_and_last.LastTimepoint)
  247. ) condensed;
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement