Insac

Independent sampling

Nov 8th, 2016
272
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
SQL 1.00 KB | None | 0 0
  -- Independent sampling (Teradata).
  -- Shows how the placement of SAMPLE affects the result of an INNER JOIN.
  -- An inner join is used on purpose: when the two sides are sampled
  -- independently and do not match, the result set is simply empty, which
  -- makes the effect easy to see.

  -- Two volatile tables with the same single-column layout.
  -- ON COMMIT PRESERVE ROWS keeps the inserted rows after each commit.
  CREATE VOLATILE TABLE t1 (
      col INT
  )
  PRIMARY INDEX (col)
  ON COMMIT PRESERVE ROWS;

  CREATE VOLATILE TABLE t2 (
      col INT
  )
  PRIMARY INDEX (col)
  ON COMMIT PRESERVE ROWS;

  -- Load the values 1..5 into t1, one row per statement.
  INSERT INTO t1 (col) VALUES (1);
  INSERT INTO t1 (col) VALUES (2);
  INSERT INTO t1 (col) VALUES (3);
  INSERT INTO t1 (col) VALUES (4);
  INSERT INTO t1 (col) VALUES (5);

  -- Make t2 an exact copy of t1 so every value has a join partner.
  INSERT INTO t2 (col)
  SELECT col
  FROM t1;

  -- Case 1: SAMPLE applied to the join result.
  -- The join yields all five rows and one of them is then sampled,
  -- so this always returns exactly one row.
  SELECT a.col
  FROM t1 AS a
  INNER JOIN t2 AS b
      ON a.col = b.col
  SAMPLE 1;

  -- Case 2: each side sampled separately, but both sides read the SAME table.
  -- Observed: this always returns one row. That was unexpected; presumably the
  -- optimizer reuses one sample for both derived tables because they are
  -- identical (assumption, not verified against the query plan).
  SELECT a.col
  FROM (
      SELECT col
      FROM t1
      SAMPLE 1
  ) AS a
  INNER JOIN (
      SELECT col
      FROM t1
      SAMPLE 1
  ) AS b
      ON a.col = b.col;

  -- Case 3: each side sampled separately from two DIFFERENT tables.
  -- Here the samples are drawn independently, so a row is returned only when
  -- both samples happen to pick the same value; the result is often empty.
  SELECT a.col
  FROM (
      SELECT col
      FROM t1
      SAMPLE 1
  ) AS a
  INNER JOIN (
      SELECT col
      FROM t2
      SAMPLE 1
  ) AS b
      ON a.col = b.col;
Advertisement
Add Comment
Please, Sign In to add comment