Guest User

Untitled

a guest
Nov 19th, 2018
92
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 2.38 KB | None | 0 0
  1. -- 4.1.1 ユーザーの評価回数平均 --
  2. td query -w -d book_crossing_dataset "
  3. SELECT ROUND(AVG(cnt)) AS avg
  4. FROM
  5. (
  6. SELECT user_id, COUNT(book_rating) AS cnt
  7. FROM ratings
  8. GROUP BY user_id
  9. ) t1
  10. "
  11.  
  12. -- 4.1.2 ユーザーの評価回数分布 --
  13. td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_count.csv "
  14. SELECT cnt, user_cnt, ROUND(user_cnt/total_user_cnt*1000)/10 AS rate
  15. FROM
  16. (
  17. SELECT cnt, COUNT(*) AS user_cnt, 1 AS one
  18. FROM
  19. (
  20. SELECT user_id, COUNT(book_rating) AS cnt
  21. FROM ratings
  22. GROUP BY user_id
  23. ) t1
  24. GROUP BY cnt
  25. ORDER BY cnt
  26. LIMIT 50
  27. ) o1
  28. JOIN
  29. (
  30. SELECT COUNT(distinct user_id) AS total_user_cnt, 1 AS one
  31. FROM ratings
  32. ) o2
  33. ON
  34. (o1.one=o2.one)
  35. "
  36.  
  37. -- 4.2 ユーザーの評価平均分布 (0<book_rating, 5<=count(book_rating)) --
  38. td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg.csv "
  39. SELECT avg, COUNT(*)
  40. FROM
  41. (
  42. SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt
  43. FROM ratings
  44. WHERE 0 < book_rating
  45. GROUP BY user_id
  46. HAVING 5 <= COUNT(book_rating)
  47. ) t1
  48. GROUP BY avg
  49. ORDER BY avg
  50. "
  51.  
  52. -- 4.3 ユーザーの年代別×評価平均分布 (0<book_rating, 5<=count(book_rating)) --
  53. td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg_by_generation.csv "
  54. SELECT o1.generation, avg, ROUND(cnt/total_cnt*1000)/10 AS rate
  55. FROM
  56. (
  57. SELECT generation, avg, COUNT(*) AS cnt
  58. FROM
  59. (
  60. SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt
  61. FROM ratings
  62. WHERE 0 < book_rating
  63. GROUP BY user_id
  64. HAVING 5 <= COUNT(book_rating)
  65. ) t1
  66. JOIN
  67. (
  68. SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation
  69. FROM users
  70. WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
  71. AND IF(age='NULL',0,CAST(age AS INT)) <= 65
  72. ) t2
  73. ON
  74. (t1.user_id=t2.user_id)
  75. GROUP BY generation,avg
  76. ) o1
  77. JOIN
  78. (
  79. SELECT generation, COUNT(*) AS total_cnt
  80. FROM
  81. (
  82. SELECT user_id, COUNT(book_rating)
  83. FROM ratings
  84. WHERE 0 < book_rating
  85. GROUP BY user_id
  86. HAVING 5 <= COUNT(book_rating)
  87. ) t1
  88. JOIN
  89. (
  90. SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation
  91. FROM users
  92. WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
  93. AND IF(age='NULL',0,CAST(age AS INT)) <= 65
  94. ) t2
  95. ON
  96. (t1.user_id=t2.user_id)
  97. GROUP BY generation
  98. ) o2
  99. ON
  100. (o1.generation=o2.generation)
  101. ORDER BY generation, avg
  102. "
Add Comment
Please, Sign In to add comment