Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- -- 4.1.1 ユーザーの評価回数平均 --
- td query -w -d book_crossing_dataset "
- SELECT ROUND(AVG(cnt)) AS avg
- FROM
- (
- SELECT user_id, COUNT(book_rating) AS cnt
- FROM ratings
- GROUP BY user_id
- ) t1
- "
- -- 4.1.2 ユーザーの評価回数分布 --
- td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_count.csv "
- SELECT cnt, user_cnt, ROUND(user_cnt/total_user_cnt*1000)/10 AS rate
- FROM
- (
- SELECT cnt, COUNT(*) AS user_cnt, 1 AS one
- FROM
- (
- SELECT user_id, COUNT(book_rating) AS cnt
- FROM ratings
- GROUP BY user_id
- ) t1
- GROUP BY cnt
- ORDER BY cnt
- LIMIT 50
- ) o1
- JOIN
- (
- SELECT COUNT(distinct user_id) AS total_user_cnt, 1 AS one
- FROM ratings
- ) o2
- ON
- (o1.one=o2.one)
- "
- -- 4.2 ユーザーの評価平均分布 (0<book_rating, 5<=count(book_rating)) --
- td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg.csv "
- SELECT avg, COUNT(*)
- FROM
- (
- SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt
- FROM ratings
- WHERE 0 < book_rating
- GROUP BY user_id
- HAVING 5 <= COUNT(book_rating)
- ) t1
- GROUP BY avg
- ORDER BY avg
- "
- -- 4.3 ユーザーの年代別×評価平均分布 (0<book_rating, 5<=count(book_rating)) --
- td query -w -d book_crossing_dataset -f csv -o review_user_dist_of_review_avg_by_generation.csv "
- SELECT o1.generation, avg, ROUND(cnt/total_cnt*1000)/10 AS rate
- FROM
- (
- SELECT generation, avg, COUNT(*) AS cnt
- FROM
- (
- SELECT user_id, ROUND(AVG(book_rating)/0.5)*0.5 AS avg, COUNT(book_rating) AS cnt
- FROM ratings
- WHERE 0 < book_rating
- GROUP BY user_id
- HAVING 5 <= COUNT(book_rating)
- ) t1
- JOIN
- (
- SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation
- FROM users
- WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
- AND IF(age='NULL',0,CAST(age AS INT)) <= 65
- ) t2
- ON
- (t1.user_id=t2.user_id)
- GROUP BY generation,avg
- ) o1
- JOIN
- (
- SELECT generation, COUNT(*) AS total_cnt
- FROM
- (
- SELECT user_id, COUNT(book_rating)
- FROM ratings
- WHERE 0 < book_rating
- GROUP BY user_id
- HAVING 5 <= COUNT(book_rating)
- ) t1
- JOIN
- (
- SELECT user_id, CEIL(IF(age='NULL',0,CAST(age AS INT))/5)*5 AS generation
- FROM users
- WHERE 10 < IF(age='NULL',0,CAST(age AS INT))
- AND IF(age='NULL',0,CAST(age AS INT)) <= 65
- ) t2
- ON
- (t1.user_id=t2.user_id)
- GROUP BY generation
- ) o2
- ON
- (o1.generation=o2.generation)
- ORDER BY generation, avg
- "
Add Comment
Please, Sign In to add comment