Guest User

Untitled

a guest
Jul 20th, 2018
89
0
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
text 6.79 KB | None | 0 0
  1. # Run with: merb -a runner -r lib/workers/scorer.rb
  2.  
  3. DataMapper.read_from!(:job)
  4.  
  5. class Scorer < NfsLockable
  6. def transaction
  7. Merb.logger.info "Calculating popularity scores"
  8.  
  9. # Vars
  10. scored_cutoff = 1.months.ago.to_date #Older than this is not scored, set to 0
  11. cutoff = 1.week.ago.to_date #Events older than this do not apply to their score
  12. new_profile_cutoff = 1.day.ago.to_date #Profiles older than this do not get a boost
  13. base_popularity = 180
  14.  
  15. # Zero out everyone who hasn't logged in in a month
  16. Merb.logger.info "Zeroing out dormant profiles"
  17. Profile.all(:last_login.lte => scored_cutoff).update!(:popularity => base_popularity, :paid_popularity => base_popularity )
  18.  
  19. #Construct scoring query
  20. log_repo_name = repository(:logging).adapter.uri.path.split("/").last
  21. sql = <<SQL
  22. SELECT profile_id,
  23. new_profile_score + paid_score + activated_score + logged_in_score + recent_messages_received_score + recent_replies_score +
  24. ifnull( reply_rate_score, 0) + ifnull( spam_score, 0) +
  25. recent_profile_views_score + public_photos_score + private_photos_score + recent_photo_upload_score + blocks_score + base_score as popularity,
  26. paid_modifier FROM
  27. ( SELECT profile_id,
  28. is_new_profile * 50 as new_profile_score,
  29. is_paid * 0 as paid_score,
  30. is_activated * 10 as activated_score,
  31. -- If has logged in within the last week, rank higher than those within last month. Older than that are zeroed.
  32. has_logged_in * 15 as logged_in_score,
  33. -- Recent inbound messages indicate good profiles
  34. LEAST( messages_received * 2, 20 ) as recent_messages_received_score,
  35. -- Recent outbound replies is good for women
  36. LEAST( messages_replied * 20, 50 ) * gender as recent_replies_score,
  37. -- Rate of reply to incoming messages for all time is important for women
  38. LEAST( messages_replied / messages_received * messages_replied * 2, 50 ) * gender as reply_rate_score,
  39. -- To punish spammers, rate of reply to your messages is important
  40. -- If less than 16 messages sent for every 1 received penalize
  41. - LEAST( GREATEST( messages_sent / responses_to_messages_sent - 15, 0 ) * 10, 70 ) as spam_score,
  42. -- Recent profile views
  43. LEAST( profile_views * 1, 30 ) as recent_profile_views_score,
  44. -- Current Public Photo Count
  45. LEAST( public_photos * 5, 15 ) as public_photos_score,
  46. -- Current Private Photo Count
  47. LEAST( private_photos * 10, 10 ) as private_photos_score,
  48. -- Photos uploaded in last week
  49. LEAST( photos_uploaded * 10, 20 ) as recent_photo_upload_score,
  50. -- Profiles with more than one block indicate poor profile quality and/or spammers
  51. - LEAST( GREATEST( blocks - 1, 0 ) * 30, 60) as blocks_score,
  52. -- Sphinx doesn't allow negative numbers so adjust all up to avoid
  53. -- Spam penalty (70) + Block Penalty (60) + Paid Modifier (50)
  54. 180 as base_score,
  55. -- If searched by a paid member, demote bot results
  56. is_bot * -50 AS paid_modifier
  57. FROM
  58. -- Raw data for use in scoring
  59. (SELECT
  60. profiles.id as profile_id,
  61. profiles.gender = 'f' as gender,
  62. profiles.created_at > '#{new_profile_cutoff}' AS is_new_profile,
  63. profiles.subscription_status IN ('reactivated_subscription', 'currently_subscribed') AS is_paid,
  64. users.user_type = 'decoy' AS is_bot,
  65. profiles.last_login > '#{cutoff}' AS has_logged_in,
  66. profiles.activated_at IS NOT NULL as is_activated,
  67. site_id as site,
  68. -- Messages received in last week from distinct senders (reward freshness)
  69. ( SELECT count( DISTINCT messages.sender_id) FROM messages WHERE recipient_id = profiles.id AND messages.created_at > '#{cutoff}') AS messages_received,
  70. -- Messages replied to in last week with distinct reciever (reward freshness)
  71. ( SELECT count( DISTINCT messages.recipient_id ) FROM messages WHERE sender_id = profiles.id AND messages.created_at > '#{cutoff}' AND in_reply_to_id IS NOT NULL) AS messages_replied,
  72. -- People who have viewed their profile in the last week (reward freshness)
  73. ( SELECT count( DISTINCT profile_interactions.profile_id) FROM #{log_repo_name}.profile_interactions WHERE with_id = profiles.id AND action = 'viewed_profile_of' AND
  74. profile_interactions.at > '#{cutoff}' ) as profile_views,
  75. -- Responses (for calculating response rate for messages sent to them)
  76. ( SELECT count( DISTINCT recipient_id ) FROM messages WHERE sender_id = profiles.id AND in_reply_to_id IS NOT NULL AND thread_id IN ( SELECT messages.id FROM messages WHERE recipient_id = profiles.id AND in_reply_to_id IS NULL AND type != 'AccessGrantedMessage' ) ) as responses_to_inbound_contacts,
  77. -- Initial Contacts (for calculating response rate for messages sent to them)
  78. ( SELECT count( DISTINCT messages.sender_id ) FROM messages WHERE recipient_id = profiles.id AND in_reply_to_id IS NULL AND type != 'AccessGrantedMessage' ) as inbound_contacts,
  79. -- Messages sent (for calculating response rate for messages they sent)
  80. ( SELECT count(messages.id) FROM messages WHERE sender_id = profiles.id AND in_reply_to_id IS NULL) as messages_sent,
  81. -- Messages sent that are responded to (for calculating response rate for messages they sent)
  82. ( SELECT count(DISTINCT inbound.thread_id) FROM messages outbound, messages inbound WHERE outbound.sender_id = profiles.id AND outbound.in_reply_to_id IS NULL AND outbound.id = inbound.thread_id AND inbound.thread_id != inbound.id ) as responses_to_messages_sent,
  83. -- Approved Public Photos
  84. ( SELECT count(photos.id) FROM photos WHERE profile_id = profiles.id AND private = 0 AND moderation != 'd') as public_photos,
  85. -- Approved Private Photos
  86. ( SELECT count(photos.id) FROM photos WHERE profile_id = profiles.id AND private = 1 AND moderation != 'd') as private_photos,
  87. -- Photos uploaded in last week (reward freshness)
  88. ( SELECT count(photos.id) FROM photos WHERE profile_id = profiles.id AND moderation != 'd' AND photos.created_at > '#{cutoff}') as photos_uploaded,
  89. -- Blocks (penalize)
  90. ( SELECT count(profile_blocks.block_id) FROM profile_blocks WHERE block_id = profiles.id ) as blocks
  91. FROM profiles
  92. JOIN users ON profiles.user_id = users.id
  93. WHERE profiles.last_login > '#{scored_cutoff}' AND deleted_at IS NULL AND profiles.status != 'fraud') as scores
  94. ) as popularity_scores
  95. SQL
  96.  
  97. # Get all scores from slave
  98. Merb.logger.info "Calculating Scores"
  99. scores = Profile.repository.adapter(true).query(sql)
  100.  
  101. # Update master
  102. Merb.logger.info "Updating Scores"
  103. scores.in_groups_of(1000) do |score_group|
  104. score_group.compact.each do |score|
  105. Profile.all(:id => score.profile_id).update!(:popularity => score.popularity, :paid_popularity => score.popularity + score.paid_modifier)
  106. end
  107. Merb.logger.info "... updating scores ..."
  108. sleep(5)
  109. end
  110. Merb.logger.info "Score Update Complete"
  111. end
  112. end
  113.  
  114. Scorer.new().execute
Add Comment
Please, Sign In to add comment