Guest User

An introduction to Collective Intelligence using joind.in data from #phpuk2011

a guest
Feb 28th, 2011
652
Never
Not a member of Pastebin yet? Sign Up, it unlocks many cool features!
  1. <?php
  2. /**
  3.  * Which talks should I have seen?
  4.  *
  5.  * An introduction to Collective Intelligence using joind.in data from #phpuk2011.
  6.  *
  7.  * @author Dave Gardner <dave@davegardner.me.uk> @davegardnerisme
  8.  */
  9.  
  10. define('WHO_AM_I', 'davegardnerisme');
  11.  
  12. // curl --data-binary '{"request":{"action":{"type":"gettalks","data":{"event_id":"506"}}}}' \
  13. //      --request POST \
  14. //      --header "Content-Type: application/json" \
  15. //      "http://joind.in/api/event"
  16.  
  17. // Phase 1: grab ratings via the Join.in API
  18.  
  19. $userRatings = array();     // [userId][talkId] = rating
  20. $talkTitles = array();      // we'll store these for later
  21.  
  22. $talks = joindInApi('event', 'gettalks', array('event_id' => 506));
  23. foreach ($talks as $talk)
  24. {
  25.     $talkTitles[$talk['ID']] = $talk['talk_title'];
  26.     echo $talk['ID'] . "\t" . $talk['talk_title'] . "\n";
  27.     $comments = joindInApi('talk', 'getcomments', array('talk_id' => $talk['ID']));
  28.     foreach ($comments as $comment)
  29.     {
  30.         echo ' -> ' . $comment['uname'] . "\t" . $comment['rating'] . "\n";
  31.         $userRatings[$comment['uname']][$talk['ID']] = $comment['rating'];
  32.     }
  33. }
  34.  
  35. echo "\n";
  36.  
  37. // Phase 2: Calculate user similarity (via Pearson correlation)
  38.  
  39. $pearson = array();
  40.  
  41. $users = array_keys($userRatings);
  42. foreach ($users as $user1)
  43. {
  44.     foreach ($users as $user2)
  45.     {
  46.         if ($user1 !== $user2 && !isset($pearson[$user1][$user2]))
  47.         {
  48.             $value = calculatePearson(
  49.                     $userRatings,
  50.                     $user1,
  51.                     $user2
  52.                     );
  53.             $pearson[$user1][$user2] = $value;
  54.             $pearson[$user2][$user1] = $value;
  55.             echo $user1 . "\t" . $user2 . "\t" . $value . "\n";
  56.         }
  57.     }
  58. }
  59.  
  60. echo "\nLike me:\n";
  61.  
  62. arsort($pearson[WHO_AM_I]);
  63. foreach ($pearson[WHO_AM_I] as $user => $value)
  64. {
  65.     echo $user . "\t" . $value . "\n";
  66. }
  67.  
  68. // Phase 3: Get recommendations
  69.  
  70. echo "\nRecommended talks:\n";
  71.  
  72. $recommendations = getRecommendations($userRatings, WHO_AM_I, $pearson);
  73. foreach ($recommendations as $talkId => $recommendation)
  74. {
  75.     echo $talkId . "\t" . $talkTitles[$talkId] . " ($recommendation)\n";
  76. }
  77.  
  78. /**
  79.  * Get recommendations
  80.  *
  81.  * Return recommendations on talks I _should_ have seen (if I could have!)
  82.  *
  83.  * @param array $userRatings Our user ratings; [userId][talkId] = rating
  84.  * @param string $user The user to get recommendations for
  85.  * @param array $similarities The similarities of all users; [user1][user2] = #
  86.  *
  87.  * @return array [talkId] = <how much you should have seen it!>
  88.  */
  89. function getRecommendations(array $userRatings, $user, array $similarities)
  90. {
  91.     $totals = array();
  92.     $similaritySums = array();
  93.  
  94.     foreach ($userRatings as $compareWithUser => $talksWithRatings)
  95.     {
  96.         // don't compare against self
  97.         if ($user === $compareWithUser)
  98.         {
  99.             continue;
  100.         }
  101.  
  102.         // how similar?
  103.         $similarity = $similarities[$user][$compareWithUser];
  104.         // ignore users if they aren't similar (<=0)
  105.         if ($similarity <= 0)
  106.         {
  107.             continue;
  108.         }
  109.  
  110.         foreach ($talksWithRatings as $talkId => $rating)
  111.         {
  112.             // skip if I saw this talk
  113.             if (isset($userRatings[$user][$talkId]))
  114.             {
  115.                 continue;
  116.             }
  117.             if (!isset($totals[$talkId]))
  118.             {
  119.                 $totals[$talkId] = 0;
  120.             }
  121.             $totals[$talkId] += $rating * $similarity;
  122.             if (!isset($similaritySums[$talkId]))
  123.             {
  124.                 $similaritySums[$talkId] = 0;
  125.             }
  126.             $similaritySums[$talkId] += $similarity;
  127.         } // end foreach talks
  128.     } // end foreach users
  129.  
  130.     // generate normalised list
  131.     foreach ($totals as $talkId => &$score)
  132.     {
  133.         $score /= $similaritySums[$talkId];
  134.     }
  135.  
  136.     arsort($totals);
  137.  
  138.     return $totals;
  139. }
  140.  
  141. /**
  142.  * Calculate pearson distance
  143.  *
  144.  * This calculates the pearson correlation between user1 and user2; a measure
  145.  * of how similar users are.
  146.  *
  147.  * @param array $userRatings Our array of user ratings; [userId][talkId] = rating
  148.  * @param string $user1 The first userId
  149.  * @param string $user2 The second userId
  150.  *
  151.  * @return integer|float A number between -1 and 1, where -1 indicates very
  152.  *      dissimilar, and 1 indicates very similar
  153.  */
  154. function calculatePearson($userRatings, $user1, $user2)
  155. {
  156.     // get list of talks both have rated
  157.     $talks = array_keys(array_intersect_key(
  158.             $userRatings[$user1],
  159.             $userRatings[$user2]
  160.             ));
  161.     $numBothHaveRated = count($talks);
  162.     if ($numBothHaveRated === 0)
  163.     {
  164.         $pearson = 0;
  165.     }
  166.     else
  167.     {
  168.         $sumOfRatingsUser1 = 0;
  169.         $sumOfSquareOfRatingsUser1 = 0;
  170.         $sumOfRatingsUser2 = 0;
  171.         $sumOfSquareOfRatingsUser2 = 0;
  172.         $sumOfProducts = 0;
  173.  
  174.         foreach ($talks as $talkId)
  175.         {
  176.             $sumOfRatingsUser1 += $userRatings[$user1][$talkId];
  177.             $sumOfSquareOfRatingsUser1 += pow($userRatings[$user1][$talkId], 2);
  178.             $sumOfRatingsUser2 += $userRatings[$user2][$talkId];
  179.             $sumOfSquareOfRatingsUser2 += pow($userRatings[$user2][$talkId], 2);
  180.             $sumOfProducts += $userRatings[$user1][$talkId] * $userRatings[$user2][$talkId];
  181.         }
  182.  
  183.         // calculate pearson
  184.         $numerator = $sumOfProducts - ($sumOfRatingsUser1 * $sumOfRatingsUser2 / $numBothHaveRated);
  185.         $denominator = sqrt(
  186.                 ($sumOfSquareOfRatingsUser1 - pow($sumOfRatingsUser1, 2) / $numBothHaveRated)
  187.               * ($sumOfSquareOfRatingsUser2 - pow($sumOfRatingsUser2, 2) / $numBothHaveRated)
  188.                 );
  189.         if ($denominator == 0)
  190.         {
  191.             $pearson = 0;
  192.         }
  193.         else
  194.         {
  195.             $pearson = $numerator / $denominator;
  196.         }
  197.     }
  198.  
  199.     return $pearson;
  200. }
  201.  
  202. /**
  203.  * Hit the Joind.in API
  204.  *
  205.  * @param string $endPoint API end point, eg: "event" to hit event API
  206.  * @param string $action The desired action, eg: "gettalks"
  207.  * @param array $params Any params to send
  208.  *
  209.  * @return array Decoded JSON data
  210.  */
  211. function joindInApi($endPoint, $action, array $params = array())
  212. {
  213.     $requestData = array(
  214.         'request' => array(
  215.             'action' => array(
  216.                 'type' => $action,
  217.                 'data' => $params
  218.             )
  219.         )
  220.     );
  221.     $options = array(
  222.         CURLOPT_RETURNTRANSFER => TRUE,     // return web page
  223.         CURLOPT_HEADER         => FALSE,    // don't return headers
  224.         CURLOPT_FOLLOWLOCATION => TRUE,     // follow redirects
  225.         CURLOPT_ENCODING       => '',       // handle all encodings
  226.         CURLOPT_USERAGENT      => 'DAVE!',  // who am i
  227.         CURLOPT_AUTOREFERER    => TRUE,     // set referer on redirect
  228.         CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
  229.         CURLOPT_TIMEOUT        => 120,      // timeout on response
  230.         CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
  231.         CURLOPT_HTTPHEADER     => array('Content-Type: application/json'),
  232.         CURLOPT_POSTFIELDS     => json_encode($requestData)
  233.     );
  234.  
  235.     $ch = curl_init('http://joind.in/api/' . $endPoint);
  236.     curl_setopt_array($ch, $options);
  237.     $content = curl_exec($ch);
  238.     $err = curl_errno($ch);
  239.     $errmsg = curl_error($ch);
  240.     $header = curl_getinfo($ch);
  241.     curl_close($ch);
  242.  
  243.     return json_decode($content, TRUE);
  244. }
RAW Paste Data

Adblocker detected! Please consider disabling it...

We've detected AdBlock Plus or some other adblocking software preventing Pastebin.com from fully loading.

We don't have any obnoxious sound, or popup ads, we actively block these annoying types of ads!

Please add Pastebin.com to your ad blocker whitelist or disable your adblocking software.

×