Pastebin launched a little side project called VERYVIRAL.com, check it out ;-) Want more features on Pastebin? Sign Up, it's FREE!
Guest

An introduction to Collective Intelligence using joind.in data from #phpuk2011

By: a guest on Feb 28th, 2011  |  syntax: PHP  |  size: 7.42 KB  |  views: 427  |  expires: Never
download  |  raw  |  embed  |  report abuse  |  print
Text below is selected. Please press Ctrl+C to copy to your clipboard. (⌘+C on Mac)
  1. <?php
  2. /**
  3.  * Which talks should I have seen?
  4.  *
  5.  * An introduction to Collective Intelligence using joind.in data from #phpuk2011.
  6.  *
  7.  * @author Dave Gardner <dave@davegardner.me.uk> @davegardnerisme
  8.  */
  9.  
  define('WHO_AM_I', 'davegardnerisme');

  // The same "gettalks" request, issued from the command line:
  //
  // curl --data-binary '{"request":{"action":{"type":"gettalks","data":{"event_id":"506"}}}}' \
  //      --request POST \
  //      --header "Content-Type: application/json" \
  //      "http://joind.in/api/event"

  // Phase 1: grab ratings via the Joind.in API

  $userRatings = array();     // [userId][talkId] = rating
  $talkTitles = array();      // [talkId] = title; used when printing recommendations

  $talks = joindInApi('event', 'gettalks', array('event_id' => 506));
  if (!is_array($talks))
  {
      // a failed request does not decode to an array; treat that as "no talks"
      // instead of handing a non-array to foreach
      $talks = array();
  }
  foreach ($talks as $talk)
  {
      $talkTitles[$talk['ID']] = $talk['talk_title'];
      echo $talk['ID'] . "\t" . $talk['talk_title'] . "\n";
      $comments = joindInApi('talk', 'getcomments', array('talk_id' => $talk['ID']));
      if (!is_array($comments))
      {
          // same guard as above: no usable comment list for this talk
          continue;
      }
      foreach ($comments as $comment)
      {
          echo ' -> ' . $comment['uname'] . "\t" . $comment['rating'] . "\n";
          // a later comment by the same user on the same talk overwrites the earlier one
          $userRatings[$comment['uname']][$talk['ID']] = $comment['rating'];
      }
  }

  echo "\n";

  // Phase 2: Calculate user similarity (via Pearson correlation)

  $pearson = array();         // [user1][user2] = correlation, stored symmetrically

  $users = array_keys($userRatings);
  foreach ($users as $user1)
  {
      foreach ($users as $user2)
      {
          // skip self-comparison, and pairs already filled in from the other side
          if ($user1 !== $user2 && !isset($pearson[$user1][$user2]))
          {
              $value = calculatePearson(
                      $userRatings,
                      $user1,
                      $user2
                      );
              $pearson[$user1][$user2] = $value;
              $pearson[$user2][$user1] = $value;
              echo $user1 . "\t" . $user2 . "\t" . $value . "\n";
          }
      }
  }

  echo "\nLike me:\n";

  if (!isset($pearson[WHO_AM_I]))
  {
      // WHO_AM_I has no ratings, or is the only user with ratings, so no
      // similarity row was ever created; use an empty row so the sort and
      // the loop below have an array to work on
      $pearson[WHO_AM_I] = array();
  }
  arsort($pearson[WHO_AM_I]);     // most similar users first
  foreach ($pearson[WHO_AM_I] as $user => $value)
  {
      echo $user . "\t" . $value . "\n";
  }

  // Phase 3: Get recommendations

  echo "\nRecommended talks:\n";

  // without any ratings from WHO_AM_I there is no similarity data to weight
  // other users by, so there is nothing to recommend
  $recommendations = isset($userRatings[WHO_AM_I])
      ? getRecommendations($userRatings, WHO_AM_I, $pearson)
      : array();
  foreach ($recommendations as $talkId => $recommendation)
  {
      echo $talkId . "\t" . $talkTitles[$talkId] . " ($recommendation)\n";
  }
  77.  
  /**
   * Get recommendations
   *
   * Return recommendations on talks I _should_ have seen (if I could have!)
   *
   * Each talk the user has not rated is scored with the similarity-weighted
   * average of the ratings given by other users. Only users with a similarity
   * strictly greater than zero contribute; users with no entry in
   * $similarities are skipped.
   *
   * @param array $userRatings Our user ratings; [userId][talkId] = rating
   * @param string $user The user to get recommendations for
   * @param array $similarities The similarities of all users; [user1][user2] = #
   *
   * @return array [talkId] = <how much you should have seen it!>, sorted from
   *      highest to lowest score
   */
  function getRecommendations(array $userRatings, $user, array $similarities)
  {
      $totals = array();          // [talkId] = sum of (rating * similarity)
      $similaritySums = array();  // [talkId] = sum of similarity of the users who rated it

      foreach ($userRatings as $compareWithUser => $talksWithRatings)
      {
          // don't compare against self
          if ($user === $compareWithUser)
          {
              continue;
          }

          // how similar? a pair missing from the matrix counts as "not similar"
          if (!isset($similarities[$user][$compareWithUser]))
          {
              continue;
          }
          $similarity = $similarities[$user][$compareWithUser];
          // ignore users if they aren't similar (<=0)
          if ($similarity <= 0)
          {
              continue;
          }

          foreach ($talksWithRatings as $talkId => $rating)
          {
              // skip if I saw this talk
              if (isset($userRatings[$user][$talkId]))
              {
                  continue;
              }
              if (!isset($totals[$talkId]))
              {
                  $totals[$talkId] = 0;
                  $similaritySums[$talkId] = 0;
              }
              $totals[$talkId] += $rating * $similarity;
              $similaritySums[$talkId] += $similarity;
          } // end foreach talks
      } // end foreach users

      // generate normalised list; built by value so no reference into the
      // array is left behind. Every sum here is > 0 because only positive
      // similarities were added.
      $recommendations = array();
      foreach ($totals as $talkId => $total)
      {
          $recommendations[$talkId] = $total / $similaritySums[$talkId];
      }

      arsort($recommendations);

      return $recommendations;
  }
  140.  
  /**
   * Calculate the Pearson correlation between two users
   *
   * Only talks that both users have rated take part in the calculation; the
   * result is a measure of how similar the two users' ratings are.
   *
   * @param array $userRatings Our array of user ratings; [userId][talkId] = rating
   * @param string $user1 The first userId
   * @param string $user2 The second userId
   *
   * @return integer|float A number between -1 and 1, where -1 indicates very
   *      dissimilar, and 1 indicates very similar; 0 is returned when the
   *      users share no rated talks or when the denominator works out as zero
   */
  function calculatePearson($userRatings, $user1, $user2)
  {
      $ratingsA = $userRatings[$user1];
      $ratingsB = $userRatings[$user2];

      // entries of user1 whose talkId user2 has rated as well
      $shared = array_intersect_key($ratingsA, $ratingsB);
      $sharedCount = count($shared);
      if ($sharedCount === 0)
      {
          return 0;
      }

      $sumA = 0;
      $sumSquaresA = 0;
      $sumB = 0;
      $sumSquaresB = 0;
      $sumAB = 0;

      foreach ($shared as $talkId => $ratingA)
      {
          $ratingB = $ratingsB[$talkId];

          $sumA += $ratingA;
          $sumSquaresA += pow($ratingA, 2);
          $sumB += $ratingB;
          $sumSquaresB += pow($ratingB, 2);
          $sumAB += $ratingA * $ratingB;
      }

      $top = $sumAB - ($sumA * $sumB / $sharedCount);
      $spreadA = $sumSquaresA - pow($sumA, 2) / $sharedCount;
      $spreadB = $sumSquaresB - pow($sumB, 2) / $sharedCount;
      $bottom = sqrt($spreadA * $spreadB);

      // a zero denominator would make the division undefined
      if ($bottom == 0)
      {
          return 0;
      }

      return $top / $bottom;
  }
  201.  
  /**
   * Hit the Joind.in API
   *
   * POSTs a JSON request envelope of the form
   * {"request":{"action":{"type":<action>,"data":<params>}}} to
   * http://joind.in/api/<endPoint> and decodes the JSON response.
   *
   * If the transfer fails, or the response body does not decode to an array,
   * an E_USER_WARNING is raised and an empty array is returned, so callers
   * can always iterate over the result.
   *
   * @param string $endPoint API end point, eg: "event" to hit event API
   * @param string $action The desired action, eg: "gettalks"
   * @param array $params Any params to send
   *
   * @return array Decoded JSON data; empty array on failure
   */
  function joindInApi($endPoint, $action, array $params = array())
  {
      $requestData = array(
          'request' => array(
              'action' => array(
                  'type' => $action,
                  'data' => $params
              )
          )
      );
      $options = array(
          CURLOPT_RETURNTRANSFER => TRUE,     // return web page
          CURLOPT_HEADER         => FALSE,    // don't return headers
          CURLOPT_FOLLOWLOCATION => TRUE,     // follow redirects
          CURLOPT_ENCODING       => '',       // handle all encodings
          CURLOPT_USERAGENT      => 'DAVE!',  // who am i
          CURLOPT_AUTOREFERER    => TRUE,     // set referer on redirect
          CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
          CURLOPT_TIMEOUT        => 120,      // timeout on response
          CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
          CURLOPT_HTTPHEADER     => array('Content-Type: application/json'),
          CURLOPT_POSTFIELDS     => json_encode($requestData)
      );

      $ch = curl_init('http://joind.in/api/' . $endPoint);
      if ($ch === FALSE)
      {
          trigger_error('Joind.in API: could not initialise cURL', E_USER_WARNING);
          return array();
      }
      curl_setopt_array($ch, $options);
      $content = curl_exec($ch);
      // read the error state before the handle is closed
      $err = curl_errno($ch);
      $errmsg = curl_error($ch);
      curl_close($ch);

      if ($content === FALSE || $err !== 0)
      {
          trigger_error(
              'Joind.in API request "' . $endPoint . '/' . $action . '" failed: [' . $err . '] ' . $errmsg,
              E_USER_WARNING
          );
          return array();
      }

      $decoded = json_decode($content, TRUE);
      if (!is_array($decoded))
      {
          // invalid JSON decodes to NULL, and a bare scalar is not a usable result
          trigger_error(
              'Joind.in API request "' . $endPoint . '/' . $action . '" did not return a JSON array/object',
              E_USER_WARNING
          );
          return array();
      }

      return $decoded;
  }