@davegardnerisme */

// The user we generate "like me" rankings and talk recommendations for.
define('WHO_AM_I', 'davegardnerisme');

// Equivalent raw request, for reference:
//
// curl --data-binary '{"request":{"action":{"type":"gettalks","data":{"event_id":"506"}}}}' \
//      --request POST \
//      --header "Content-Type: application/json" \
//      "http://joind.in/api/event"

// Phase 1: grab ratings via the Joind.in API

$userRatings = array();     // [userId][talkId] = rating
$talkTitles = array();      // [talkId] = title; we'll store these for later

$talks = joindInApi('event', 'gettalks', array('event_id' => 506));
foreach ($talks as $talk) {
    $talkTitles[$talk['ID']] = $talk['talk_title'];
    echo $talk['ID'] . "\t" . $talk['talk_title'] . "\n";

    $comments = joindInApi('talk', 'getcomments', array('talk_id' => $talk['ID']));
    foreach ($comments as $comment) {
        echo '  -> ' . $comment['uname'] . "\t" . $comment['rating'] . "\n";
        // a later comment by the same user on the same talk overwrites the earlier one
        $userRatings[$comment['uname']][$talk['ID']] = $comment['rating'];
    }
}

echo "\n";

// Phase 2: Calculate user similarity (via Pearson correlation)

$pearson = array();         // [user1][user2] = correlation (stored symmetrically)
$users = array_keys($userRatings);
foreach ($users as $user1) {
    foreach ($users as $user2) {
        // each unordered pair is computed once; the isset() check skips the mirror
        if ($user1 !== $user2 && !isset($pearson[$user1][$user2])) {
            $value = calculatePearson(
                $userRatings,
                $user1,
                $user2
            );
            $pearson[$user1][$user2] = $value;
            $pearson[$user2][$user1] = $value;
            echo $user1 . "\t" . $user2 . "\t" . $value . "\n";
        }
    }
}

echo "\nLike me:\n";

// WHO_AM_I only has an entry when they rated something and at least one other
// user exists; without this guard arsort() would be handed an undefined value.
if (isset($pearson[WHO_AM_I])) {
    arsort($pearson[WHO_AM_I]);
    foreach ($pearson[WHO_AM_I] as $user => $value) {
        echo $user . "\t" . $value . "\n";
    }
}

// Phase 3: Get recommendations

echo "\nRecommended talks:\n";

$recommendations = getRecommendations($userRatings, WHO_AM_I, $pearson);
foreach ($recommendations as $talkId => $recommendation) {
    echo $talkId . "\t" . $talkTitles[$talkId] . " ($recommendation)\n";
}

/**
 * Get recommendations
 *
 * Return recommendations on talks I _should_ have seen (if I could have!).
 * Each talk the user has not rated is scored with the similarity-weighted
 * average of the ratings given by positively-correlated users.
 *
 * @param array  $userRatings  Our user ratings; [userId][talkId] = rating
 * @param string $user         The user to get recommendations for
 * @param array  $similarities The similarities of all users; [user1][user2] = #
 *
 * @return array [talkId] = weighted average rating, sorted highest first
 */
function getRecommendations(array $userRatings, $user, array $similarities)
{
    $totals = array();          // [talkId] = sum of (rating * similarity)
    $similaritySums = array();  // [talkId] = sum of similarities that contributed

    foreach ($userRatings as $compareWithUser => $talksWithRatings) {
        // don't compare against self
        if ($user === $compareWithUser) {
            continue;
        }

        // how similar? a missing entry is treated as "no similarity"
        $similarity = isset($similarities[$user][$compareWithUser])
            ? $similarities[$user][$compareWithUser]
            : 0;

        // ignore users if they aren't similar (<=0)
        if ($similarity <= 0) {
            continue;
        }

        foreach ($talksWithRatings as $talkId => $rating) {
            // skip if I saw this talk
            if (isset($userRatings[$user][$talkId])) {
                continue;
            }

            if (!isset($totals[$talkId])) {
                $totals[$talkId] = 0;
                $similaritySums[$talkId] = 0;
            }
            $totals[$talkId] += $rating * $similarity;
            $similaritySums[$talkId] += $similarity;
        }
    }

    // generate normalised list; written back by key rather than by reference
    // so no dangling reference into $totals survives the loop.
    // $similaritySums[$talkId] is always > 0 here (only similarities > 0 are added).
    foreach ($totals as $talkId => $total) {
        $totals[$talkId] = $total / $similaritySums[$talkId];
    }

    arsort($totals);
    return $totals;
}

/**
 * Calculate Pearson correlation
 *
 * This calculates the Pearson correlation between user1 and user2; a measure
 * of how similar users are, based only on the talks both of them have rated.
 *
 * @param array  $userRatings Our array of user ratings; [userId][talkId] = rating
 * @param string $user1       The first userId
 * @param string $user2       The second userId
 *
 * @return integer|float A number between -1 and 1, where -1 indicates very
 *                       dissimilar, and 1 indicates very similar. 0 is
 *                       returned when the users share no rated talks or when
 *                       either user's shared ratings have no variance.
 */
function calculatePearson($userRatings, $user1, $user2)
{
    // an unknown user simply has no ratings
    $ratings1 = isset($userRatings[$user1]) ? $userRatings[$user1] : array();
    $ratings2 = isset($userRatings[$user2]) ? $userRatings[$user2] : array();

    // get list of talks both have rated
    $talks = array_keys(array_intersect_key($ratings1, $ratings2));
    $numBothHaveRated = count($talks);

    if ($numBothHaveRated === 0) {
        return 0;
    }

    $sumOfRatingsUser1 = 0;
    $sumOfSquareOfRatingsUser1 = 0;
    $sumOfRatingsUser2 = 0;
    $sumOfSquareOfRatingsUser2 = 0;
    $sumOfProducts = 0;

    foreach ($talks as $talkId) {
        $rating1 = $ratings1[$talkId];
        $rating2 = $ratings2[$talkId];

        $sumOfRatingsUser1 += $rating1;
        $sumOfSquareOfRatingsUser1 += pow($rating1, 2);
        $sumOfRatingsUser2 += $rating2;
        $sumOfSquareOfRatingsUser2 += pow($rating2, 2);
        $sumOfProducts += $rating1 * $rating2;
    }

    // calculate pearson
    $numerator = $sumOfProducts
        - ($sumOfRatingsUser1 * $sumOfRatingsUser2 / $numBothHaveRated);
    $varianceProduct =
        ($sumOfSquareOfRatingsUser1 - pow($sumOfRatingsUser1, 2) / $numBothHaveRated)
        * ($sumOfSquareOfRatingsUser2 - pow($sumOfRatingsUser2, 2) / $numBothHaveRated);

    // Written as !(> 0) so that zero, a rounding-induced negative, and NAN
    // all fall into the "no correlation" case instead of reaching sqrt()
    // or a division by zero.
    if (!($varianceProduct > 0)) {
        return 0;
    }

    $pearson = $numerator / sqrt($varianceProduct);

    // floating point rounding can push the result marginally outside [-1, 1]
    return max(-1, min(1, $pearson));
}

/**
 * Hit the Joind.in API
 *
 * POSTs a JSON request envelope of the form
 * {"request":{"action":{"type":<action>,"data":<params>}}} to
 * http://joind.in/api/<endPoint> and decodes the JSON response.
 *
 * @param string $endPoint API end point, eg: "event" to hit event API
 * @param string $action   The desired action, eg: "gettalks"
 * @param array  $params   Any params to send
 *
 * @return array Decoded JSON data
 *
 * @throws \RuntimeException If the request fails at the transport level or
 *                           the response body does not decode to an array
 */
function joindInApi($endPoint, $action, array $params = array())
{
    $requestData = array(
        'request' => array(
            'action' => array(
                'type' => $action,
                'data' => $params
            )
        )
    );

    $options = array(
        CURLOPT_RETURNTRANSFER => true,     // return web page
        CURLOPT_HEADER         => false,    // don't return headers
        CURLOPT_FOLLOWLOCATION => true,     // follow redirects
        CURLOPT_ENCODING       => '',       // handle all encodings
        CURLOPT_USERAGENT      => 'DAVE!',  // who am i
        CURLOPT_AUTOREFERER    => true,     // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120,      // timeout on connect
        CURLOPT_TIMEOUT        => 120,      // timeout on response
        CURLOPT_MAXREDIRS      => 10,       // stop after 10 redirects
        CURLOPT_HTTPHEADER     => array('Content-Type: application/json'),
        CURLOPT_POSTFIELDS     => json_encode($requestData)
    );

    $url = 'http://joind.in/api/' . $endPoint;

    $ch = curl_init($url);
    if ($ch === false) {
        throw new \RuntimeException('Unable to initialise cURL for ' . $url);
    }

    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    $err     = curl_errno($ch);
    $errmsg  = curl_error($ch);
    curl_close($ch);

    // curl_exec() returns false on failure when CURLOPT_RETURNTRANSFER is set
    if ($content === false || $err !== 0) {
        throw new \RuntimeException(
            'Joind.in API request to ' . $url . ' (' . $action . ') failed: ['
            . $err . '] ' . $errmsg
        );
    }

    $decoded = json_decode($content, true);
    if (!is_array($decoded)) {
        // callers iterate the result, so anything but an array is unusable
        throw new \RuntimeException(
            'Joind.in API response from ' . $url . ' (' . $action
            . ') was not a JSON array/object (json error code '
            . json_last_error() . ')'
        );
    }

    return $decoded;
}