Advertisement
Not a member of Pastebin yet?
Sign Up,
it unlocks many cool features!
- <?php
- /**
- * Which talks should I have seen?
- *
- * An introduction to Collective Intelligence using joind.in data from #phpuk2011.
- *
- * @author Dave Gardner <dave@davegardner.me.uk> @davegardnerisme
- */
// The joind.in user name the recommendations are produced for
define('WHO_AM_I', 'davegardnerisme');

// The equivalent request from the command line:
// curl --data-binary '{"request":{"action":{"type":"gettalks","data":{"event_id":"506"}}}}' \
// --request POST \
// --header "Content-Type: application/json" \
// "http://joind.in/api/event"

// Phase 1: grab ratings via the Joind.in API
$userRatings = array(); // [userId][talkId] = rating
$talkTitles = array(); // [talkId] = title; we'll store these for later

$talks = joindInApi('event', 'gettalks', array('event_id' => 506));
if (!is_array($talks))
{
    // anything other than an array (e.g. a failed request) means there is
    // nothing to iterate over
    $talks = array();
}
foreach ($talks as $talk)
{
    $talkTitles[$talk['ID']] = $talk['talk_title'];
    echo $talk['ID'] . "\t" . $talk['talk_title'] . "\n";

    $comments = joindInApi('talk', 'getcomments', array('talk_id' => $talk['ID']));
    if (!is_array($comments))
    {
        // no usable comment list for this talk; move on to the next one
        continue;
    }
    foreach ($comments as $comment)
    {
        echo ' -> ' . $comment['uname'] . "\t" . $comment['rating'] . "\n";
        // a later comment by the same user on the same talk overwrites the
        // rating stored from an earlier one
        $userRatings[$comment['uname']][$talk['ID']] = $comment['rating'];
    }
}
echo "\n";
// Phase 2: Calculate user similarity (via Pearson correlation)
$pearson = array(); // [user1][user2] = correlation, stored in both directions
$users = array_keys($userRatings);
$userCount = count($users);

// visit every unordered pair of distinct users exactly once
for ($i = 0; $i < $userCount; $i++)
{
    $user1 = $users[$i];
    for ($j = $i + 1; $j < $userCount; $j++)
    {
        $user2 = $users[$j];
        $value = calculatePearson($userRatings, $user1, $user2);
        // the correlation is symmetric, so record it for both lookups
        $pearson[$user1][$user2] = $value;
        $pearson[$user2][$user1] = $value;
        echo $user1 . "\t" . $user2 . "\t" . $value . "\n";
    }
}

echo "\nLike me:\n";
// WHO_AM_I only has an entry when that user rated something and at least one
// other user exists; without the guard arsort() would be handed NULL
if (isset($pearson[WHO_AM_I]))
{
    // most similar users first
    arsort($pearson[WHO_AM_I]);
    foreach ($pearson[WHO_AM_I] as $user => $value)
    {
        echo $user . "\t" . $value . "\n";
    }
}
// Phase 3: Get recommendations
// Prints one line per talk WHO_AM_I has not rated: id, title and the score
// returned by getRecommendations() in parentheses.
echo "\nRecommended talks:\n";
$recommendations = getRecommendations($userRatings, WHO_AM_I, $pearson);
foreach ($recommendations as $talkId => $recommendation)
{
    echo "{$talkId}\t{$talkTitles[$talkId]} ({$recommendation})\n";
}
/**
 * Get recommendations
 *
 * Return recommendations on talks I _should_ have seen (if I could have!)
 *
 * For every talk the user has not rated, the score is the average of the
 * ratings given by other users, weighted by how similar each of those users
 * is to $user. Users with no recorded similarity, or a similarity that is not
 * strictly positive, do not contribute.
 *
 * @param array  $userRatings  Our user ratings; [userId][talkId] = rating
 * @param string $user         The user to get recommendations for
 * @param array  $similarities The similarities of all users; [user1][user2] = #
 *
 * @return array [talkId] = <how much you should have seen it!>, highest first
 */
function getRecommendations(array $userRatings, $user, array $similarities)
{
    // resolve the per-user lookups once; either may be absent
    $seenByUser = isset($userRatings[$user]) ? $userRatings[$user] : array();
    $similarToUser = isset($similarities[$user]) ? $similarities[$user] : array();

    $totals = array(); // [talkId] = sum of (rating * similarity)
    $similaritySums = array(); // [talkId] = sum of similarity

    foreach ($userRatings as $compareWithUser => $talksWithRatings)
    {
        // don't compare against self
        if ($user === $compareWithUser)
        {
            continue;
        }
        // no similarity recorded for this pair: nothing to weight with
        if (!isset($similarToUser[$compareWithUser]))
        {
            continue;
        }
        $similarity = $similarToUser[$compareWithUser];
        // ignore users who aren't similar (<= 0); written as a negated ">" so
        // that a NAN similarity is rejected as well
        if (!($similarity > 0))
        {
            continue;
        }
        foreach ($talksWithRatings as $talkId => $rating)
        {
            // skip if I saw this talk
            if (isset($seenByUser[$talkId]))
            {
                continue;
            }
            if (!isset($totals[$talkId]))
            {
                $totals[$talkId] = 0;
                $similaritySums[$talkId] = 0;
            }
            $totals[$talkId] += $rating * $similarity;
            $similaritySums[$talkId] += $similarity;
        } // end foreach talks
    } // end foreach users

    // generate normalised list; built as a new array so that no by-reference
    // loop variable is left dangling. Every contributing similarity is > 0,
    // so the divisor is never zero.
    $recommendations = array();
    foreach ($totals as $talkId => $total)
    {
        $recommendations[$talkId] = $total / $similaritySums[$talkId];
    }
    arsort($recommendations);
    return $recommendations;
}
/**
 * Calculate Pearson correlation
 *
 * This calculates the pearson correlation between user1 and user2; a measure
 * of how similar users are. Only talks that both users have rated take part
 * in the calculation.
 *
 * 0 is returned when no correlation can be computed: either user is unknown,
 * the users share no rated talk, or one of them gave the same rating to every
 * shared talk (which includes the case of a single shared talk).
 *
 * @param array  $userRatings Our array of user ratings; [userId][talkId] = rating
 * @param string $user1       The first userId
 * @param string $user2       The second userId
 *
 * @return integer|float A number between -1 and 1, where -1 indicates very
 *                       dissimilar, and 1 indicates very similar
 */
function calculatePearson($userRatings, $user1, $user2)
{
    // an unknown user has rated nothing, so there is nothing to correlate
    if (!isset($userRatings[$user1], $userRatings[$user2]))
    {
        return 0;
    }
    $ratings1 = $userRatings[$user1];
    $ratings2 = $userRatings[$user2];

    // get list of talks both have rated
    $talks = array_keys(array_intersect_key($ratings1, $ratings2));
    $numBothHaveRated = count($talks);
    if ($numBothHaveRated === 0)
    {
        return 0;
    }

    $sumOfRatingsUser1 = 0;
    $sumOfSquareOfRatingsUser1 = 0;
    $sumOfRatingsUser2 = 0;
    $sumOfSquareOfRatingsUser2 = 0;
    $sumOfProducts = 0;
    foreach ($talks as $talkId)
    {
        $rating1 = $ratings1[$talkId];
        $rating2 = $ratings2[$talkId];
        $sumOfRatingsUser1 += $rating1;
        $sumOfSquareOfRatingsUser1 += $rating1 * $rating1;
        $sumOfRatingsUser2 += $rating2;
        $sumOfSquareOfRatingsUser2 += $rating2 * $rating2;
        $sumOfProducts += $rating1 * $rating2;
    }

    // calculate pearson
    $numerator = $sumOfProducts
        - ($sumOfRatingsUser1 * $sumOfRatingsUser2 / $numBothHaveRated);
    $spreadUser1 = $sumOfSquareOfRatingsUser1
        - ($sumOfRatingsUser1 * $sumOfRatingsUser1 / $numBothHaveRated);
    $spreadUser2 = $sumOfSquareOfRatingsUser2
        - ($sumOfRatingsUser2 * $sumOfRatingsUser2 / $numBothHaveRated);

    // Each spread is mathematically >= 0, but floating point rounding can
    // push a zero spread slightly negative. Checking the two terms one by one
    // keeps sqrt() away from a negative product and avoids dividing by zero.
    if (!($spreadUser1 > 0 && $spreadUser2 > 0))
    {
        return 0;
    }

    $pearson = $numerator / sqrt($spreadUser1 * $spreadUser2);

    // rounding may leave the quotient a hair outside the documented range
    return max(-1.0, min(1.0, $pearson));
}
/**
 * Hit the Joind.in API
 *
 * POSTs a JSON document of the form
 * {"request":{"action":{"type":<action>,"data":<params>}}} to
 * http://joind.in/api/<endPoint> and decodes the JSON response.
 *
 * When the request fails, or the response does not decode to an array, an
 * E_USER_WARNING describing the problem is raised and an empty array is
 * returned, so callers can always iterate over the result.
 *
 * @param string $endPoint API end point, eg: "event" to hit event API
 * @param string $action   The desired action, eg: "gettalks"
 * @param array  $params   Any params to send
 *
 * @return array Decoded JSON data; empty on failure
 */
function joindInApi($endPoint, $action, array $params = array())
{
    $requestData = array(
        'request' => array(
            'action' => array(
                'type' => $action,
                'data' => $params
            )
        )
    );
    $options = array(
        CURLOPT_RETURNTRANSFER => TRUE, // return web page
        CURLOPT_HEADER => FALSE, // don't return headers
        CURLOPT_FOLLOWLOCATION => TRUE, // follow redirects
        CURLOPT_ENCODING => '', // handle all encodings
        CURLOPT_USERAGENT => 'DAVE!', // who am i
        CURLOPT_AUTOREFERER => TRUE, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
        CURLOPT_TIMEOUT => 120, // timeout on response
        CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
        CURLOPT_HTTPHEADER => array('Content-Type: application/json'),
        CURLOPT_POSTFIELDS => json_encode($requestData)
    );

    $url = 'http://joind.in/api/' . $endPoint;
    $ch = curl_init($url);
    if ($ch === FALSE)
    {
        trigger_error('joindInApi: could not initialise cURL for ' . $url, E_USER_WARNING);
        return array();
    }
    curl_setopt_array($ch, $options);

    $content = curl_exec($ch);
    // read the error state before the handle is closed
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    curl_close($ch);

    // curl_exec() yields FALSE when the transfer itself failed
    if ($content === FALSE)
    {
        trigger_error(
            sprintf('joindInApi: request to %s (%s) failed: [%d] %s', $url, $action, $err, $errmsg),
            E_USER_WARNING
        );
        return array();
    }

    $decoded = json_decode($content, TRUE);
    // json_decode() yields NULL for invalid JSON, and a scalar for a bare
    // JSON value; neither is something callers can iterate over
    if (!is_array($decoded))
    {
        trigger_error(
            sprintf('joindInApi: response from %s (%s) is not a JSON array or object', $url, $action),
            E_USER_WARNING
        );
        return array();
    }

    return $decoded;
}
Advertisement
Add Comment
Please, Sign In to add comment
Advertisement