<?php
/**
* Which talks should I have seen?
*
* An introduction to Collective Intelligence using joind.in data from #phpuk2011.
*
* @author Dave Gardner <dave@davegardner.me.uk> @davegardnerisme
*/
// The joind.in username we want recommendations for.
define('WHO_AM_I', 'davegardnerisme');

// Equivalent raw request, handy for poking at the API by hand:
// curl --data-binary '{"request":{"action":{"type":"gettalks","data":{"event_id":"506"}}}}' \
// --request POST \
// --header "Content-Type: application/json" \
// "http://joind.in/api/event"

// Phase 1: grab ratings via the Joind.in API
$userRatings = array(); // [userId][talkId] = rating
$talkTitles = array(); // [talkId] = title; we'll store these for later
$talks = joindInApi('event', 'gettalks', array('event_id' => 506));
if (!is_array($talks))
{
    // a failed request / undecodable response leaves nothing to iterate over
    $talks = array();
}
foreach ($talks as $talk)
{
    $talkTitles[$talk['ID']] = $talk['talk_title'];
    echo $talk['ID'] . "\t" . $talk['talk_title'] . "\n";
    $comments = joindInApi('talk', 'getcomments', array('talk_id' => $talk['ID']));
    if (!is_array($comments))
    {
        continue;
    }
    foreach ($comments as $comment)
    {
        echo ' -> ' . $comment['uname'] . "\t" . $comment['rating'] . "\n";
        // a later comment by the same user on the same talk replaces the earlier rating
        $userRatings[$comment['uname']][$talk['ID']] = $comment['rating'];
    }
}
echo "\n";

// Phase 2: Calculate user similarity (via Pearson correlation)
$pearson = array(); // [user1][user2] = similarity, stored symmetrically
$users = array_keys($userRatings);
$userCount = count($users);
// The measure is symmetric, so each unordered pair is computed once (i < j)
// and written in both directions.
for ($i = 0; $i < $userCount; $i++)
{
    $user1 = $users[$i];
    for ($j = $i + 1; $j < $userCount; $j++)
    {
        $user2 = $users[$j];
        $value = calculatePearson($userRatings, $user1, $user2);
        $pearson[$user1][$user2] = $value;
        $pearson[$user2][$user1] = $value;
        echo $user1 . "\t" . $user2 . "\t" . $value . "\n";
    }
}

echo "\nLike me:\n";
// WHO_AM_I has no entry when they rated nothing or were the only rater;
// sorting / iterating a missing entry would fail, so skip the listing then.
if (isset($pearson[WHO_AM_I]))
{
    arsort($pearson[WHO_AM_I]); // most similar first
    foreach ($pearson[WHO_AM_I] as $user => $value)
    {
        echo $user . "\t" . $value . "\n";
    }
}

// Phase 3: Get recommendations
echo "\nRecommended talks:\n";
$recommendations = getRecommendations($userRatings, WHO_AM_I, $pearson);
foreach ($recommendations as $talkId => $recommendation)
{
    echo $talkId . "\t" . $talkTitles[$talkId] . " ($recommendation)\n";
}
/**
 * Get recommendations
 *
 * Return recommendations on talks I _should_ have seen (if I could have!)
 *
 * For every talk the target user has not rated, the score is the
 * similarity-weighted average of the ratings given by positively-similar
 * users: sum(rating * similarity) / sum(similarity).
 *
 * Users with no similarity entry for the target user, or with a similarity
 * of zero or less, contribute nothing.
 *
 * @param array  $userRatings  Our user ratings; [userId][talkId] = rating
 * @param string $user         The user to get recommendations for
 * @param array  $similarities The similarities of all users; [user1][user2] = #
 *
 * @return array [talkId] = <how much you should have seen it!>, sorted from
 *               highest to lowest score
 */
function getRecommendations(array $userRatings, $user, array $similarities)
{
    // talks the target user already rated are never recommended
    $alreadyRated = isset($userRatings[$user]) ? $userRatings[$user] : array();

    $totals = array();         // [talkId] = sum of rating * similarity
    $similaritySums = array(); // [talkId] = sum of contributing similarities

    foreach ($userRatings as $compareWithUser => $talksWithRatings)
    {
        // don't compare against self
        if ($user === $compareWithUser)
        {
            continue;
        }

        // no similarity known for this pair: nothing to weight their ratings by
        if (!isset($similarities[$user][$compareWithUser]))
        {
            continue;
        }

        // ignore users if they aren't similar (<=0)
        $similarity = $similarities[$user][$compareWithUser];
        if ($similarity <= 0)
        {
            continue;
        }

        foreach ($talksWithRatings as $talkId => $rating)
        {
            // skip if I saw this talk
            if (isset($alreadyRated[$talkId]))
            {
                continue;
            }

            if (!isset($totals[$talkId]))
            {
                $totals[$talkId] = 0;
                $similaritySums[$talkId] = 0;
            }

            $totals[$talkId] += $rating * $similarity;
            $similaritySums[$talkId] += $similarity;
        }
    }

    // generate normalised list; built by value so no reference into the
    // returned array is left behind. The divisor is always > 0 because only
    // strictly positive similarities are accumulated.
    $recommendations = array();
    foreach ($totals as $talkId => $total)
    {
        $recommendations[$talkId] = $total / $similaritySums[$talkId];
    }

    arsort($recommendations);

    return $recommendations;
}
/**
 * Calculate pearson distance
 *
 * This calculates the pearson correlation between user1 and user2; a measure
 * of how similar users are. Only talks rated by both users are considered.
 *
 * 0 is returned whenever no correlation can be computed: either user has no
 * ratings, the users share no rated talks, or one user's shared ratings are
 * all identical (zero variance).
 *
 * @param array  $userRatings Our array of user ratings; [userId][talkId] = rating
 * @param string $user1       The first userId
 * @param string $user2       The second userId
 *
 * @return integer|float A number between -1 and 1, where -1 indicates very
 *                       dissimilar, and 1 indicates very similar
 */
function calculatePearson($userRatings, $user1, $user2)
{
    // an unknown user has nothing to correlate
    if (!isset($userRatings[$user1], $userRatings[$user2])
        || !is_array($userRatings[$user1])
        || !is_array($userRatings[$user2]))
    {
        return 0;
    }

    $ratings1 = $userRatings[$user1];
    $ratings2 = $userRatings[$user2];

    // get list of talks both have rated
    $talks = array_keys(array_intersect_key($ratings1, $ratings2));
    $numBothHaveRated = count($talks);
    if ($numBothHaveRated === 0)
    {
        return 0;
    }

    $sumOfRatingsUser1 = 0;
    $sumOfSquareOfRatingsUser1 = 0;
    $sumOfRatingsUser2 = 0;
    $sumOfSquareOfRatingsUser2 = 0;
    $sumOfProducts = 0;
    foreach ($talks as $talkId)
    {
        $rating1 = $ratings1[$talkId];
        $rating2 = $ratings2[$talkId];

        $sumOfRatingsUser1 += $rating1;
        $sumOfSquareOfRatingsUser1 += pow($rating1, 2);
        $sumOfRatingsUser2 += $rating2;
        $sumOfSquareOfRatingsUser2 += pow($rating2, 2);
        $sumOfProducts += $rating1 * $rating2;
    }

    // calculate pearson
    $numerator = $sumOfProducts - ($sumOfRatingsUser1 * $sumOfRatingsUser2 / $numBothHaveRated);
    $spreadUser1 = $sumOfSquareOfRatingsUser1 - pow($sumOfRatingsUser1, 2) / $numBothHaveRated;
    $spreadUser2 = $sumOfSquareOfRatingsUser2 - pow($sumOfRatingsUser2, 2) / $numBothHaveRated;

    // Mathematically each spread is >= 0, but floating point rounding can
    // push a zero spread slightly negative; sqrt() of a negative product would
    // give NAN, so treat "not strictly positive" as "no variance".
    if ($spreadUser1 <= 0 || $spreadUser2 <= 0)
    {
        return 0;
    }

    $denominator = sqrt($spreadUser1 * $spreadUser2);
    if ($denominator == 0)
    {
        // the product of two tiny spreads can underflow to zero
        return 0;
    }

    // clamp away rounding drift so the documented [-1, 1] range always holds
    return max(-1.0, min(1.0, $numerator / $denominator));
}
/**
 * Hit the Joind.in API
 *
 * Sends the action and its params as a JSON request body to
 * http://joind.in/api/<endPoint> and returns the decoded JSON response.
 *
 * If the transfer fails, or the response body does not decode to an array,
 * an E_USER_WARNING is raised and an empty array is returned, so callers can
 * always iterate the result.
 *
 * @param string $endPoint API end point, eg: "event" to hit event API
 * @param string $action   The desired action, eg: "gettalks"
 * @param array  $params   Any params to send
 *
 * @return array Decoded JSON data (empty array on failure)
 */
function joindInApi($endPoint, $action, array $params = array())
{
    $requestData = array(
        'request' => array(
            'action' => array(
                'type' => $action,
                'data' => $params
            )
        )
    );

    $options = array(
        CURLOPT_RETURNTRANSFER => TRUE, // return web page
        CURLOPT_HEADER => FALSE, // don't return headers
        CURLOPT_FOLLOWLOCATION => TRUE, // follow redirects
        CURLOPT_ENCODING => '', // handle all encodings
        CURLOPT_USERAGENT => 'DAVE!', // who am i
        CURLOPT_AUTOREFERER => TRUE, // set referer on redirect
        CURLOPT_CONNECTTIMEOUT => 120, // timeout on connect
        CURLOPT_TIMEOUT => 120, // timeout on response
        CURLOPT_MAXREDIRS => 10, // stop after 10 redirects
        CURLOPT_HTTPHEADER => array('Content-Type: application/json'),
        // supplying a body is what makes this a POST request
        CURLOPT_POSTFIELDS => json_encode($requestData)
    );

    $ch = curl_init('http://joind.in/api/' . $endPoint);
    if ($ch === false)
    {
        trigger_error('joindInApi: unable to initialise cURL', E_USER_WARNING);
        return array();
    }

    curl_setopt_array($ch, $options);
    $content = curl_exec($ch);
    // error details must be read before the handle is closed
    $err = curl_errno($ch);
    $errmsg = curl_error($ch);
    curl_close($ch);

    if ($content === false || $err !== 0)
    {
        trigger_error(
            sprintf('joindInApi: request to "%s" failed (%d): %s', $endPoint, $err, $errmsg),
            E_USER_WARNING
        );
        return array();
    }

    $decoded = json_decode($content, TRUE);
    if (!is_array($decoded))
    {
        trigger_error(
            sprintf('joindInApi: response from "%s" was not a JSON array/object', $endPoint),
            E_USER_WARNING
        );
        return array();
    }

    return $decoded;
}