同类事物推荐算法Slope one

时间:2022-12-07 12:57:13

同类事物推荐算法Slope one

 

应用方向:同类事物推荐

问题重现:
有n个人对事物A和事物B打分了,R(A->B)表示这n个人对A和对B打分的平均差(A-B);
有m个人对事物B和事物C打分了,R(C->B)表示这m个人对C和对B打分的平均差(C-B)。

现在某个用户对A的打分是Ra,对C的打分是Rc,那么这个人对B的打分可能是:
Rb = (n * (Ra - R(A->B)) + m * (Rc - R(C->B)))/(m+n)

实现代码(原创造人代码)

#########原作者实现SQL和PHP代码###########

CREATE TABLE rating (
    userID INT PRIMARY KEY,
    itemID INT NOT NULL,
    ratingValue INT NOT NULL,
    datetimestamp TIMESTAMP NOT NULL
);


CREATE TABLE dev (
  itemID1 int(11) NOT NULL default '0',
  itemID2 int(11) NOT NULL default '0',
  count int(11) NOT NULL default '0',
  sum int(11) NOT NULL default '0',
  PRIMARY KEY  (itemID1,itemID2)
);


# simple query to output 10 most liked items
# by people who rated item 1
SELECT itemID2, ( sum / count ) AS average
FROM dev
WHERE count > 2 AND itemID1 = 1
ORDER  BY ( sum / count ) DESC
LIMIT 10;

# Next part is sample PHP code.
####################################

// This code assumes $itemID is set to that of 
// the item that was just rated. 
// Get all of the user's rating pairs
$sql = "SELECT DISTINCT r.itemID, r2.ratingValue - r.ratingValue 
            as rating_difference
            FROM rating r, rating r2
            WHERE r.userID=$userID AND 
                    r2.itemID=$itemID AND 
                    r2.userID=$userID;";
$db_result = mysql_query($sql, $connection);
$num_rows = mysql_num_rows($db_result);
//For every one of the user's rating pairs, 
//update the dev table
while ($row = mysql_fetch_assoc($db_result)) {
    $other_itemID = $row["itemID"];
    $rating_difference = $row["rating_difference"];
    //if the pair ($itemID, $other_itemID) is already in the dev table
    //then we want to update 2 rows.
    if (mysql_num_rows(mysql_query("SELECT itemID1 
    FROM dev WHERE itemID1=$itemID AND itemID2=$other_itemID",
    $connection)) > 0)  {
        $sql = "UPDATE dev SET count=count+1, 
	sum=sum+$rating_difference WHERE itemID1=$itemID 
	AND itemID2=$other_itemID";
        mysql_query($sql, $connection);
	//We only want to update if the items are different                
        if ($itemID != $other_itemID) {
            $sql = "UPDATE dev SET count=count+1, 
	    sum=sum-$rating_difference 
	    WHERE (itemID1=$other_itemID AND itemID2=$itemID)";
            mysql_query($sql, $connection);
        }
    }
    else { //we want to insert 2 rows into the dev table
        $sql = "INSERT INTO dev VALUES ($itemID, $other_itemID,
        1, $rating_difference)";
        mysql_query($sql, $connection); 
	//We only want to insert if the items are different       
        if ($itemID != $other_itemID) {         
            $sql = "INSERT INTO dev VALUES ($other_itemID, 
	    $itemID, 1, -$rating_difference)";
            mysql_query($sql, $connection);
        }
    }    
}


function predict($userID, $itemID) {
    global $connection;    
    $denom = 0.0; //denominator
    $numer = 0.0; //numerator    
    $k = $itemID;    
    $sql = "SELECT r.itemID, r.ratingValue 
    FROM rating r WHERE r.userID=$userID AND r.itemID <> $itemID";
    $db_result = mysql_query($sql, $connection);        
    //for all items the user has rated
    while ($row = mysql_fetch_assoc($db_result))  {
        $j = $row["itemID"];
        $ratingValue = $row["ratingValue"];        
        //get the number of times k and j have both been rated by the same user
        $sql2 = "SELECT d.count, d.sum FROM dev d WHERE itemID1=$k AND itemID2=$j";
        $count_result = mysql_query($sql2, $connection);        
        //skip the calculation if it isn't found
        if(mysql_num_rows($count_result) > 0)  {
            $count = mysql_result($count_result, 0, "count");
            $sum = mysql_result($count_result, 0, "sum");            
            //calculate the average
            $average = $sum / $count;            
            //increment denominator by count
            $denom += $count;            
            //increment the numerator
            $numer += $count * ($average + $ratingValue);
        }        
    }    
    if ($denom == 0)
        return 0;
    else
        return ($numer / $denom);
}


function predict_all($userID ) {
    $sql2 = "SELECT d.itemID1 as 'item', sum(d.count) as 'denom', 
    sum(d.sum + d.count*r.ratingValue) as 'numer' FROM item i, rating r,
    dev d WHERE r.userID=$userID 
    AND d.itemID1<>r.itemID 
    AND d.itemID2=r.itemID GROUP BY d.itemID1";
    return mysql_query($sql2, $connection);
}