如何在 Mahout 中支持 MongoDB

时间:2022-11-18 04:11:33

目前,NoSQL 的应用越来越广,大量的日志数据都存储到 MongoDB 中了。但是 Mahout 对 MongoDB 的支持并不好,使用起来十分复杂。
例如 Mahout 0.9 中的 MongoDBDataModel:

/**
 * Excerpt of a {@code DataModel} implementation that loads preference data from MongoDB.
 * Only {@code buildModel()} is shown. The fields it reads and writes, and the helpers it
 * calls ({@code getID}, {@code fromIdToLong}, {@code getPreference}, {@code getDate}),
 * are declared in the parts of the class omitted from this excerpt.
 */
public final class MongoDBDataModel implements DataModel {
// ..... (omitted here)
/**
 * Reads every document of the collection named by {@code mongoCollection}, groups the
 * resulting preferences by user ID, and stores them in {@code delegate} as a
 * {@code GenericDataModel}. Along the way it creates two indexes on {@code collectionMap},
 * issues a remove with an empty query against it, and tracks the latest
 * {@code created_at} value seen in {@code mongoTimestamp}.
 *
 * @throws UnknownHostException declared by this method; presumably raised by the
 *         {@code Mongo} constructor when the host cannot be resolved (confirm against
 *         the driver version in use)
 */
private void buildModel() throws UnknownHostException {
// Reset the per-build state fields before loading.
userIsObject = false;
itemIsObject = false;
idCounter = 0;
preferenceIsString = true;
Mongo mongoDDBB = new Mongo(mongoHost, mongoPort);
DB db = mongoDDBB.getDB(mongoDB);
// Start from the epoch so that any created_at value compares as later.
mongoTimestamp = new Date(0);
FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
// Load only when mongoAuth is false, or when authenticate(...) returns true.
// Otherwise the block is skipped and delegate is built from the empty map below.
if (!mongoAuth || db.authenticate(mongoUsername, mongoPassword.toCharArray())) {
collection = db.getCollection(mongoCollection);
collectionMap = db.getCollection(mongoMapCollection);
// Index collectionMap on "element_id" and, separately, on "long_value".
DBObject indexObj = new BasicDBObject();
indexObj.put("element_id", 1);
collectionMap.ensureIndex(indexObj);
indexObj = new BasicDBObject();
indexObj.put("long_value", 1);
collectionMap.ensureIndex(indexObj);
// Remove with an empty query object (presumably clears collectionMap entirely; confirm).
collectionMap.remove(new BasicDBObject());
DBCursor cursor = collection.find();
while (cursor.hasNext()) {
Map<String,Object> user = (Map<String,Object>) cursor.next().toMap();
// Documents carrying a "deleted_at" key are ignored.
if (!user.containsKey("deleted_at")) {
// IDs go through getID/fromIdToLong (defined elsewhere) and are then parsed as long.
long userID = Long.parseLong(fromIdToLong(getID(user.get(mongoUserID), true), true));
long itemID = Long.parseLong(fromIdToLong(getID(user.get(mongoItemID), false), false));
float ratingValue = getPreference(user.get(mongoPreference));
Collection<Preference> userPrefs = userIDPrefMap.get(userID);
// First preference seen for this user: create and register its list.
if (userPrefs == null) {
userPrefs = Lists.newArrayListWithCapacity(2);
userIDPrefMap.put(userID, userPrefs);
}
userPrefs.add(new GenericPreference(userID, itemID, ratingValue));
// Keep mongoTimestamp at the latest created_at value encountered so far.
if (user.containsKey("created_at")
&& mongoTimestamp.compareTo(getDate(user.get("created_at"))) < 0) {
mongoTimestamp = getDate(user.get("created_at"));
}
}
}
}
// Build the in-memory model from whatever preferences were collected.
delegate = new GenericDataModel(GenericDataModel.toDataMap(userIDPrefMap, true));
}
// ......
}

在这个实现中,ensureIndex 是 MongoDB 2.0 时代的产物(在后来的 Java 驱动中已被标记为废弃,由 createIndex 取代)……都什么年代了。
另外逻辑混乱,明明只需要构造出一个 GenericDataModel 并赋值给 delegate 就好了嘛。
fromIdToLong 函数的实现也明显有 bug……

所以,我改写了一下,采用组合的方式来实现(内部持有一个 GenericDataModel,而不是自己去实现 DataModel 接口):
MongoModel.java

package com.resc.datamodel;

import org.apache.mahout.cf.taste.model.DataModel;
import com.resc.datamodel.MongoModel ;
import com.google.common.collect.Lists;
import com.mongodb.BasicDBObject;
import com.mongodb.DBObject;
import com.mongodb.Mongo;
import com.resc.main.MainProcess;
import com.mongodb.DB;
import com.mongodb.DBCollection;
import com.mongodb.DBCursor;
import org.apache.mahout.cf.taste.model.DataModel;
import org.apache.mahout.cf.taste.model.Preference;
import org.apache.mahout.cf.taste.model.PreferenceArray;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.apache.mahout.cf.taste.common.NoSuchUserException;
import org.apache.mahout.cf.taste.impl.common.FastByIDMap;
import org.apache.mahout.cf.taste.impl.model.GenericDataModel;
import org.apache.mahout.cf.taste.impl.model.GenericPreference;
import org.apache.mahout.cf.taste.common.NoSuchItemException;
import java.net.UnknownHostException;
import java.util.Collection;
import java.util.Map;

/**
 * Loads user/item/rating triples from a MongoDB collection into an in-memory Mahout
 * {@link GenericDataModel} and exposes it through {@link #DataModel()}.
 *
 * <p>The collection is read exactly once, inside the constructor. The MongoDB connection is
 * closed as soon as the data has been copied, so an instance holds no open resources.
 */
public final class MongoModel implements CustomDataModel {
    private static final Logger log = LoggerFactory.getLogger(MongoModel.class);
    private static final String DEFAULT_USER_FIELD = "user";
    private static final String DEFAULT_ITEM_FIELD = "item";
    private static final String DEFAULT_RATING_FIELD = "score";
    private static final String DEFAULT_IPADDR = "127.0.0.1";
    private static final int DEFAULT_PORT = 27017;
    private static final String DEFAULT_DB_NAME = "recommend";
    private static final String DEFAULT_TABLE_NAME = "preference";
    /** Rating used when a document has no value in the rating field. */
    private static final float DEFAULT_PREFERENCE = 0.5f;

    private final String m_strHost;
    private final int m_iPort;
    private final String m_strDBName;
    private final String m_strTableName;
    private final String m_strUserFieldName;
    private final String m_strItemFieldName;
    private final String m_strScoreFieldName;
    private final DataModel m_oDataModel;

    /**
     * Connects to MongoDB, reads the whole preference collection and builds the data model.
     * Any string argument that is {@code null} or empty, and any non-positive port, is
     * replaced by its default.
     *
     * @param strHost           MongoDB host; default {@code 127.0.0.1}
     * @param port              MongoDB port; default {@code 27017}
     * @param strDBName         database name; default {@code recommend}
     * @param strTableName      collection name; default {@code preference}
     * @param strUserFiledName  document field holding the user ID; default {@code user}
     * @param strItemFiledName  document field holding the item ID; default {@code item}
     * @param strScoreFiledName document field holding the rating; default {@code score}
     * @throws UnknownHostException if the MongoDB client cannot resolve the host
     * @throws NumberFormatException if an ID or rating value is not numeric
     */
    public MongoModel(String strHost, int port, String strDBName, String strTableName, String strUserFiledName,
            String strItemFiledName, String strScoreFiledName) throws UnknownHostException {
        // The defaults must be chosen by content, not by reference: `s != ""` is true for
        // every empty string that is not the interned literal, and for null.
        m_strHost = orDefault(strHost, DEFAULT_IPADDR);
        m_iPort = port > 0 ? port : DEFAULT_PORT;
        m_strDBName = orDefault(strDBName, DEFAULT_DB_NAME);
        m_strTableName = orDefault(strTableName, DEFAULT_TABLE_NAME);
        m_strUserFieldName = orDefault(strUserFiledName, DEFAULT_USER_FIELD);
        m_strItemFieldName = orDefault(strItemFiledName, DEFAULT_ITEM_FIELD);
        m_strScoreFieldName = orDefault(strScoreFiledName, DEFAULT_RATING_FIELD);
        m_oDataModel = buildModel();
    }

    /**
     * Returns the in-memory model built from the MongoDB collection at construction time.
     *
     * @return the loaded data model; never {@code null}
     */
    public DataModel DataModel() {
        return m_oDataModel;
    }

    /**
     * Reads every document of the configured collection and groups the preferences by user.
     * The cursor and the client are closed even when a document fails to parse.
     */
    private DataModel buildModel() throws UnknownHostException {
        FastByIDMap<Collection<Preference>> userIDPrefMap = new FastByIDMap<Collection<Preference>>();
        Mongo mongo = new Mongo(m_strHost, m_iPort);
        try {
            DB db = mongo.getDB(m_strDBName);
            DBCollection collection = db.getCollection(m_strTableName);
            DBCursor cursor = collection.find();
            try {
                while (cursor.hasNext()) {
                    // Read the fields straight from the document; no unchecked Map cast needed.
                    DBObject row = cursor.next();
                    long uid = objectToLong(row.get(m_strUserFieldName));
                    long item = objectToLong(row.get(m_strItemFieldName));
                    float ratingValue = getPreference(row.get(m_strScoreFieldName));
                    // Parameterized so that nothing is formatted when INFO is disabled.
                    log.info("{}\t\t{}\t\t{}", new Object[] {uid, item, ratingValue});

                    Collection<Preference> userPrefs = userIDPrefMap.get(uid);
                    if (userPrefs == null) {
                        userPrefs = Lists.newArrayListWithCapacity(2);
                        userIDPrefMap.put(uid, userPrefs);
                    }
                    userPrefs.add(new GenericPreference(uid, item, ratingValue));
                }
            } finally {
                cursor.close();
            }
        } finally {
            mongo.close();
        }
        return new GenericDataModel(GenericDataModel.toDataMap(userIDPrefMap, true));
    }

    /** Returns {@code value}, or {@code fallback} when {@code value} is null or empty. */
    private static String orDefault(String value, String fallback) {
        return (value == null || value.isEmpty()) ? fallback : value;
    }

    /**
     * Converts a stored ID to {@code long}; a missing value maps to 0.
     * Integral values are converted exactly. Going through {@code double} for everything
     * would silently change IDs larger than 2^53.
     */
    private static long objectToLong(Object o) {
        if (o == null) {
            return 0;
        }
        if (o instanceof Number) {
            return ((Number) o).longValue();
        }
        String text = o.toString().trim();
        try {
            return Long.parseLong(text);
        } catch (NumberFormatException notIntegral) {
            // Values such as "12.0" or "1e3" are still accepted and truncated.
            return Double.valueOf(text).longValue();
        }
    }

    /** Converts a stored rating to {@code float}; a missing value maps to the default 0.5. */
    private static float getPreference(Object value) {
        if (value == null) {
            return DEFAULT_PREFERENCE;
        }
        if (value instanceof Number) {
            return ((Number) value).floatValue();
        }
        return Float.parseFloat(value.toString());
    }

}

这样使用的时候就方便很多了:
CustomDataModel _d = new MongoModel(“xxxxxx”, 5713 , “数据库名称” , “评价表” , “存储user的列的名称” ,”存储item的列的名称” ,”存储评分的列的名称”) ;
DataModel oDataModel = _d.DataModel() ;
UserSimilarity userSimilarity = new PearsonCorrelationSimilarity(oDataModel);
……