第四章 排序:智能收件箱

时间:2022-12-11 07:38:53
library(tm) library(ggplot2) library(dplyr) library(reshape2) library(tidyr) library(lubridate)
data.path<-'D:\\ML_for_Hackers-master\\03-Classification\\data\\'
easyham.path<-paste0(data.path,'easy_ham\\')
easyham.path

‘D:\ML_for_Hackers-master\03-Classification\data\easy_ham\’

提取邮件有用信息(发送者、主题、日期、内容等)

parse.email<-function(path){
    full.msg<-msg.full(path)
    date<-get.date(full.msg)
    from<-get.from(full.msg)
    subj<-get.subject(full.msg)
    msg<-get.msg(full.msg)
    return(c(date,from,subj,msg,path))
}

读取一封邮件

msg.full<-function(path){
    con<-file(path,'r')
    msg<-readLines(con,encoding='latin1')
    close(con)
    return(msg)
}

提取邮件地址

get.from<-function(msg.vec){
     from<-msg.vec[grepl('From: ',msg.vec)]
     from<-strsplit(from,'[":<> ]')[[1]]
     from<-from[which(from != "" & from != " ")]
     return(from[grepl('@',from)][1])
}

提取邮件正文

get.msg<-function(msg.vec){
    msg <- msg.vec[seq(which(msg.vec == "")[1]+1,length(msg.vec))]
    return(paste(msg,collapse='\n'))
}

提取邮件主题

get.subject<-function(msg.vec){
    subj<-msg.vec[grepl('Subject: ',msg.vec)]
    if(length(subj)>0){
        return(strsplit(subj,'Subject: ')[[1]][2])
    }
    else{
        return('')
    }
}

邮件接收日期

get.date<-function(msg.vec){
    date.grep<-grepl('^Date: ',msg.vec)
    date.grep<-which(date.grep == TRUE)
    date<-msg.vec[date.grep[1]]
    date<-strsplit(date,'\\+|\\-|: ')[[1]][2]
    date<-gsub('^\\s+|\\s+$','',date)
    return(strtrim(date,25))
}
easyham.docs<-dir(easyham.path)
easyham.docs<-easyham.docs[which(easyham.docs!='cmds')]
easyham.parse<-lapply(easyham.docs,function(p) parse.email(paste0(easyham.path,p))) ## lapply函数分别对各个邮件进行处理
ehparse.matrix<-do.call(rbind,easyham.parse)
allparse.df<-data.frame(ehparse.matrix,stringsAsFactors=F)
names(allparse.df)<-c('Date','From.Email','Subject','Message','Path')
head(allparse.df[,1:3])
Date From.Email Subject
Thu, 22 Aug 2002 18:26:25 kre@munnari.OZ.AU Re: New Sequences Window
Thu, 22 Aug 2002 12:46:18 steve.burt@cursor-system.com [zzzzteana] RE: Alexander
Thu, 22 Aug 2002 13:52:38 timc@2ubh.com [zzzzteana] Moscow bomber
Thu, 22 Aug 2002 09:15:25 monty@roscom.com [IRR] Klez: The Virus That Won’t Die
Thu, 22 Aug 2002 14:38:22 Stewart.Smith@ee.ed.ac.uk Re: [zzzzteana] Nothing like mama used to make
Thu, 22 Aug 2002 14:50:31 martin@srv0.ems.ed.ac.uk Re: [zzzzteana] Nothing like mama used to make

日期格式转化

date.converter<-function(dates,pattern1,pattern2){
    pattern1.convert<-strptime(dates,pattern1)
    pattern2.convert<-strptime(dates,pattern2)
    pattern1.convert[is.na(pattern1.convert)]<-pattern2.convert[is.na(pattern1.convert)]
    return(pattern1.convert)
}
Sys.setlocale("LC_TIME", "C")
pattern1 <- "%a,%d %b %Y %H:%M:%S"
pattern2 <- "%d %b %Y %H:%M:%S"

‘C’

head(priority.df[,c(1,2,3,5)])
Date From.Email Subject Path
1061 2002-01-31 22:44:14 robinderbains@shaw.ca please help a newbie compile mplayer :-) D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01061.6610124afa2a5844d41951439d1c1068
1062 2002-02-01 00:53:41 lance_tt@bellsouth.net re: please help a newbie compile mplayer :-) D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01062.ef7955b391f9b161f3f2106c8cda5edb
1063 2002-02-01 02:01:44 robinderbains@shaw.ca re: please help a newbie compile mplayer :-) D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01063.ad3449bd2890a29828ac3978ca8c02ab
1064 2002-02-01 10:29:23 matthias@egwn.net re: please help a newbie compile mplayer :-) D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01064.9f4fc60b4e27bba3561e322c82d5f7ff
1070 2002-02-01 12:42:02 bfrench@ematic.com prob. w/ install/uninstall D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01070.6e34c1053a1840779780a315fb083057
1072 2002-02-01 13:39:31 bfrench@ematic.com re: prob. w/ install/uninstall D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01072.81ed44b31e111f9c1e47e53f4dfbefe3
allparse.df$Date<-date.converter(allparse.df$Date,pattern1,pattern2)
allparse.df$Subject<-tolower(allparse.df$Subject)
allparse.df$From.Email<-tolower(allparse.df$From.Email)

选取前一半时间的数据(训练数据)进行测试

priority.df<-allparse.df[with(allparse.df,order(Date)),]
priority.train<-priority.df[1:(round(nrow(priority.df)/2)),]

计算各个发送者发送邮件的数量

from.weight<-select(priority.train,-Date) %>% group_by(From.Email) %>% summarise(Freq=length(Subject))
head(from.weight)
From.Email Freq
abbo@impression.nu 1
adam@homeport.org 1
admin@networksonline.com 1
admin@shellworld.net 1
admin@wexoe.dk 1
aeriksson@fastmail.fm 6
from.weight_map <-subset(from.weight,Freq>6)
from.weight_map<-from.weight_map[order(from.weight_map$Freq),]
tail(from.weight_map)
From.Email Freq
skip@pobox.com 24
yyyy@spamassassin.taint.org 25
garym@canada.com 29
pudge@perl.org 34
tomwhore@slack.net 37
tim.one@comcast.net 45

取次数大于6的数据画图

ggplot(from.weight_map,aes(x=reorder(From.Email,Freq),y=Freq))+geom_bar(stat="identity",color='green')+coord_flip()

第四章 排序:智能收件箱

取不同对数类型的差异

test1<-data.frame(y=from.weight[order(from.weight$Freq),]$Freq,x=1:nrow(from.weight),type=rep(1,nrow(from.weight)))
test2<-data.frame(y=log(from.weight[order(from.weight$Freq),]$Freq),x=1:nrow(from.weight),type=rep(2,nrow(from.weight)))
test3<-data.frame(y=log10(from.weight[order(from.weight$Freq),]$Freq),x=1:nrow(from.weight),type=rep(3,nrow(from.weight)))
test<-rbind(test1,test2,test3)
ggplot(test,aes(x,y,color=type,group = type))+geom_line()+ylab('接收邮件数')+xlab('')

第四章 排序:智能收件箱

from.weight<-transform(from.weight,Weight=log(Freq+1))

找出回复他人的邮件

find.threads<-function(email.df){
    response.threads<-strsplit(email.df$Subject,'re: ')
    is.thread<-sapply(response.threads,function(subj) ifelse(subj[1]=='',TRUE,FALSE))

        threads<-response.threads[is.thread]
        senders<-email.df$From.Email[is.thread]
        threads<-sapply(threads,function(t) paste(t[2:length(t)],collapse='re: '))
        return(cbind(senders,threads))
}
threads.matrix<-find.threads(priority.train)
head(threads.matrix)
senders threads
lance_tt@bellsouth.net please help a newbie compile mplayer :-)
robinderbains@shaw.ca please help a newbie compile mplayer :-)
matthias@egwn.net please help a newbie compile mplayer :-)
bfrench@ematic.com prob. w/ install/uninstall
marmot-linux@shaw.ca prob. w/ install/uninstall
matthias@egwn.net http://apt.nixia.no/

计算每个邮件回复他人的数量及构建权重

email.thread<-function(thread.matrix){
    sender<-threads.matrix[,1]
    senders.freq<-table(sender)
    senders.matrix<-cbind(names(senders.freq),senders.freq,log(senders.freq+1))
    senders.df<-data.frame(senders.matrix,stringsAsFactors=FALSE)
    row.names(senders.df)<-1:nrow(senders.df)
    names(senders.df)<-c('From.Email','Freq','Weight')
    senders.df$Freq<-as.numeric(senders.df$Freq)
    senders.df$Weight<-as.numeric(senders.df$Weight)
    return(senders.df)
}
senders.df<-email.thread(threads.matrix)
head(senders.df)
From.Email Freq Weight
adam@homeport.org 1 0.6931472
aeriksson@fastmail.fm 5 1.7917595
albert.white@ireland.sun.com 1 0.6931472
alex@netwindows.org 1 0.6931472
andr@sandy.ru 1 0.6931472
andris@aernet.ru 1 0.6931472

计算同一主题的线程时间差,并赋值

get.threads<-function(threads.matrix,email.df){
    threads<-unique(threads.matrix[,2])
    thread.counts<-lapply(threads,function(t) thread.counts(t,email.df))
        thread.matrix<-do.call(rbind,thread.counts)
        return(cbind(threads,thread.matrix))
}
thread.counts<-function(thread,email.df){
    thread.times<-email.df$Date[which(email.df$Subject==thread|email.df$Subject==paste('re:',thread))]
    freq<-length(thread.times)
    min.times<-min(thread.times)
    max.times<-max(thread.times)
    time.span<-as.numeric(difftime(max.times,min.times,units='secs'))
    if(freq<2){
        return(c(NA,NA,NA))
    }
    else{
        trans.weight<-freq/time.span
        log.trans.weight<-10+log(trans.weight,base=10)
        return(c(freq,time.span,log.trans.weight))
    }
}
thread.weights<-get.threads(threads.matrix,priority.train)
thread.weights<-data.frame(thread.weights,stringsAsFactors=FALSE)
names(thread.weights)<-c('Thread','Freq','Response','Weight')
thread.weights$Freq<-as.numeric(thread.weights$Freq)
thread.weights$Response<-as.numeric(thread.weights$Response)
thread.weights$Weight<-as.numeric(thread.weights$Weight)
thread.weights<-subset(thread.weights,is.na(thread.weights$Freq)==FALSE)
head(thread.weights)
Thread Freq Response Weight
please help a newbie compile mplayer :-) 4 42309 5.975627
prob. w/ install/uninstall 4 23745 6.226488
http://apt.nixia.no/ 10 265303 5.576258
problems with ‘apt-get -f install’ 3 55960 5.729244
problems with apt update 2 6347 6.498461
about apt, kernel updates and dist-upgrade 5 240238 5.318328

计算主题的词频

term.counts<-function(term.vec,control){
    vec.corpus<-Corpus(VectorSource(term.vec))
    vec.tdm<-TermDocumentMatrix(vec.corpus,control=control)
    return(rowSums(as.matrix(vec.tdm)))
}
thread.terms<-term.counts(thread.weights$Thread,control=list(strpwords=stopwords()))
head(thread.terms)
compile
2
help
2
mplayer
2
newbie
2
please
2
install
2
thread.terms<-names(thread.terms)
term.weights<-sapply(thread.terms,function(t) mean(thread.weights$Weight[grepl(t,thread.weights$Thread,fixed=TRUE)])) ##计算存在关键词的主题的权重的均值
term.weights<-data.frame(list(Term=names(term.weights),Weight=term.weights),stringsAsFactors=FALSE,row.names=1:length(term.weights))
head(term.weights)
Term Weight
compile 5.803255
help 5.427126
mplayer 6.724644
newbie 5.444172
please 6.309005
install 5.977866

计算邮件内容的词频,并据此赋权重

msg.terms<-term.counts(priority.train$Message,control=list(stopwords=stopwords(),removePunctuation=TRUE,removeNumbers=TRUE))
msg.weights<-data.frame(list(Term=names(msg.terms),Weight=log(msg.terms,base=10)),stringsAsFactors=FALSE,row.names=1:length(msg.terms))
msg.weights<-subset(msg.weights,Weight>0)
head(msg.weights)
Term Weight
anyway 1.875061
appreciated 1.176091
apt 2.255273
can 3.077004
directory 2.056905
document 1.278754

最终的训练数据

head(from.weight)                    ##社交特征
head(senders.df)                    ##发件人在线程内的活跃度(re:)
head(thread.weights)                ##线程(一个主题存在多个邮件)的活跃度
head(term.weights)                 ##活跃线程的词项
head(msg.weights)                  ##所有邮件共有词项
From.Email Freq Weight
abbo@impression.nu 1 0.6931472
adam@homeport.org 1 0.6931472
admin@networksonline.com 1 0.6931472
admin@shellworld.net 1 0.6931472
admin@wexoe.dk 1 0.6931472
aeriksson@fastmail.fm 6 1.9459101
From.Email Freq Weight
adam@homeport.org 1 0.6931472
aeriksson@fastmail.fm 5 1.7917595
albert.white@ireland.sun.com 1 0.6931472
alex@netwindows.org 1 0.6931472
andr@sandy.ru 1 0.6931472
andris@aernet.ru 1 0.6931472
Thread Freq Response Weight
please help a newbie compile mplayer :-) 4 42309 5.975627
prob. w/ install/uninstall 4 23745 6.226488
http://apt.nixia.no/ 10 265303 5.576258
problems with ‘apt-get -f install’ 3 55960 5.729244
problems with apt update 2 6347 6.498461
about apt, kernel updates and dist-upgrade 5 240238 5.318328
Term Weight
compile 5.803255
help 5.427126
mplayer 6.724644
newbie 5.444172
please 6.309005
install 5.977866
Term Weight
anyway 1.875061
appreciated 1.176091
apt 2.255273
can 3.077004
directory 2.056905
document 1.278754

训练和测试排序算法

get.weights<- function(search.term,weight.df,term=TRUE){
    if(length(search.term)>0){
        if(term){
            term.match<-match(names(search.term),weight.df$Term)
        }
        else{
            term.match<-match(search.term,weight.df$Thread)
        }
        match.weights<-weight.df$Weight[which(!is.na(term.match))]
        if(length(match.weights)<1){
            return(1)
        }
        else{
            return(mean(match.weights))
        }
    }
    else{
        return(1)
    }
}

计算每封邮件的的权重(相乘)

rank.message<-function(path){
    msg<-parse.email(path)
    #Weighting based on message author  return(c(date,from,subj,msg,path))

    #First  is just on the total frequency
    from <- ifelse(length(which(from.weight$From.Email==msg[2]))>0,from.weight$Weight[which(from.weight$From.Email==msg[2])],1)

    #Second is based on senders in threads ,and threads themselves
    thread.from<-ifelse(length(which(senders.df$From.Email==msg[2]))>0,senders.df$Weight[which(senders.df$From.Email==msg[2])],1)

    subj<-strsplit(tolower(msg[3]),'re: ')
    is.thread<-ifelse(subj[[1]][1]=='',TRUE,FALSE)
    if(is.thread){
        activity<-get.weights(subj[[1]][2],thread.weights,term=FALSE)
    }
    else{
        activity=1
    }
    #Next,weight based on terms
    #Weight based on terms in threads
    thread.terms <- term.counts(msg[3],control=list(stopwords=stopwords()))
    thread.terms.weights<-get.weights(thread.terms,term.weights)

    #Weight baesd term in all messages
    msg.terms<-term.counts(msg[4],control=list(stopwords=stopwords(),removePunctuation=TRUE,removeNumbers=TRUE))
    msg.weights<-get.weights(msg.terms,msg.weights)

    #Calcuate rank by interacting all weights
    rank <- prod(from,thread.from,activity,thread.terms.weights,msg.weights)##连乘

    return(c(msg[1],msg[2],msg[3],rank))
}

训练集和测试集

##拆分训练集和测试集
train.paths<-priority.df$Path[1:(round(nrow(priority.df)/2))]
test.paths<-priority.df$Path[((round(nrow(priority.df)/2))+1):nrow(priority.df)]

训练数据集结果

train.ranks<-lapply(train.paths,rank.message)##计算评分
train.ranks.matrix<-do.call(rbind,train.ranks)
train.ranks.matrix<-cbind(train.paths,train.ranks.matrix,'TRAINING')
train.ranks.df<-data.frame(train.ranks.matrix,stringsAsFactors=FALSE)
names(train.ranks.df)<-c('Message','Date','From','Subj','Rank','Type')
train.ranks.df$Rank<-as.numeric(train.ranks.df$Rank)

priority.threshold<-median(train.ranks.df$Rank)##评分的中位数

train.ranks.df$Priority<-ifelse(train.ranks.df$Rank>=priority.threshold,1,0)
train.ranks.df$Date<-date.converter(train.ranks.df$Date,pattern1,pattern2)
nrow(priority.df)

2500

train.ranks.df[15,]
Message Date From Subj Rank Type Priority
15 D:\ML_for_Hackers-master\03-Classification\data\easy_ham\01078.e83af8e93466283be2ba03e34854682e 2002-02-02 08:11:08 lance_tt@bellsouth.net Re: problems with ‘apt-get -f install’ 371.1476 TRAINING 1
priority.threshold

59.2488866334203

ggplot(train.ranks.df,aes(x=Rank))+stat_density(aes(fill="darkred"))+geom_vline(aes(xintercept=priority.threshold),color="green",linetype="dashed")+geom_text(aes(x=priority.threshold,y=0,label=round(priority.threshold,3)))

第四章 排序:智能收件箱

sd(train.ranks.df$Rank)

227.684723068453

测试数据集

test.ranks<-lapply(test.paths,rank.message)##计算评分
test.ranks.matrix<-do.call(rbind,test.ranks)
test.ranks.matrix<-cbind(test.paths,test.ranks.matrix,'TEST')
test.ranks.df<-data.frame(test.ranks.matrix,stringsAsFactors=FALSE)
names(test.ranks.df)<-c('Message','Date','From','Subj','Rank','Type')
test.ranks.df$Rank<-as.numeric(test.ranks.df$Rank)
test.ranks.df$Date<-date.converter(test.ranks.df$Date,pattern1,pattern2)
head(test.ranks.df)
Message Date From Subj Rank Type
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00696.767a9ee8575785978ea5174d3ad3ee26 2002-09-21 19:13:52 fork_list@hotmail.com Re: sed /s/United States/Roman Empire/g 573.38792 TEST
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00697.edd28212eb2b368046311fd1918aae7d 2002-09-21 20:37:43 fork_list@hotmail.com E-Textiles Come into Style 16.69912 TEST
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00698.09cdefd75c1242540db1183f9fc54461 2002-09-21 22:59:23 joe@barrera.org flavor cystals 13.14839 TEST
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00702.bf064c61d1ba308535d0d8af3bcb1789 2002-09-22 03:00:48 cdale@techmonkeys.net Re: Oh my… 97.93984 TEST
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00699.29e599983f044aee500f3a58c34acffc 2002-09-22 03:13:08 owen@permafrost.net Re: sed /s/United States/Roman Empire/g 453.67502 TEST
D:\ML_for_Hackers-master\03-Classification\data\easy_ham\00700.7eee792482a5a8cf20f1e4225f905f6b 2002-09-22 03:20:22 tomwhore@slack.net [vox] Anarchist ‘Scavenger Hunt’ Raises D.C. Police Ire (fwd) 22.28577 TEST
ggplot(train.ranks.df,aes(x=Rank))+geom_density(aes(color='green'))+geom_density(data=test.ranks.df,aes(x=Rank,color='red'))

第四章 排序:智能收件箱

列出排序靠前的40个

reslut <-test.ranks.df[order(test.ranks.df$Rank,decreasing=TRUE),][1:40,c(2,3,4,5)]
row.names(reslut)<-1:nrow(reslut)
reslut
Date From Subj Rank
2002-09-23 18:02:05 matthias@egwn.net Re: sylpheed-claws 660.0621
2002-09-23 17:41:20 matthias@egwn.net Re: sylpheed-claws 648.4520
2002-09-21 19:13:52 fork_list@hotmail.com Re: sed /s/United States/Roman Empire/g 573.3879
2002-09-22 15:53:43 johnhall@evergo.net RE: sed /s/United States/Roman Empire/g 484.4433
2002-09-23 15:09:46 owen@permafrost.net Re: sed /s/United States/Roman Empire/g 470.8348
2002-09-22 03:13:08 owen@permafrost.net Re: sed /s/United States/Roman Empire/g 453.6750
2002-09-22 22:11:03 welch@panasas.com Re: bad focus/click behaviours 426.6982
2002-09-22 22:21:49 welch@panasas.com Re: bad focus/click behaviours 423.0334
2002-09-22 22:00:37 welch@panasas.com Re: bad focus/click behaviours 417.7473
2002-09-22 12:03:36 owen@permafrost.net Re: sed /s/United States/Roman Empire/g 407.2638
2002-09-22 22:08:16 welch@panasas.com Re: bad focus/click behaviours 404.7715
2002-09-23 11:18:16 dl@silcom.com Re: [VoID] a new low on the personals tip… 333.9248
2002-09-30 07:14:31 angles@aminvestments.com Re: New testing packages 320.5003
2002-09-27 12:33:23 ejw@cse.ucsc.edu RE: The Big Jump 282.4799
2002-09-25 23:09:39 aeriksson@fastmail.fm Re: Exmh && speed 232.7133
2002-09-23 19:22:24 tomwhore@slack.net Not just like a virgin…a virgin…birth 154.8486
2002-09-30 21:46:08 mark@talios.com Re: New testing packages 142.9437
2002-09-24 10:04:06 tomwhore@slack.net Re: 2002.06.00.00 141.7460
2002-09-28 19:37:42 garym@canada.com Re: EBusiness Webforms: cluetrain has left the station 130.8743
2002-10-03 21:58:55 garym@canada.com Re: Living Love - Another legacy of the 60’s 130.7779
2002-10-05 18:17:53 yyyy@spamassassin.taint.org Re: [SAtalk] razor2 auth? 127.4101
2002-09-23 10:43:01 elias@cse.ucsc.edu Re: sed /s/United States/Roman Empire/g 127.2770
2002-10-03 21:23:04 yyyy@spamassassin.taint.org Re: [SAtalk] 2.42: est release? 126.8429
2002-09-25 18:45:29 tomwhore@slack.net Re: Digital radio playlists are prohibited?! 126.7640
2002-09-23 21:51:03 tomwhore@slack.net Re: How about subsidizing SSL access to Google? 125.8781
2002-10-02 11:12:42 garym@canada.com Polit-spam 124.6367
2002-10-07 20:18:15 yyyy@spamassassin.taint.org Re: [SAtalk] Re: AWL bug in 2.42? 123.8745
2002-09-23 12:48:34 yyyy@spamassassin.taint.org Re: [SAtalk] Spam host? 123.7719
2002-10-09 18:23:20 yyyy@spamassassin.taint.org Re: [SAtalk] Re: fully-public corpus of mail available 119.9497
2002-09-28 15:54:42 garym@canada.com Re: EBusiness Webforms: cluetrain has left the station 117.8278
2002-09-30 22:59:46 matthias@egwn.net Re: apt 0.5.7 on RH 7.x 115.8525
2002-09-23 18:48:24 yyyy@spamassassin.taint.org Re: [Razor-users] razor unblessed reference 115.6458
2002-10-02 11:45:33 matthias@egwn.net freshrpms.net resources (was Re: use new apt to do null to RH8 114.8119
2002-10-02 10:53:24 matthias@egwn.net Re: use new apt to do null to RH8 upgrade? 114.5680
2002-09-30 23:28:59 eugen@leitl.org Re: Internet Archive bookmobile 114.2242
2002-10-01 12:12:44 matthias@egwn.net Re: use new apt to do null to RH8 upgrade? 114.1910
2002-09-30 22:00:02 matthias@egwn.net Re: apt 0.5.7 on RH 7.x 113.4832
2002-09-26 16:08:14 eugen@leitl.org Re: dumb question: X client behind a firewall? 112.4602
2002-09-30 21:50:46 matthias@egwn.net Re: ALSA Problem with Null kernel 111.9554
2002-10-02 18:57:28 matthias@egwn.net Re: use new apt to do null to RH8 upgrade? 110.3828