library(quadprog) library(lpSolve) library(e1071) library(rpart) std<-function(x) { return((x-mean(x))/sd(x)) } pos<-function(x) { return(ifelse(x>0,x,0)) } sign<-function(x) { return(ifelse(x>=0,1,-1)) } psi<-function(x) { return(ifelse(x>=0&x<=1,2*(1-x),1-sign(x))) } K<-function(x,y,kern="radial",sigma2=ifelse(is.vector(x),1,ncol(x))) { ## need to change value of kernel accordingly if(is.vector(x)) { if (kern=="linear") { return ( sum(x*y) ) } else { return ( exp(-sum((x-y)^2)/sigma2) ) } } else { if (kern=="linear") return ( x %*% t(y) ) else { res<-matrix(0,nrow(x),nrow(y)) for ( i in 1:nrow(x) ) { for ( j in 1:nrow(y) ) { res[i,j]<-exp(-sum((x[i,]-y[j,])^2)/sigma2) } } return (res) } } } Inum<-9 our<-function(train,test,fun.cost1=1,kern="radial") { I<-seq(0,1,length=Inum+2)[2:(Inum+1)] tempprob<-matrix(rep(0,nrow(test)*Inum),ncol=Inum) for ( i in 1:Inum ) { tempfit<-svm(factor(y)~.,train,kernel=kern,cost=fun.cost1,class.weights=c("-1"=I[i],"1"=1-I[i])) tempprob[,i]<-predict(tempfit,test) } res<-I[1]/2+apply(tempprob-1,1,sum)/(Inum+1) return(res) } logit=function(train,test,fun.cost1=1,kern="radial") { tempfit<-svm(factor(y)~.,train,kernel=kern,cost=fun.cost1) pred <- predict(tempfit,test, decision.values = TRUE) fun=attr(pred, "decision.values") res=(1+exp(-fun))^(-1) return(res) } inf.small=10^(-6) epsilon<-10^(-3) em.svm<-function(data.l,data.u,fun.cost1=1,initial=sample(c(-1,1),nrow(data.u),replace=T),kern="radial",foo=our,wgt=1) { ### initial: a vector of intial labels for unlabeled data ### kern: same as 'kernel' in svm routine ### foo: the function used to estimate probability' ### wgt: the ratio b/w misclassification cost for false negative over false positive x.l<-data.l[,-1] y.l<-data.l[,1] x.u=data.u p<-ncol(x.l) l<-nrow(x.l) u<-nrow(x.u) y.u=initial; prob<-rep(0.5,length(y.u)) ## Initialization res.old<-0; res<-1; y.u.old<-y.u while (min(abs(res.old-res))>10*epsilon*res & length(table(y.u))>1) { res.old<-c(res.old,res) data.train<-data.frame(cbind(y=c(y.l,y.u),rbind(x.l,x.u))) data.test<-data.frame(cbind(y=y.u,x.u)) prob<-as.vector(foo(data.train,data.test,fun.cost1=fun.cost1,kern=kern)) Dt<-cbind(rbind(diag(y.l),matrix(0,u,l)),rbind(matrix(0,l,u),diag(rep(1,u))),rbind(matrix(0,l,u),diag(rep(-1,u)))) Dmat<-t(Dt) %*% K(rbind(x.l,x.u),rbind(x.l,x.u),kern) %*% Dt + diag(rep(inf.small,l+2*u)) dvec<-rep(1,l+2*u) Amat<-t(rbind(t(rep(1,l+u))%*%Dt,diag(rep(1,l+2*u)),diag(rep(-1,l+2*u)))) bvec<-c(0,rep(0,l+2*u),rep(-u/l*fun.cost1,l)*ifelse(y.l>0,wgt,1),-prob*fun.cost1*wgt,(prob-1)*fun.cost1) meq<-1 sol<-solve.QP(Dmat, dvec, Amat, bvec, meq)$solution ## Solve QP if (as.character(sum(sol))=="NaN") break pred.l<-as.vector(K(x.l,rbind(x.l,x.u),kern) %*% Dt %*% sol) pred.u<-as.vector(K(x.u,rbind(x.l,x.u),kern) %*% Dt %*% sol) f.obj<-c(0,0,rep(u/l,l)*ifelse(y.l>0,wgt,1),prob*wgt,1-prob) ## Solve LP for constant term f.con<-rbind(cbind(y.l,-y.l,diag(rep(1,l)),matrix(0,l,2*u)),cbind(rep(1,u),rep(-1,u),matrix(0,u,l),diag(rep(1,u)),matrix(0,u,u)),cbind(rep(-1,u),rep(1,u),matrix(0,u,l),matrix(0,u,u),diag(rep(1,u)))) f.dir<-rep(">=",l+2*u) f.rhs<-c(1-y.l*pred.l,1-pred.u,1+pred.u) const.sol<-lp("min", f.obj, f.con, f.dir, f.rhs)$solution const<-const.sol[1]-const.sol[2] y.u<-sign(as.vector(pred.u+const)) y.u.old<-cbind(y.u.old,y.u) res<-fun.cost1*as.numeric(sum(pos(1-y.l*(pred.l+const))*ifelse(y.l>0,wgt,1))*u/l+sum(prob*pos(1-pred.u-const)*wgt)+sum((1-prob)*pos(1+pred.u+const))+(t(Dt %*% sol)%*% K(rbind(x.l,x.u),rbind(x.l,x.u),kern) %*% (Dt %*% sol))) } res.old<-res.old[-(1:2)] return(list(res=y.u.old[,order(res.old)[1]])) }