聚类算法之DBScan(Java实现)[转]
生活随笔
收集整理的这篇文章主要介绍了
聚类算法之DBScan(Java实现)[转]
小编觉得挺不错的，现在分享给大家，帮大家做个参考。
package orisun;import java.io.File;
import java.util.ArrayList;
import java.util.Vector;
import java.util.Iterator;public class DBScan {double Eps=3; //区域半径int MinPts=4; //密度//由于自己到自己的距离是0,所以自己也是自己的neighborpublic Vector<DataObject> getNeighbors(DataObject p,ArrayList<DataObject> objects){Vector<DataObject> neighbors=new Vector<DataObject>();Iterator<DataObject> iter=objects.iterator();while(iter.hasNext()){DataObject q=iter.next();double[] arr1=p.getVector();double[] arr2=q.getVector();int len=arr1.length;if(Global.calEditDist(arr1,arr2,len)<=Eps){ //使用编辑距离
// if(Global.calEuraDist(arr1, arr2, len)<=Eps){ //使用欧氏距离
// if(Global.calCityBlockDist(arr1, arr2, len)<=Eps){ //使用街区距离
// if(Global.calSinDist(arr1, arr2, len)<=Eps){ //使用向量夹角的正弦
neighbors.add(q);}}return neighbors;}public int dbscan(ArrayList<DataObject> objects){int clusterID=0;boolean AllVisited=false;while(!AllVisited){Iterator<DataObject> iter=objects.iterator();while(iter.hasNext()){DataObject p=iter.next();if(p.isVisited())continue;AllVisited=false;p.setVisited(true); //设为visited后就已经确定了它是核心点还是边界点Vector<DataObject> neighbors=getNeighbors(p,objects);if(neighbors.size()<MinPts){if(p.getCid()<=0)p.setCid(-1); //cid初始为0,表示未分类;分类后设置为一个正数;设置为-1表示噪声。}else{if(p.getCid()<=0){clusterID++;expandCluster(p,neighbors,clusterID,objects);}else{int iid=p.getCid();expandCluster(p,neighbors,iid,objects);}}AllVisited=true;}}return clusterID;}private void expandCluster(DataObject p, Vector<DataObject> neighbors,int clusterID,ArrayList<DataObject> objects) {p.setCid(clusterID);Iterator<DataObject> iter=neighbors.iterator();while(iter.hasNext()){DataObject q=iter.next();if(!q.isVisited()){q.setVisited(true);Vector<DataObject> qneighbors=getNeighbors(q,objects);if(qneighbors.size()>=MinPts){Iterator<DataObject> it=qneighbors.iterator();while(it.hasNext()){DataObject no=it.next();if(no.getCid()<=0)no.setCid(clusterID);}}}if(q.getCid()<=0){ //q不是任何簇的成员
q.setCid(clusterID);}}}public static void main(String[] args){DataSource datasource=new DataSource();//Eps=3,MinPts=4datasource.readMatrix(new File("/home/orisun/test/dot.mat"));datasource.readRLabel(new File("/home/orisun/test/dot.rlabel"));//Eps=2.5,MinPts=4
// datasource.readMatrix(new File("/home/orisun/text.normalized.mat"));
// datasource.readRLabel(new File("/home/orisun/text.rlabel"));DBScan ds=new DBScan();int clunum=ds.dbscan(datasource.objects);datasource.printResult(datasource.objects,clunum);}
}
转载于:https://www.cnblogs.com/wukong0214/p/3440482.html
总结
以上是生活随笔为你收集整理的聚类算法之DBScan(Java实现)[转]的全部内容，希望文章能够帮你解决所遇到的问题。
- 上一篇: 每次ubuntu12.04重启后,/et
- 下一篇: 维护没有源代码的遗留 Java 项目