00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014 #ifndef _OFFLINECLUSTER_HPP
00015 #define _OFFLINECLUSTER_HPP
00016 #include <set>
00017 #include "common_headers.hpp"
00018 #include "Index.hpp"
00019 #include "ClusterParam.hpp"
00020 #include "ClusterFactory.hpp"
00021 #include "SimFactory.hpp"
00022
/// Declares the offline clustering interface: two kMeans overloads and a
/// bisecting_kMeans entry point that each return a vector of Cluster
/// pointers. Only declarations are visible in this header; the behavior
/// notes below are limited to what the signatures show.
00024 class OfflineCluster
00025 {
00026 public:
/// Construct a clusterer bound to an index.
/// @param ind         index the clusterer refers to (taken by const reference).
/// @param simType     similarity type selector; defaults to ClusterParam::COS.
/// @param clusterType cluster type selector; defaults to ClusterParam::CENTROID.
/// @param docMode     document mode selector; defaults to ClusterParam::DMAX.
00028 OfflineCluster(const Index &ind,
00029 enum ClusterParam::simTypes simType = ClusterParam::COS,
00030 enum ClusterParam::clusterTypes clusterType = ClusterParam::CENTROID,
00031 enum ClusterParam::docModes docMode = ClusterParam::DMAX);
00032
/// Destructor. What it releases is defined out of view.
/// NOTE(review): presumably frees `sim` and `factory` -- confirm in the
/// implementation file.
00034 ~OfflineCluster();
00035
/// Cluster a list of document ids with k-means.
/// @param docIds   document ids to cluster (passed by value, so the
///                 caller's vector is copied).
/// @param numParts partition count; defaults to 2 (presumably the number
///                 of clusters to produce -- confirm).
/// @param maxIters iteration limit; defaults to 100.
/// @return pointer to a vector of Cluster pointers.
/// NOTE(review): ownership of the returned vector and its Cluster
/// elements is not visible here; presumably the caller must delete
/// them -- confirm against the implementation.
00038 vector<Cluster*> *kMeans(vector<DOCID_T> docIds, int numParts = 2,
00039 int maxIters = 100);
00040
/// Overload of kMeans that takes an existing Cluster instead of a list
/// of document ids; numParts and maxIters have the same defaults (2, 100).
/// @return pointer to a vector of Cluster pointers (ownership: see the
///         hedge on the docIds overload's declaration -- not shown here).
00042 vector<Cluster*> *kMeans(Cluster *cluster, int numParts = 2,
00043 int maxIters = 100);
00044
/// Cluster a list of document ids with bisecting k-means.
/// @param docIds   document ids to cluster (passed by value).
/// @param numParts partition count; defaults to 2.
/// @param numIters defaults to 5 (presumably the number of trial splits
///                 per bisection step -- TODO confirm).
/// @param maxIters defaults to 100 (presumably the per-run k-means
///                 iteration limit -- TODO confirm).
/// @return pointer to a vector of Cluster pointers; ownership is not
///         visible in this header.
00047 vector<Cluster*> *bisecting_kMeans(vector<DOCID_T> docIds, int numParts = 2,
00048 int numIters = 5, int maxIters = 100);
00049
00050 private:
/// Similarity method used by this object (pointer to const).
/// NOTE(review): this raw pointer and `factory` below look owned by this
/// class; if the destructor deletes them, the implicitly generated copy
/// constructor would lead to a double delete -- confirm and consider
/// disabling copying.
00052 const SimilarityMethod *sim;
/// Cluster factory used by this object.
00054 ClusterFactory *factory;
/// Index this object was constructed with; stored as a reference, so the
/// Index must outlive this object.
00056 const Index &index;
/// Compare two arrays of Cluster pointers; `n` is presumably the number
/// of entries in each array -- confirm. The meaning of the bool result
/// is defined in the implementation.
00058 bool compareClusterSets(Cluster **, Cluster **, int n);
/// Select seed document ids from `docIds`; `num` is presumably how many
/// seeds to return -- confirm.
00060 vector <DOCID_T> selectSeeds(vector<DOCID_T> docIds, int num);
/// Pick one cluster from the working set (by name, the one to split next).
00062 Cluster *chooseSplit(vector<Cluster *> *working);
/// Compute a numeric score for the working set of clusters.
00064 double scoreSet(vector<Cluster *> *working);
00065 };
00066 #endif