% Journal article in IEEE Transactions on Knowledge and Data Engineering, vol. 27, no. 12.
% The exported record was typed as a conference paper, with the journal name in
% "series", the issue number in "edition", and
%   booktitle = "2016 IEEE 32nd International Conference on Data Engineering, ICDE 2016".
% The DOI, volume, issue and page range all identify the journal version, so the
% entry is typed as an article. NOTE(review): the ICDE 2016 booktitle presumably refers
% to a separate conference presentation of this paper; if that version must be cited,
% give it its own entry with its own DOI and pages.
% NOTE(review): month/day are kept from the export and look like the online-first
% date rather than the cover date of issue 12; confirm before relying on them.
% The funding acknowledgement lives in "annote" (not "note") so that standard
% styles do not print it in the bibliography.
@article{1bacc2d01c1944f3b78f0c3b85f307c0,
  author    = {Yu, Zhiwen and Li, Le and Liu, Jiming and Zhang, Jun and Han, Guoqiang},
  title     = {Adaptive Noise Immune Cluster Ensemble Using Affinity Propagation},
  journal   = {IEEE Transactions on Knowledge and Data Engineering},
  volume    = {27},
  number    = {12},
  pages     = {3176--3189},
  year      = {2015},
  month     = jul,
  day       = {6},
  doi       = {10.1109/TKDE.2015.2453162},
  publisher = {IEEE Computer Society},
  language  = {English},
  keywords  = {affinity propagation, cluster analysis, Cluster ensemble, noise, normalized cut},
  abstract  = {Cluster ensemble is one of the main branches in the ensemble learning area which is an important research focus in recent years. The objective of cluster ensemble is to combine multiple clustering solutions in a suitable way to improve the quality of the clustering result. In this paper, we design a new noise immune cluster ensemble framework named as AP2CE to tackle the challenges raised by noisy datasets. AP2CE not only takes advantage of the affinity propagation algorithm (AP) and the normalized cut algorithm (Ncut), but also possesses the characteristics of cluster ensemble. Compared with traditional cluster ensemble approaches, AP2CE is characterized by several properties. (1) It adopts multiple distance functions instead of a single Euclidean distance function to avoid the noise related to the distance function. (2) AP2CE applies AP to prune noisy attributes and generate a set of new datasets in the subspaces consists of representative attributes obtained by AP. (3) It avoids the explicit specification of the number of clusters. (4) AP2CE adopts the normalized cut algorithm as the consensus function to partition the consensus matrix and obtain the final result. In order to improve the performance of AP2CE, the adaptive AP2CE is designed, which makes use of an adaptive process to optimize a newly designed objective function. The experiments on both synthetic and real datasets show that (1) AP2CE works well on most of the datasets, in particular the noisy datasets; (2) AP2CE is a better choice for most of the datasets when compared with other cluster ensemble approaches; (3) AP2CE has the capability to provide more accurate, stable and robust results.},
  annote    = {Funding Information: The authors are grateful for the constructive advice received from the anonymous reviewers of this paper. The work described in this paper was partially funded by the grant from the National High-Technology Research and Development Program (863 Program) of China No. 2013AA01A212, the grant from the NSFC for Distinguished Young Scholars 61125205, the grants from the NSFC Nos. 61332002, 61300044, and 61472145, the grant from the Guangdong Natural Science Funds for Distinguished Young Scholars (project No. S2013050014677), the Fundamental Research Funds for the Central Universities (project Nos. 2014G0007 and 2015PT016), the key lab of cloud computing and big data in Guangzhou (project No. SITGZ [2013]268-6), and Hong Kong Research Grants Council (project no. RGC/HKBU211212).},
}