% Jing, Ng, Xu, Huang (2005): subspace clustering of text documents with a
% feature-weighting k-means algorithm. Paper in the PAKDD 2005 proceedings
% (Lecture Notes in Computer Science series); conference details are in the note field.
@inproceedings{03ad6907a84d43a19698e6d41bb95723,
title = "Subspace clustering of text documents with feature weighting k-means algorithm",
abstract = "This paper presents a new method to solve the problem of clustering large and complex text data. The method is based on a new subspace clustering algorithm that automatically calculates the feature weights in the k-means clustering process. In clustering sparse text data the feature weights are used to discover clusters from subspaces of the document vector space and identify key words that represent the semantics of the clusters. We present a modification of the published algorithm to solve the sparsity problem that occurs in text clustering. Experimental results on real-world text data have shown that the new method outperformed the Standard K Means and Bisection-KMeans algorithms, while still maintaining efficiency of the k-means clustering process.",
keywords = "Cluster Interpretation, Feature Weighting, High Dimensional Data, Subspace Clustering, Text Mining",
author = "Jing, Liping and Ng, {Michael K.} and Xu, Jun and Huang, {Joshua Zhexue}",
note = "Publisher Copyright: {\textcopyright} Springer-Verlag Berlin Heidelberg 2005; 9th Pacific-Asia Conference on Advances in Knowledge Discovery and Data Mining, PAKDD 2005 ; Conference date: 18-05-2005 Through 20-05-2005",
year = "2005",
month = may,
day = "10",
doi = "10.1007/11430919_94",
language = "English",
isbn = "3540260765",
series = "Lecture Notes in Computer Science",
publisher = "Springer Berlin Heidelberg",
pages = "802--812",
editor = "Ho, {Tu Bao} and Cheung, David and Liu, Huan",
booktitle = "Advances in Knowledge Discovery and Data Mining",
edition = "1st",
url = "https://www.jaist.ac.jp/PAKDD-05/",
}