% Zhang and Cheung: conference paper in the ICDMW 2014 workshop proceedings
% (DOI 10.1109/ICDMW.2014.103). Names are stored in "Last, First" form.
% NOTE(review): "address" holds a country rather than the publisher's city;
% confirm the city and replace it if a style prints this field.
@inproceedings{6e1a110857524a088e9f9eb661c44e8a,
  author    = {Zhang, Yiqun and Cheung, {Yiu Ming}},
  title     = {Discretizing numerical attributes in decision tree for big data analysis},
  booktitle = {Proceedings - 14th {IEEE} International Conference on Data Mining Workshops, {ICDMW} 2014},
  editor    = {Zhou, Zhi-Hua and Wang, Wei and Kumar, Ravi and Toivonen, Hannu and Pei, Jian and Huang, Joshua Zhexue and Wu, Xindong},
  series    = {{IEEE} International Conference on Data Mining Workshops, {ICDMW}},
  pages     = {1150--1157},
  year      = {2015},
  month     = jan,
  day       = {26},
  publisher = {IEEE Computer Society},
  address   = {United States},
  doi       = {10.1109/ICDMW.2014.103},
  language  = {English},
  keywords  = {Big Data, Discretization, Hierarchical Clustering, Noise, Numerical Attribute, Window},
  abstract  = {The decision tree induction learning is a typical machine learning approach which has been extensively applied for data mining and knowledge discovery. For numerical data and mixed data, discretization is an essential pre-processing step of decision tree learning. However, when coping with big data, most of the existing discretization approaches will not be quite efficient from the practical viewpoint. Accordingly, we propose a new discretization method based on windowing and hierarchical clustering to improve the performance of conventional decision tree for big data analysis. The proposed method not only provides a faster process of discretizing numerical attributes with the competent classification accuracy, but also reduces the size of the decision tree. Experiments show the efficacy of the proposed method on the real data sets.},
  note      = {Publisher Copyright: {\textcopyright} 2014 IEEE.; 14th IEEE International Conference on Data Mining Workshops, ICDMW 2014 ; Conference date: 14-12-2014},
}