% Technical report IC-07-31. The note field holds the language/page-count
% remark followed by the abstract; it uses \selectlanguage, so the consuming
% document presumably loads babel (confirm against the document preamble).
@techreport{TR-IC-07-31,
  author      = {Rocha, Leonardo M. and
                 Falc{\~a}o, Alexandre X. and
                 Meloni, Luis G. P.},
  title       = {Data Clustering based on Optimum-Path Forest and Probability Density Function},
  institution = {Institute of Computing, University of Campinas},
  number      = {IC-07-31},
  month       = oct,
  year        = {2007},
  note        = {In English, 20 pages.
\par\selectlanguage{english}\textbf{Abstract}
The identification of natural groups in a dataset is reduced to an
\emph{optimum-path forest} problem in a graph. We define a graph
whose nodes are data samples and whose arcs connect \emph{adjacent}
samples in a feature space. The nodes are weighted by their
probability density values and different choices of a
\emph{path-value function} lead to effective solutions for data
clustering. The method identifies a root node for each cluster and
finds the samples which are ``more strongly connected'' to each root
than to any other. The output is an optimum-path forest whose trees
(clusters) are the influence zones of their roots. This framework
extends the image foresting transform from the image domain to the
feature space, revealing important theoretical relations among
relative fuzzy-connected segmentation, morphological
reconstructions, watershed transforms, and clustering by influence
zones. It also provides a more general and robust implementation for
the popular mean-shift algorithm. The results are illustrated in
image segmentation.
},
}