@article{4121,
abstract = {Whenevern objects are characterized by a matrix of pairwise dissimilarities, they may be clustered by any of a number of sequential, agglomerative, hierarchical, nonoverlapping (SAHN) clustering methods. These SAHN clustering methods are defined by a paradigmatic algorithm that usually requires 0(n 3) time, in the worst case, to cluster the objects. An improved algorithm (Anderberg 1973), while still requiring 0(n 3) worst-case time, can reasonably be expected to exhibit 0(n 2) expected behavior. By contrast, we describe a SAHN clustering algorithm that requires 0(n 2 logn) time in the worst case. When SAHN clustering methods exhibit reasonable space distortion properties, further improvements are possible. We adapt a SAHN clustering algorithm, based on the efficient construction of nearest neighbor chains, to obtain a reasonably general SAHN clustering algorithm that requires in the worst case 0(n 2) time and space.
Whenevern objects are characterized byk-tuples of real numbers, they may be clustered by any of a family of centroid SAHN clustering methods. These methods are based on a geometric model in which clusters are represented by points ink-dimensional real space and points being agglomerated are replaced by a single (centroid) point. For this model, we have solved a class of special packing problems involving point-symmetric convex objects and have exploited it to design an efficient centroid clustering algorithm. Specifically, we describe a centroid SAHN clustering algorithm that requires 0(n 2) time, in the worst case, for fixedk and for a family of dissimilarity measures including the Manhattan, Euclidean, Chebychev and all other Minkowski metrics.},
author = {Day,William H and Herbert Edelsbrunner},
journal = {Journal of Classification},
number = {1},
pages = {7 -- 24},
publisher = {Springer},
title = {{Efficient algorithms for agglomerative hierarchical clustering methods}},
doi = {10.1007/BF01890115},
volume = {1},
year = {1984},
}