@techreport{TR-IC-11-08, number = {IC-11-08}, author = {Jurandy Almeida and Neucimar J. Leite and Ricardo da S. Torres}, title = {Searching in High-Dimensional Metric Spaces using BP-Trees}, month = {March}, year = {2011}, institution = {Institute of Computing, University of Campinas}, note = {In English, 30 pages. \par\selectlanguage{english}\textbf{Abstract} Similarity search in high-dimensional metric spaces is a key operation in many applications, such as multimedia databases, image retrieval, object recognition, and others. The high dimensionality of the data requires special index structures to facilitate the search. A problem regarding the creation of suitable index structures for high-dimensional data is the relationship between the geometry of the data and the organization of an index structure. Most of existing indexes are constructed by partitioning the dataset using distance-based criteria. However, those methods either produce disjoint partitions, but ignore the distribution properties of the data; or produce non-disjoint groups, which greatly affect the search performance. In this paper, we study the performance of a new index structure, called Ball-and-Plane tree (BP-tree), which overcomes the above disadvantages. BP-tree is constructed by recursively dividing the dataset into compact clusters. Different from other techniques, it integrates the advantages of both disjoint and non-disjoint paradigms in order to achieve a structure of tight and low overlapping clusters, yielding significantly improved performance. Results obtained from an extensive experimental evaluation with real-world datasets show that BP-tree consistently outperforms the state-of-the-art solutions. In addition, BP-tree scales up well, exhibiting sublinear performance with growing number of objects in the database. } }