% Technical report IC-13-27 (Institute of Computing, University of Campinas).
% The note field carries the language/page-count line followed by the full
% abstract, including its own paragraph breaks and language switch; it is kept
% word for word so that styles printing the note field produce the same text.
% Names use the "von Last, First" form; accents are written as LaTeX escapes so
% the entry also works with 8-bit classic BibTeX.
@techreport{TR-IC-13-27,
  author      = {Gomes-Jr, Luiz and
                 da F. Costa, Luciano and
                 Santanch{\`e}, Andr{\'e}},
  title       = {Querying complex data},
  institution = {Institute of Computing, University of Campinas},
  number      = {IC-13-27},
  month       = oct,
  year        = {2013},
  note        = {In English, 19 pages.
    \par\selectlanguage{english}\textbf{Abstract} Database technology has
    advanced to support increasingly complex data -- from relations to
    semi-structured data and unstructured documents. More recently, graph
    databases have regained attention following demands from applications
    like social networks and recommendation systems. Graph analysis, usually
    associated with the Complex Networks field, has become a central tool in
    areas such as biology, physics and linguistics. Database management
    systems should improve support to these data and applications beyond the
    data model level tackled by current graph databases, including more
    flexible querying models and management mechanisms.
    \par In this paper, we define the characteristics of the highly
    interconnected data that underlies many of these modern applications. We
    adopt the term \emph{complex data} as a reference to the field of complex
    networks. A database management system for complex data requires a
    flexible query model that explores the topology of the relationships,
    taking into account their eventual uncertainty. Efficient query
    processing becomes a challenge, requiring new mechanisms for
    relationship-based query optimizations.
    \par To meet the new requirements, our solution models complex data as
    property graphs with weighted relationships. We propose a new query
    language that allows ranking of elements based on properties of the
    topology of the graph. The queries are evaluated based on a variation of
    the spreading activation model, which is the core of the query processor
    and the main target for query optimization strategies.
    Experiments with real data show the practicability of our approach and
    support our analysis of several query optimization and approximation
    mechanisms.
  },
}