@techreport{TR-IC-01-05, number = {IC-01-05}, author = {João Carlos Setubal and Renato F. Werneck}, title = {A Program for Building Contig Scaffolds in Double-Barreled Shotgun Genome Sequencing}, month = {April}, year = {2001}, institution = {Institute of Computing, University of Campinas}, note = {In English, 20 pages. \par\selectlanguage{english}\textbf{Abstract} We describe a program that builds contig scaffolds from contig assemblies, to be used in a whole-genome sequencing project. Our program builds scaffolds based on forward/reverse pair information (both from small clones, such as plasmids, and from large clones, such as cosmids). The program assumes that a DNA assembly, preferably with large repeats masked, is available. A scaffold is a path in a weighted graph, and the main novelty of our approach is a careful weighting scheme for arcs in this graph, such that heavier paths represent more reliable scaffolds. This weighting scheme takes into account the presence of repeats, possible clone duplication, existence of different clone libraries, and hybrid (small clones mixed with large clones) links between contigs. The program provides two different algorithms for scaffold building: one that uses a simple greedy strategy, and one that produces scaffolds that correspond to paths of maximum weight. If $|N|$ is the number of contigs and $|L|$ is the number of F/R pairs, the complexities are $O(|N| + |L| \log |L|)$ (greedy) and $O(|N|^{2} + |L| \log |L|)$ (maximum-weight path). This program has been successfully used in several bacterial genome projects. \par {\em This report supersedes IC-TR-00-20.} } }