diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java index dc1967a628..5bedb80f39 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicates.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicates.java @@ -327,6 +327,7 @@ protected int doWork() { DuplicationMetrics metrics = AbstractMarkDuplicatesCommandLineProgram.addReadToLibraryMetrics(rec, header, libraryIdGenerator); + // Now try and figure out the next duplicate index (if going by coordinate. if going by query name, only do this // if the query name has changed. nextDuplicateIndex = nextIndexIfNeeded(sortOrder, recordInFileIndex, nextDuplicateIndex, duplicateQueryName, rec, this.duplicateIndexes); @@ -337,7 +338,6 @@ protected int doWork() { if (isDuplicate) { rec.setDuplicateReadFlag(true); - AbstractMarkDuplicatesCommandLineProgram.addDuplicateReadToMetrics(rec, metrics); } else { rec.setDuplicateReadFlag(false); @@ -552,7 +552,7 @@ private void buildSortedReadEndLists(final boolean useBarcodes) { final ReadEndsForMarkDuplicates fragmentEnd = buildReadEnds(header, indexForRead, rec, useBarcodes); this.fragSort.add(fragmentEnd); - if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + if (MarkDuplicatesUtil.pairedForMarkDuplicates(rec)) { final StringBuilder key = new StringBuilder(); key.append(ReservedTagConstants.READ_GROUP_ID); key.append(rec.getReadName()); @@ -650,7 +650,7 @@ private ReadEndsForMarkDuplicates buildReadEnds(final SAMFileHeader header, fina ends.score = DuplicateScoringStrategy.computeDuplicateScore(rec, this.DUPLICATE_SCORING_STRATEGY); // Doing this lets the ends object know that it's part of a pair - if (rec.getReadPairedFlag() && !rec.getMateUnmappedFlag()) { + if (MarkDuplicatesUtil.pairedForMarkDuplicates(rec)) { ends.read2ReferenceIndex = rec.getMateReferenceIndex(); } diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java index 35c46b5101..857d63cef4 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigar.java @@ -154,13 +154,14 @@ protected int doWork() { this.SKIP_PAIRS_WITH_NO_MATE_CIGAR, this.MAX_RECORDS_IN_RAM, this.BLOCK_SIZE, - this.TMP_DIR); + this.TMP_DIR, + MIN_INFORMATIVE_MAPPING_Q); // progress logger! final ProgressLogger progress = new ProgressLogger(log, (int) 1e6, "Read"); // Go through the records - for (final SAMRecord record : new IterableAdapter(iterator)) { + for (final SAMRecord record : new IterableAdapter<>(iterator)) { if (progress.record(record)) { iterator.logMemoryStats(log); } @@ -192,6 +193,15 @@ protected int doWork() { return 0; } + protected String[] customCommandLineValidation() { + + if (0 <= MIN_INFORMATIVE_MAPPING_Q) { + LOG.warn("non-zero value for MIN_INFORMATIVE_MAPPING_Q is not supported in " + MarkDuplicatesWithMateCigar.class.getSimpleName()); + } + + return super.customCommandLineValidation(); + } + /** * Updates the program record if necessary. */ diff --git a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java index 75da3ab8c2..1f73c94c82 100644 --- a/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java +++ b/src/main/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarIterator.java @@ -24,20 +24,18 @@ package picard.sam.markduplicates; -import htsjdk.samtools.util.SamRecordWithOrdinal; -import htsjdk.samtools.util.SamRecordTrackingBuffer; -import picard.PicardException; -import htsjdk.samtools.util.Histogram; -import picard.sam.DuplicationMetrics; -import htsjdk.samtools.util.Log; -import htsjdk.samtools.util.PeekableIterator; import htsjdk.samtools.*; import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy; -import htsjdk.samtools.util.CloseableIterator; +import htsjdk.samtools.util.*; +import picard.PicardException; +import picard.sam.DuplicationMetrics; import picard.sam.markduplicates.util.*; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.List; +import java.util.NoSuchElementException; +import java.util.Set; /** * This will iterate through a coordinate sorted SAM file (iterator) and either mark or @@ -77,7 +75,7 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator { /** * The queue that stores the records that currently are not marked as duplicates. These need to be kept until - * they cannot proven not to be duplicates, with the latter records having greater coordinate. The queue is stored in 5' unclipped + * they cannot proven not to be duplicates, with the later records having greater coordinate. The queue is stored in 5' unclipped * ordering, along with keeping the record with the best score, defined by the scoring strategies. If any record * is added to this queue and can be identified as a duplicate, the outputBuffer is notified of its * status and it can be emitted. Therefore, we limit the amount of records in this queue to only those that will NOT @@ -99,6 +97,8 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator { boolean isClosed = false; + private final short minInformativeMappingQuality; + /** * Initializes the mark duplicates iterator. * @@ -111,6 +111,7 @@ public class MarkDuplicatesWithMateCigarIterator implements SAMRecordIterator { * @param skipPairsWithNoMateCigar true to not return mapped pairs with no mate cigar, false otherwise * @param blockSize the size of the blocks in the underlying buffer/queue * @param tmpDirs the temporary directories to use if we spill records to disk + * @param minInformativeMappingQuality the minimal mapping quality that is considered informative for dup-marking * @throws PicardException if the inputs are not in coordinate sort order */ public MarkDuplicatesWithMateCigarIterator(final SAMFileHeader header, @@ -122,19 +123,20 @@ public MarkDuplicatesWithMateCigarIterator(final SAMFileHeader header, final boolean skipPairsWithNoMateCigar, final int maxRecordsInRam, final int blockSize, - final List tmpDirs) throws PicardException { + final List tmpDirs, final short minInformativeMappingQuality) throws PicardException { + this.minInformativeMappingQuality = minInformativeMappingQuality; if (header.getSortOrder() != SAMFileHeader.SortOrder.coordinate) { throw new PicardException(getClass().getName() + " expects the input to be in coordinate sort order."); } this.header = header; - backingIterator = new PeekableIterator(iterator); - outputBuffer = new SamRecordTrackingBuffer(maxRecordsInRam, blockSize, tmpDirs, header, SamRecordWithOrdinalAndSetDuplicateReadFlag.class); + backingIterator = new PeekableIterator<>(iterator); + outputBuffer = new SamRecordTrackingBuffer<>(maxRecordsInRam, blockSize, tmpDirs, header, SamRecordWithOrdinalAndSetDuplicateReadFlag.class); this.removeDuplicates = removeDuplicates; this.skipPairsWithNoMateCigar = skipPairsWithNoMateCigar; this.opticalDuplicateFinder = opticalDuplicateFinder; - toMarkQueue = new MarkQueue(duplicateScoringStrategy); + toMarkQueue = new MarkQueue(duplicateScoringStrategy, this.minInformativeMappingQuality); libraryIdGenerator = new LibraryIdGenerator(header); // Check for supported scoring strategies @@ -237,9 +239,9 @@ public SAMRecord next() throws PicardException { */ private boolean ignoreDueToMissingMateCigar(final SamRecordWithOrdinal samRecordWithOrdinal) { final SAMRecord record = samRecordWithOrdinal.getRecord(); - // ignore/except-on paired records with mapped mate and no mate cigar - if (record.getReadPairedFlag() && - !record.getMateUnmappedFlag() && null == SAMUtils.getMateCigar(record)) { // paired with one end unmapped and no mate cigar + // ignore/throw-on paired records with mapped mate and no mate cigar + if (MarkDuplicatesUtil.pairedForMarkDuplicates(record, minInformativeMappingQuality) && + null == SAMUtils.getMateCigar(record)) { // paired with one end unmapped and no mate cigar // NB: we are not truly examining these records. Do we want to count them? @@ -250,7 +252,7 @@ private boolean ignoreDueToMissingMateCigar(final SamRecordWithOrdinal samRecord // update metrics if (record.getReadUnmappedFlag()) { ++metrics.UNMAPPED_READS; - } else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + } else if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record, minInformativeMappingQuality)) { ++metrics.UNPAIRED_READS_EXAMINED; } else { ++metrics.READ_PAIRS_EXAMINED; @@ -395,6 +397,7 @@ private SAMRecord markDuplicatesAndGetTheNextAvailable() { continue; } + // check for an unmapped read if (record.getReadUnmappedFlag()) { // unmapped reads at the end of the file! @@ -415,7 +418,7 @@ private SAMRecord markDuplicatesAndGetTheNextAvailable() { } // build a read end for use in the toMarkQueue - readEnds = new ReadEndsForMateCigar(header, samRecordWithOrdinal, opticalDuplicateFinder, libraryIdGenerator.getLibraryId(samRecordWithOrdinal.getRecord())); + readEnds = new ReadEndsForMateCigar(header, samRecordWithOrdinal, opticalDuplicateFinder, libraryIdGenerator.getLibraryId(samRecordWithOrdinal.getRecord()), minInformativeMappingQuality); // check that the minimumDistance was not too small checkForMinimumDistanceFailure(readEnds); @@ -444,7 +447,7 @@ private SAMRecord markDuplicatesAndGetTheNextAvailable() { } } else { // Bring the simple metrics up to date - if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record, minInformativeMappingQuality)) { ++metrics.UNPAIRED_READS_EXAMINED; } else { ++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end @@ -596,7 +599,7 @@ private boolean tryPollingTheToMarkQueue(final boolean flush, final ReadEndsForM final Set locations = toMarkQueue.getLocations(next); if (!locations.isEmpty()) { - AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(new ArrayList(locations), null, + AbstractMarkDuplicatesCommandLineProgram.trackOpticalDuplicates(new ArrayList<>(locations), null, opticalDuplicateFinder, libraryIdGenerator); } } diff --git a/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java b/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java index fb3c7498bd..cdc7b6ebda 100644 --- a/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java +++ b/src/main/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigar.java @@ -41,6 +41,7 @@ import picard.sam.DuplicationMetrics; import picard.sam.markduplicates.util.AbstractMarkDuplicatesCommandLineProgram; import picard.sam.markduplicates.util.LibraryIdGenerator; +import picard.sam.markduplicates.util.MarkDuplicatesUtil; import picard.sam.markduplicates.util.ReadEnds; import java.util.ArrayList; @@ -65,6 +66,7 @@ */ @DocumentedFeature @ExperimentalFeature + @CommandLineProgramProperties( summary = "Examines aligned records in the supplied SAM or BAM file to locate duplicate molecules. " + "All records are then written to the output file with the duplicate records flagged.", @@ -74,12 +76,7 @@ public class SimpleMarkDuplicatesWithMateCigar extends MarkDuplicates { private final Log log = Log.getInstance(MarkDuplicatesWithMateCigar.class); - private class ReadEndsForSimpleMarkDuplicatesWithMateCigar extends ReadEnds {} - - private static boolean isPairedAndBothMapped(final SAMRecord record) { - return record.getReadPairedFlag() && - !record.getReadUnmappedFlag() && - !record.getMateUnmappedFlag(); + private class ReadEndsForSimpleMarkDuplicatesWithMateCigar extends ReadEnds { } /** @@ -126,7 +123,7 @@ protected int doWork() { for (final DuplicateSet duplicateSet : new IterableAdapter<>(iterator)) { final SAMRecord representative = duplicateSet.getRepresentative(); final boolean doOpticalDuplicateTracking = (this.READ_NAME_REGEX != null) && - isPairedAndBothMapped(representative) && + MarkDuplicatesUtil.pairedForMarkDuplicates(representative) && representative.getFirstOfPairFlag(); final Set duplicateReadEndsSeen = new HashSet<>(); @@ -149,7 +146,7 @@ protected int doWork() { // First bring the simple metrics up to date if (record.getReadUnmappedFlag()) { ++metrics.UNMAPPED_READS; - } else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + } else if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record)) { ++metrics.UNPAIRED_READS_EXAMINED; } else { ++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end @@ -157,7 +154,7 @@ protected int doWork() { if (record.getDuplicateReadFlag()) { // Update the duplication metrics - if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record)) { ++metrics.UNPAIRED_READ_DUPLICATES; } else { ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end @@ -168,7 +165,7 @@ protected int doWork() { // To track optical duplicates, store a set of locations for mapped pairs, first end only. We care about orientation relative // to the first end of the pair for optical duplicate tracking, which is more stringent than PCR duplicate tracking. if (doOpticalDuplicateTracking && - isPairedAndBothMapped(record) && + MarkDuplicatesUtil.pairedForMarkDuplicates(record) && !duplicateReadEndsSeen.contains(record.getReadName())) { final ReadEndsForSimpleMarkDuplicatesWithMateCigar readEnd = new ReadEndsForSimpleMarkDuplicatesWithMateCigar(); diff --git a/src/main/java/picard/sam/markduplicates/UmiGraph.java b/src/main/java/picard/sam/markduplicates/UmiGraph.java index a52e49d7d3..511872a814 100644 --- a/src/main/java/picard/sam/markduplicates/UmiGraph.java +++ b/src/main/java/picard/sam/markduplicates/UmiGraph.java @@ -62,6 +62,7 @@ public class UmiGraph { private final boolean allowMissingUmis; private final boolean duplexUmis; + /** * Creates a UmiGraph object * @param set Set of reads that have the same start-stop positions, these will be broken up by UMI diff --git a/src/main/java/picard/sam/markduplicates/UmiUtil.java b/src/main/java/picard/sam/markduplicates/UmiUtil.java index 8ecbdb1d1c..eaf46808f1 100644 --- a/src/main/java/picard/sam/markduplicates/UmiUtil.java +++ b/src/main/java/picard/sam/markduplicates/UmiUtil.java @@ -27,6 +27,7 @@ import htsjdk.samtools.SAMRecord; import htsjdk.samtools.SAMUtils; import org.apache.commons.lang3.StringUtils; +import picard.sam.markduplicates.util.MarkDuplicatesUtil; import java.util.regex.Pattern; @@ -102,7 +103,7 @@ static String getTopStrandNormalizedUmi(final SAMRecord record, final String umi * @return Top or bottom strand, unknown if it cannot be determined. */ static ReadStrand getStrand(final SAMRecord rec) { - if (rec.getReadUnmappedFlag() || rec.getMateUnmappedFlag()) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(rec)) { return ReadStrand.UNKNOWN; } @@ -185,5 +186,4 @@ public enum ReadStrand { BOTTOM, UNKNOWN } - } diff --git a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java index f8eafb419a..748d4779cf 100644 --- a/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java +++ b/src/main/java/picard/sam/markduplicates/util/AbstractMarkDuplicatesCommandLineProgram.java @@ -25,14 +25,7 @@ package picard.sam.markduplicates.util; import htsjdk.samtools.DuplicateScoringStrategy.ScoringStrategy; -import htsjdk.samtools.MergingSamRecordIterator; -import htsjdk.samtools.SAMFileHeader; -import htsjdk.samtools.SAMProgramRecord; -import htsjdk.samtools.SAMRecord; -import htsjdk.samtools.SamFileHeaderMerger; -import htsjdk.samtools.SamInputResource; -import htsjdk.samtools.SamReader; -import htsjdk.samtools.SamReaderFactory; +import htsjdk.samtools.*; import htsjdk.samtools.metrics.MetricsFile; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.Histogram; @@ -45,12 +38,7 @@ import picard.sam.util.PGTagArgumentCollection; import java.io.File; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.HashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; +import java.util.*; /** * Abstract class that holds parameters and methods common to classes that perform duplicate @@ -84,6 +72,13 @@ public abstract class AbstractMarkDuplicatesCommandLineProgram extends AbstractO "Deprecated, used ASSUME_SORT_ORDER=coordinate instead.", mutex = {"ASSUME_SORT_ORDER"}) public boolean ASSUME_SORTED = false; + @Argument(shortName = "MIN_MQ", + doc = "The minimal value of the mapping quality that is considered informative for the purpose of creating " + + "mapped-pairs. If a read has mapping quality smaller than this, the tool will consider it as unaligned " + + "for the purpose of duplicate mapping (though it will remain mapped).", + optional = true) + public short MIN_INFORMATIVE_MAPPING_Q = 0; + @Argument(shortName = StandardOptionDefinitions.ASSUME_SORT_ORDER_SHORT_NAME, doc = "If not null, assume that the input file has this order even if the header says otherwise.", optional = true, mutex = {"ASSUME_SORTED"}) @@ -218,6 +213,8 @@ public static DuplicationMetrics addReadToLibraryMetrics(final SAMRecord rec, fi ++metrics.SECONDARY_OR_SUPPLEMENTARY_RDS; } else if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { ++metrics.UNPAIRED_READS_EXAMINED; + } else if (!MarkDuplicatesUtil.pairedForMarkDuplicates(rec)) { + ++metrics.UNPAIRED_READS_EXAMINED; } else { ++metrics.READ_PAIRS_EXAMINED; // will need to be divided by 2 at the end } @@ -228,7 +225,7 @@ public static void addDuplicateReadToMetrics(final SAMRecord rec, final Duplicat // only update duplicate counts for "decider" reads, not tag-a-long reads if (!rec.isSecondaryOrSupplementary() && !rec.getReadUnmappedFlag()) { // Update the duplication metrics - if (!rec.getReadPairedFlag() || rec.getMateUnmappedFlag()) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(rec)) { ++metrics.UNPAIRED_READ_DUPLICATES; } else { ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end @@ -377,6 +374,12 @@ private static int trackOpticalDuplicates(final List list, return opticalDuplicates; } + @Override + protected String[] customCommandLineValidation() { + MarkDuplicatesUtil.setMinInformativeMappingQuality(MIN_INFORMATIVE_MAPPING_Q); + return super.customCommandLineValidation(); + } + private static void trackDuplicateCounts(final int listSize, final int optDupCnt, final LibraryIdGenerator libraryIdGenerator) { diff --git a/src/main/java/picard/sam/markduplicates/util/MarkDuplicatesUtil.java b/src/main/java/picard/sam/markduplicates/util/MarkDuplicatesUtil.java new file mode 100644 index 0000000000..646ef02326 --- /dev/null +++ b/src/main/java/picard/sam/markduplicates/util/MarkDuplicatesUtil.java @@ -0,0 +1,73 @@ +/* + * The MIT License + * + * Copyright (c) 2019 The Broad Institute + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +package picard.sam.markduplicates.util; + +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMTag; +import htsjdk.samtools.util.Log; +import htsjdk.utils.ValidationUtils; + +public class MarkDuplicatesUtil { + //singleton. + private MarkDuplicatesUtil() { + } + + private static short MIN_INFORMATIVE_MAPPING_QUALITY = -1; + private static final Log log = Log.getInstance(MarkDuplicatesUtil.class); + + private static boolean warnedForMissingMQ = false; + + private static boolean readAlignedForMarkDuplicates(final SAMRecord rec, final short minInformativeMappingQuality) { + return !rec.getReadUnmappedFlag() && rec.getMappingQuality() >= minInformativeMappingQuality; + } + + private static boolean mateAlignedForMarkDuplicates(final SAMRecord rec, final short minInformativeMappingQuality) { + final Short mateMappingQuality = rec.getShortAttribute(SAMTag.MQ.name()); + if (null == mateMappingQuality) { // cannot rule out, so consider the pair + if (!warnedForMissingMQ) { + log.warn("Missing mate mapping quality (MQ) tag in record: " + rec.getSAMString()+ " Future such warnings wll be supressed."); + warnedForMissingMQ = true; + } + return !rec.getMateUnmappedFlag(); + } + return !rec.getMateUnmappedFlag() && mateMappingQuality >= minInformativeMappingQuality; + } + + public static boolean pairedForMarkDuplicates(final SAMRecord rec, final short minInformativeMappingQuality) { + return rec.getReadPairedFlag() && + readAlignedForMarkDuplicates(rec, minInformativeMappingQuality) && + mateAlignedForMarkDuplicates(rec, minInformativeMappingQuality); + } + + public static boolean pairedForMarkDuplicates(final SAMRecord rec) { + ValidationUtils.validateArg(MIN_INFORMATIVE_MAPPING_QUALITY >= 0, "MIN_INFORMATIVE_MAPPING_QUALITY wasn't initialized. Must be >=0."); + + return pairedForMarkDuplicates(rec, MIN_INFORMATIVE_MAPPING_QUALITY); + } + + public static void setMinInformativeMappingQuality(final short minInformativeMappingQuality) { + MIN_INFORMATIVE_MAPPING_QUALITY = minInformativeMappingQuality; + } +} diff --git a/src/main/java/picard/sam/markduplicates/util/MarkQueue.java b/src/main/java/picard/sam/markduplicates/util/MarkQueue.java index 228257f4c8..21f01e221a 100644 --- a/src/main/java/picard/sam/markduplicates/util/MarkQueue.java +++ b/src/main/java/picard/sam/markduplicates/util/MarkQueue.java @@ -109,6 +109,7 @@ public int compare(final ReadEndsForMateCigar lhs, final ReadEndsForMateCigar rh /** The nonDuplicateReadEndsSet of all read ends sorted by 5' start unclipped position. Some read ends in this nonDuplicateReadEndsSet may eventually be duplicates. */ private final TreeSet nonDuplicateReadEndsSet = new TreeSet(new MarkQueueComparator()); + private final short minInformativeMappingQuality; /** * Reads in the main nonDuplicateReadEndsSet may occasionally have mates with the same chromosome, coordinate, and orientation, causing collisions * We store the 'best' end of the mate pair in the main nonDuplicateReadEndsSet, and the other end in this nonDuplicateReadEndsSet. We only remove from this.otherEndOfNonDuplicateReadEndsSet when @@ -125,8 +126,9 @@ public int compare(final ReadEndsForMateCigar lhs, final ReadEndsForMateCigar rh /** temporary so we do not need to create many objects */ private ReadEndsForMateCigar tmpReadEnds = null; - public MarkQueue(final ScoringStrategy duplicateScoringStrategy) { + public MarkQueue(final ScoringStrategy duplicateScoringStrategy, final short minInformativeMappingQuality) { comparator = new ReadEndsMCComparator(duplicateScoringStrategy); + this.minInformativeMappingQuality = minInformativeMappingQuality; } /** Returns the number of duplicates detected */ @@ -171,7 +173,7 @@ public ReadEndsForMateCigar peek() { /** Updates the duplication metrics given the provided duplicate */ private void updateDuplicationMetrics(final ReadEndsForMateCigar duplicate, final DuplicationMetrics metrics) { // Update the duplication metrics - if (!duplicate.getRecord().getReadPairedFlag() || duplicate.getRecord().getMateUnmappedFlag()) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(duplicate.getRecord(), minInformativeMappingQuality)) { ++metrics.UNPAIRED_READ_DUPLICATES; } else { ++metrics.READ_PAIR_DUPLICATES;// will need to be divided by 2 at the end @@ -209,7 +211,7 @@ public ReadEndsForMateCigar poll(final SamRecordTrackingBuffer outputBuffer, // NB: only care about read1ReferenceIndex, read1Coordinate, and orientation in the nonDuplicateReadEndsSet if (null == this.tmpReadEnds) { // initialize - this.tmpReadEnds = new ReadEndsForMateCigar(header, current.getSamRecordIndex(), opticalDuplicateFinder, current.libraryId); + this.tmpReadEnds = new ReadEndsForMateCigar(header, current.getSamRecordIndex(), opticalDuplicateFinder, current.libraryId, minInformativeMappingQuality); this.tmpReadEnds.read2ReferenceIndex = this.tmpReadEnds.read2Coordinate = -1; this.tmpReadEnds.samRecordWithOrdinal = null; // nullify so we do not hold onto it forever } else { @@ -356,7 +358,7 @@ public void add(final ReadEndsForMateCigar other, // add to the physical locations for optical duplicate tracking final SAMRecord record = other.getRecord(); - if (record.getReadPairedFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag() && addToLocationSet) { + if (MarkDuplicatesUtil.pairedForMarkDuplicates(record,minInformativeMappingQuality) && addToLocationSet) { if (null == locationSet) throw new PicardException("location nonDuplicateReadEndsSet was null: " + record.getSAMString()); locationSet.add(other); } diff --git a/src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java index 03970f8faf..464fb1806a 100644 --- a/src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java +++ b/src/main/java/picard/sam/markduplicates/util/ReadEndsForMateCigar.java @@ -52,9 +52,12 @@ public class ReadEndsForMateCigar extends ReadEnds { */ private PhysicalLocationForMateCigarSet locationSet = null; + private final short minInformativeMappingQuality; + /** Builds a read ends object that represents a single read. */ public ReadEndsForMateCigar(final SAMFileHeader header, final SamRecordWithOrdinal samRecordWithOrdinal, - final OpticalDuplicateFinder opticalDuplicateFinder, final short libraryId) { + final OpticalDuplicateFinder opticalDuplicateFinder, final short libraryId, final short minInformativeMappingQuality) { + this.minInformativeMappingQuality = minInformativeMappingQuality; this.readGroup = -1; this.tile = -1; @@ -72,7 +75,7 @@ public ReadEndsForMateCigar(final SAMFileHeader header, final SamRecordWithOrdin throw new PicardException("Found an unexpected unmapped read"); } - if (record.getReadPairedFlag() && !record.getReadUnmappedFlag() && !record.getMateUnmappedFlag()) { + if (MarkDuplicatesUtil.pairedForMarkDuplicates(record, minInformativeMappingQuality)) { this.read2ReferenceIndex = record.getMateReferenceIndex(); this.read2Coordinate = record.getMateNegativeStrandFlag() ? SAMUtils.getMateUnclippedEnd(record) : SAMUtils.getMateUnclippedStart(record); @@ -96,7 +99,7 @@ public ReadEndsForMateCigar(final SAMFileHeader header, final SamRecordWithOrdin this.libraryId = libraryId; // Is this unmapped or its mate? - if (record.getReadUnmappedFlag() || (record.getReadPairedFlag() && record.getMateUnmappedFlag())) { + if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record, minInformativeMappingQuality)) { this.hasUnmapped = 1; } @@ -117,7 +120,8 @@ public ReadEndsForMateCigar(final SAMFileHeader header, final SamRecordWithOrdin } /** Creates a shallow copy from the "other" */ - public ReadEndsForMateCigar(final ReadEndsForMateCigar other, final SamRecordWithOrdinal samRecordWithOrdinal) { + public ReadEndsForMateCigar(final ReadEndsForMateCigar other, final SamRecordWithOrdinal samRecordWithOrdinal, final short minInformativeMappingQuality) { + this.minInformativeMappingQuality = minInformativeMappingQuality; this.readGroup = other.readGroup; this.tile = other.tile; this.x = other.x; diff --git a/src/main/java/picard/sam/util/Pair.java b/src/main/java/picard/sam/util/Pair.java index 2f2222026f..63a4cd0ca6 100644 --- a/src/main/java/picard/sam/util/Pair.java +++ b/src/main/java/picard/sam/util/Pair.java @@ -68,8 +68,7 @@ public boolean equals(final Object o) { } /** - * Basic hashcode function. Assume hashcodes of left and right are - * randomly distributed and return the XOR of the two. + * Basic hashcode function. * * @return hashcode of the pair. */ @@ -85,8 +84,13 @@ public String toString() { @Override public int compareTo(final Pair o) { final int leftCompare; + if (this.left == null && o.left == null) { leftCompare = 0; + } else if (this.left == null) { + return -1; + } else if (o.left == null) { + return 1; } else { leftCompare = this.left.compareTo(o.left); } @@ -95,8 +99,12 @@ public int compareTo(final Pair o) { if (this.right == null && o.right == null) { return 0; + } else if (this.right == null) { + return -1; + } else if (o.right == null) { + return 1; + } else { + return this.right.compareTo(o.right); } - - return this.right.compareTo(o.right); } } diff --git a/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java index 7a190bbe5e..725f1caf5c 100644 --- a/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java +++ b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTest.java @@ -23,15 +23,25 @@ */ package picard.sam.markduplicates; -import htsjdk.samtools.*; +import htsjdk.samtools.SAMFileHeader; +import htsjdk.samtools.SAMRecord; +import htsjdk.samtools.SAMRecordSetBuilder; +import htsjdk.samtools.SAMTag; +import htsjdk.samtools.SamReader; +import htsjdk.samtools.SamReaderFactory; import htsjdk.samtools.util.CloseableIterator; import htsjdk.samtools.util.CloserUtil; +import htsjdk.samtools.util.CollectionUtil; +import org.testng.annotations.BeforeSuite; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; +import picard.sam.markduplicates.util.MarkDuplicatesUtil; +import picard.sam.util.Pair; import picard.sam.util.PhysicalLocationInt; import picard.sam.util.ReadNameParser; import java.io.File; +import java.util.Map; /** * This class defines the individual test cases to run. The actual running of the test is done @@ -41,7 +51,7 @@ public abstract class AbstractMarkDuplicatesCommandLineProgramTest { protected abstract AbstractMarkDuplicatesCommandLineProgramTester getTester(); - protected static final int DEFAULT_BASE_QUALITY = 10; + static final int DEFAULT_BASE_QUALITY = 10; protected boolean markSecondaryAndSupplementaryRecordsLikeTheCanonical() { return false; @@ -51,6 +61,14 @@ protected boolean markUnmappedRecordsLikeTheirMates() { return false; } + private final short LOW_MAPPING_Q = 1; + + @BeforeSuite + public void setMinInformativeMQ() { + MarkDuplicatesUtil.setMinInformativeMappingQuality((short) (LOW_MAPPING_Q + 1)); + + } + @Test public void testSingleUnmappedFragment() { final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester(); @@ -409,8 +427,8 @@ public void testMappedPairAndMatePairFirstOppositeStrandSecondUnmapped() { public void testMappedPairAndMappedFragmentAndMatePairSecondUnmapped() { final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester(); tester.getSamRecordSetBuilder().setReadLength(76); - tester.addMatePair(1, 10040, 10040, false, true, true, markUnmappedRecordsLikeTheirMates(), "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate, // second end unmapped + tester.addMatePair(1, 10040, 10040, false, true, true, markUnmappedRecordsLikeTheirMates(), "76M", null, false, false, false, false, false, DEFAULT_BASE_QUALITY); // first a duplicate, tester.addMappedPair(1, 10189, 10040, false, false, "41S35M", "65M11S", true, false, false, DEFAULT_BASE_QUALITY); // mapped OK tester.addMappedFragment(1, 10040, true, DEFAULT_BASE_QUALITY); // duplicate tester.runTest(); @@ -749,4 +767,33 @@ public void testTwoMappedPairsWithSupplementaryReadsAfterCanonical(final Boolean tester.addMappedFragment(fragLikeFirst ? "RUNID:1:1:15993:13361" : "RUNID:1:1:16020:13352", 4, 400, markSecondaryAndSupplementaryRecordsLikeTheCanonical() && !fragLikeFirst, null, null, additionalFragSecondary, additionalFragSupplementary, DEFAULT_BASE_QUALITY); tester.runTest(); } + + + @Test + public void testDuplicateWithLowMappingQ() { + final AbstractMarkDuplicatesCommandLineProgramTester tester = getTester(); + + final SAMRecordSetBuilder builder = new SAMRecordSetBuilder(); + tester.getSamRecordSetBuilder().setReadLength(68); + tester.setMinInformativeMappingQ(LOW_MAPPING_Q + 1); + + tester.addMatePair("RUNID:3:1:15029:113060", 0, 129384554, 129384355, false, false, true, markUnmappedRecordsLikeTheirMates(), "68M", "66M1I1M", false, true, false, false, false, DEFAULT_BASE_QUALITY); + tester.addMatePair("RUNID:2:2:15029:113060", 0, 129384554, 129384554, false, false, false, false, "68M", "68M", false, true, false, false, false, DEFAULT_BASE_QUALITY); + + // Create the pathology + final CloseableIterator iterator = tester.getRecordIterator(); + final Map, Short> mappingQuality = new CollectionUtil.DefaultingMap<>((short) (LOW_MAPPING_Q + 1)); + mappingQuality.put(new Pair<>("RUNID:3:1:15029:113060", false), LOW_MAPPING_Q); + + while (iterator.hasNext()) { + final SAMRecord record = iterator.next(); + record.setMappingQuality(mappingQuality.get(new Pair<>(record.getReadName(),record.getFirstOfPairFlag()))); + record.setAttribute(SAMTag.MQ.name(), mappingQuality.get(new Pair<>(record.getReadName(),!record.getFirstOfPairFlag()))); + + } + iterator.close(); + + builder.getRecords().forEach(tester::addRecord); + tester.runTest(); + } } diff --git a/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java index 17fa6e3c4f..979b058a03 100644 --- a/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java +++ b/src/test/java/picard/sam/markduplicates/AbstractMarkDuplicatesCommandLineProgramTester.java @@ -33,6 +33,7 @@ import org.testng.Assert; import picard.cmdline.CommandLineProgram; import picard.sam.DuplicationMetrics; +import picard.sam.markduplicates.util.MarkDuplicatesUtil; import picard.sam.testers.SamFileTester; import java.io.File; @@ -52,6 +53,7 @@ abstract public class AbstractMarkDuplicatesCommandLineProgramTester extends Sam final DuplicationMetrics expectedMetrics; boolean testOpticalDuplicateDTTag = false; + private short minInformativeMappingQ=0; public AbstractMarkDuplicatesCommandLineProgramTester(final ScoringStrategy duplicateScoringStrategy, SAMFileHeader.SortOrder sortOrder) { this(duplicateScoringStrategy, sortOrder, true); @@ -78,7 +80,9 @@ public AbstractMarkDuplicatesCommandLineProgramTester() { } @Override - public String getCommandLineProgramName() { return getProgram().getClass().getSimpleName(); } + public String getCommandLineProgramName() { + return getProgram().getClass().getSimpleName(); + } /** * Tells MarkDuplicates to record which reads are optical duplicates @@ -96,6 +100,7 @@ public void recordOpticalDuplicatesMarked() { public void updateExpectedDuplicationMetrics() { final FormatUtil formatter = new FormatUtil(); + MarkDuplicatesUtil.setMinInformativeMappingQuality(minInformativeMappingQ); try (final CloseableIterator inputRecordIterator = this.getRecordIterator()) { while (inputRecordIterator.hasNext()) { @@ -112,7 +117,7 @@ public void updateExpectedDuplicationMetrics() { // First bring the simple metricsFile up to date if (record.getReadUnmappedFlag()) { ++expectedMetrics.UNMAPPED_READS; - } else if (!record.getReadPairedFlag() || record.getMateUnmappedFlag()) { + } else if (!MarkDuplicatesUtil.pairedForMarkDuplicates(record)) { ++expectedMetrics.UNPAIRED_READS_EXAMINED; if (isDuplicate) { ++expectedMetrics.UNPAIRED_READ_DUPLICATES; @@ -145,6 +150,7 @@ public void test() throws IOException { /** * Runs test and returns metrics + * * @return Duplication metrics * @throws IOException */ @@ -155,14 +161,14 @@ public MetricsFile testMetrics() throws IOException // Read the output and check the duplicate flag int outputRecords = 0; final Set sequencingDTErrorsSeen = new HashSet<>(); - try(final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(fastaFiles.get(samRecordSetBuilder.getHeader())).open(getOutput())) { + try (final SamReader reader = SamReaderFactory.makeDefault().referenceSequence(fastaFiles.get(samRecordSetBuilder.getHeader())).open(getOutput())) { for (final SAMRecord record : reader) { outputRecords++; final String key = samRecordToDuplicatesFlagsKey(record); Assert.assertTrue(this.duplicateFlags.containsKey(key), "DOES NOT CONTAIN KEY: " + key); final boolean value = this.duplicateFlags.get(key); this.duplicateFlags.remove(key); - Assert.assertEquals(record.getDuplicateReadFlag(), value, "Mismatching read: " + record.getSAMString()); + Assert.assertEquals(record.getDuplicateReadFlag(), value, "Mismatching Duplicate flag on read: " + record.getSAMString()); if (testOpticalDuplicateDTTag && MarkDuplicates.DUPLICATE_TYPE_SEQUENCING.equals(record.getAttribute("DT"))) { sequencingDTErrorsSeen.add(record.getReadName()); } @@ -175,10 +181,9 @@ public MetricsFile testMetrics() throws IOException // Check the values written to metrics.txt against our input expectations final MetricsFile metricsOutput = new MetricsFile<>(); - try{ + try { metricsOutput.read(new FileReader(metricsFile)); - } - catch (final FileNotFoundException ex) { + } catch (final FileNotFoundException ex) { Assert.fail("Metrics file not found: " + ex.getMessage()); } Assert.assertEquals(metricsOutput.getMetrics().size(), 1); @@ -203,4 +208,9 @@ public MetricsFile testMetrics() throws IOException } abstract protected CommandLineProgram getProgram(); + + public void setMinInformativeMappingQ(final int i) { + this.addArg("MIN_INFORMATIVE_MAPPING_Q=" + i); + this.minInformativeMappingQ=(short)i; + } } diff --git a/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java index e9b989b438..618bf8bb8c 100644 --- a/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java +++ b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingBarcodeTest.java @@ -5,23 +5,24 @@ * molecular barcode tag, even if the code is trying to use the molecular barcode */ -abstract public class MarkDuplicateWithMissingBarcodeTest extends MarkDuplicatesTest { +public abstract class MarkDuplicateWithMissingBarcodeTest extends MarkDuplicatesTest { protected AbstractMarkDuplicatesCommandLineProgramTester getTester() { return new MarkDuplicatesWithMissingBarcodesTester(); } - abstract protected String getArgumentName(); + protected abstract String getArgumentName(); - abstract protected String getTagValue(); + protected abstract String getTagValue(); private class MarkDuplicatesWithMissingBarcodesTester extends MarkDuplicatesTester { + @Override public void runTest() { boolean hasRX = false; boolean isDuplex = false; for (final String argument : this.getArgs()) { - if (argument.startsWith(getArgumentName())) { + if (argument.startsWith(getArgumentName())|| argument.startsWith("BARCODE_TAG")) { hasRX = true; break; } diff --git a/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java index 0d868e4984..c3986177fc 100644 --- a/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java +++ b/src/test/java/picard/sam/markduplicates/MarkDuplicateWithMissingReadTwoBarcodeTest.java @@ -11,7 +11,7 @@ public class MarkDuplicateWithMissingReadTwoBarcodeTest extends MarkDuplicateWit @Override protected String getArgumentName() { - return "READ_TWO_BARCODE_TAG"; + return "READ_ONE_BARCODE_TAG"; } @Override diff --git a/src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java index f12654b587..0f71590472 100644 --- a/src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java +++ b/src/test/java/picard/sam/markduplicates/MarkDuplicatesTest.java @@ -34,14 +34,17 @@ import htsjdk.samtools.util.CollectionUtil; import htsjdk.samtools.util.IOUtil; import htsjdk.samtools.util.IterableAdapter; -import htsjdk.samtools.util.TestUtil; import org.testng.Assert; import org.testng.annotations.BeforeClass; import org.testng.annotations.DataProvider; import org.testng.annotations.Test; import java.io.File; -import java.util.*; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; /** * This class defines the individual test cases to run. The actual running of the test is done @@ -322,7 +325,8 @@ public Object[][] testDuplexUmiDataProvider() { Arrays.asList(false, true), // Is duplicate Arrays.asList(false, false), // Negative Strand Flag of first in pair Arrays.asList(true, true), // Negative Strand Flag of second in pair - },{ + }, + { // Test case where UMIs are not duplex, but are different. None of the reads should be duplicates. false, // Use duplex UMI (true), or single stranded UMI (false) Arrays.asList("ATC", "GCG"), // UMIs diff --git a/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java index e2cad133bf..f328595a5e 100644 --- a/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java +++ b/src/test/java/picard/sam/markduplicates/MarkDuplicatesWithMateCigarTest.java @@ -126,4 +126,10 @@ public void testScoringStrategyForMateReferenceLengthComparison() { tester.runTest(); } + + @Test(enabled = false) + @Override + public void testDuplicateWithLowMappingQ() { + + } } diff --git a/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java index 7f2911d8f6..2c606d1d76 100644 --- a/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java +++ b/src/test/java/picard/sam/markduplicates/SimpleMarkDuplicatesWithMateCigarTest.java @@ -51,4 +51,11 @@ public void testTwoMappedPairsWithSoftClippingFirstOfPairOnlyNoMateCigar() { @Override public void testOpticalDuplicateClusterSamePositionNoOpticalDuplicates(final String readName1, final String readName2) { } + + // To enable this test a change in htsjdk would be required. In particular the duplicateSetIterator would need to be modified. + @Test(enabled = false) + @Override + public void testDuplicateWithLowMappingQ() { + + } } diff --git a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java index c33aaa16fb..f6af3baa24 100644 --- a/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java +++ b/src/test/java/picard/sam/markduplicates/UmiAwareMarkDuplicatesWithMateCigarTest.java @@ -502,5 +502,12 @@ public void testDuplexUmi(final boolean duplexUmi, final int editDistanceToJoin, tester.setExpectedMetrics(expectedMetrics); tester.runTest(); } + + // To enable this test a change in htsjdk would be required. In particular the duplicateSetIterator would need to be modified. + @Test(enabled = false) + @Override + public void testDuplicateWithLowMappingQ() { + + } } diff --git a/src/test/java/picard/sam/testers/SamFileTester.java b/src/test/java/picard/sam/testers/SamFileTester.java index 0aab148fbf..1a3d64fdc8 100644 --- a/src/test/java/picard/sam/testers/SamFileTester.java +++ b/src/test/java/picard/sam/testers/SamFileTester.java @@ -12,6 +12,7 @@ import java.io.IOException; import java.nio.file.Path; import java.util.ArrayList; +import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -137,59 +138,59 @@ protected String samRecordToDuplicatesFlagsKey(final SAMRecord record) { } // Below are a bunch of utility methods for adding records to the SAMRecordSetBuilder - public void addUnmappedFragment(final int referenceSequenceIndex, + public List addUnmappedFragment(final int referenceSequenceIndex, final int defaultQualityScore) { - addFragment(referenceSequenceIndex, -1, true, false, null, null, defaultQualityScore, false); + return addFragment(referenceSequenceIndex, -1, true, false, null, null, defaultQualityScore, false); } - public void addUnmappedFragment(final int referenceSequenceIndex, + public List addUnmappedFragment(final int referenceSequenceIndex, final String qualityString) { - addFragment(referenceSequenceIndex, -1, true, false, null, qualityString, -1, false); + return addFragment(referenceSequenceIndex, -1, true, false, null, qualityString, -1, false); } - public void addUnmappedPair(final int referenceSequenceIndex, + public List addUnmappedPair(final int referenceSequenceIndex, final int defaultQualityScore) { - addMatePair(referenceSequenceIndex, -1, -1, true, true, false, false, null, null, false, false, false, false, false, defaultQualityScore); + return addMatePair(referenceSequenceIndex, -1, -1, true, true, false, false, null, null, false, false, false, false, false, defaultQualityScore); } - public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, + public List addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final int defaultQualityScore) { - addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, null, null, defaultQualityScore, false); + return addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, null, null, defaultQualityScore, false); } - public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, + public List addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final int defaultQualityScore, final boolean isSecondary) { - addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, null, null, defaultQualityScore, isSecondary); + return addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, null, null, defaultQualityScore, isSecondary); } - public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, + public List addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, final int defaultQualityScore) { - addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, null, defaultQualityScore, false); + return addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, null, defaultQualityScore, false); } - public void addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, + public List addMappedFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, final String qualityString, final int defaultQualityScore) { - addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, false); + return addFragment(referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, false); } - public void addMappedFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, + public List addMappedFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean isDuplicate, final String cigar, final String qualityString, final boolean isSecondary, final boolean isSupplementary, final int defaultQualityScore) { - addFragment(readName, referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, isSecondary, isSupplementary); + return addFragment(readName, referenceSequenceIndex, alignmentStart, false, isDuplicate, cigar, qualityString, defaultQualityScore, isSecondary, isSupplementary); } - public void addMappedPair(final int referenceSequenceIndex, + public List addMappedPair(final int referenceSequenceIndex, final int alignmentStart1, final int alignmentStart2, final boolean isDuplicate1, final boolean isDuplicate2, final int defaultQualityScore) { - addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, null, null, + return addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, null, null, false, defaultQualityScore); } - public void addMappedPair(final int referenceSequenceIndex, + public List addMappedPair(final int referenceSequenceIndex, final int alignmentStart1, final int alignmentStart2, final boolean isDuplicate1, @@ -198,11 +199,11 @@ public void addMappedPair(final int referenceSequenceIndex, final String cigar2, final boolean firstOnly, final int defaultQualityScore) { - addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, cigar1, + return addMappedPair(referenceSequenceIndex, alignmentStart1, alignmentStart2, isDuplicate1, isDuplicate2, cigar1, cigar2, false, true, firstOnly, defaultQualityScore); } - public void addMappedPair(final int referenceSequenceIndex, + public List addMappedPair(final int referenceSequenceIndex, final int alignmentStart1, final int alignmentStart2, final boolean isDuplicate1, @@ -213,11 +214,11 @@ public void addMappedPair(final int referenceSequenceIndex, final boolean strand2, final boolean firstOnly, final int defaultQualityScore) { - addMatePair(referenceSequenceIndex, alignmentStart1, alignmentStart2, false, false, isDuplicate1, isDuplicate2, cigar1, cigar2, + return addMatePair(referenceSequenceIndex, alignmentStart1, alignmentStart2, false, false, isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, false, false, defaultQualityScore); } - public void addMatePair(final int referenceSequenceIndex, + public List addMatePair(final int referenceSequenceIndex, final int alignmentStart1, final int alignmentStart2, final boolean record1Unmapped, @@ -232,18 +233,18 @@ public void addMatePair(final int referenceSequenceIndex, final boolean record1NonPrimary, final boolean record2NonPrimary, final int defaultQualityScore) { - addMatePair("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped, + return addMatePair("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped, isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, record1NonPrimary, record2NonPrimary, defaultQualityScore); } - private void addFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar, + private List addFragment(final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar, final String qualityString, final int defaultQualityScore, final boolean isSecondary) { - addFragment("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart, recordUnmapped, isDuplicate, cigar, + return addFragment("READ" + readNameCounter++, referenceSequenceIndex, alignmentStart, recordUnmapped, isDuplicate, cigar, qualityString, defaultQualityScore, isSecondary, false); } - private void addFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar, + private List addFragment(final String readName, final int referenceSequenceIndex, final int alignmentStart, final boolean recordUnmapped, final boolean isDuplicate, final String cigar, final String qualityString, final int defaultQualityScore, final boolean isSecondary, final boolean isSupplementary) { final SAMRecord record = samRecordSetBuilder.addFrag(readName, referenceSequenceIndex, alignmentStart, false, @@ -252,9 +253,10 @@ private void addFragment(final String readName, final int referenceSequenceIndex final String key = samRecordToDuplicatesFlagsKey(record); Assert.assertFalse(this.duplicateFlags.containsKey(key)); this.duplicateFlags.put(key, isDuplicate); + return Collections.singletonList(record); } - public void addMatePair(final String readName, + public List addMatePair(final String readName, final int referenceSequenceIndex1, final int referenceSequenceIndex2, final int alignmentStart1, @@ -299,9 +301,10 @@ public void addMatePair(final String readName, final String key2 = samRecordToDuplicatesFlagsKey(record2); Assert.assertFalse(this.duplicateFlags.containsKey(key2)); this.duplicateFlags.put(key2, isDuplicate2); + return samRecordList; } - public void addMatePair(final String readName, + public List addMatePair(final String readName, final int referenceSequenceIndex1, final int referenceSequenceIndex2, final int alignmentStart1, @@ -318,11 +321,11 @@ public void addMatePair(final String readName, final boolean record1NonPrimary, final boolean record2NonPrimary, final int defaultQuality) { - addMatePair(readName, referenceSequenceIndex1, referenceSequenceIndex2, alignmentStart1, alignmentStart2, + return addMatePair(readName, referenceSequenceIndex1, referenceSequenceIndex2, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped, isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, record1NonPrimary, record2NonPrimary, defaultQuality, null); } - public void addMatePair(final String readName, + public List addMatePair(final String readName, final int referenceSequenceIndex, final int alignmentStart1, final int alignmentStart2, @@ -338,7 +341,7 @@ public void addMatePair(final String readName, final boolean record1NonPrimary, final boolean record2NonPrimary, final int defaultQuality) { - addMatePair(readName, referenceSequenceIndex, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped, + return addMatePair(readName, referenceSequenceIndex, referenceSequenceIndex, alignmentStart1, alignmentStart2, record1Unmapped, record2Unmapped, isDuplicate1, isDuplicate2, cigar1, cigar2, strand1, strand2, firstOnly, record1NonPrimary, record2NonPrimary, defaultQuality); }