Minimal quality report for targeted sequencing data using TEQC. This is an analysis for Sample_1002.correct.bwa.header.sorted.bed using target information from exonTargetsSorted.bed. Parameters used:

baitFile <- NA
coverage <- 40
covMax <- 250

Target and read information

Target information is the same for all samples:

# Import target information; built-in support is limited to hg18/19
targets <- get.targets(targetsfile = targetFile, chrcol = 1, startcol = 2, 
    endcol = 3, zerobased = T, skip = 0)
## [1] "read 65 (non-overlapping) target regions"
# Sanity check, fraction of genome covered by targets
ft <- fraction.target(targets, genome = "hg19")
ft
## [1] 6.124e-06

Test how many reads are on-target or are at most 100bp off to either side:

# Sequence information
reads <- get.reads(seqFile, filetype = "bed", idcol = 4)
## [1] "read 1849875 sequenced reads"
# Data exploration
# 
# Fraction of reads on target
fr <- fraction.reads.target(reads, targets, Offset = 0)
fr
## [1] 0.8019
# Fraction of reads on target with 100bp boundaries
fr <- fraction.reads.target(reads, targets, Offset = 100)
fr
## [1] 1

Test for uneven distribution among chromosomes:

# Distribution of reads along the chromosomes
chrom.barplot(reads, targets)

plot of chunk barplot

Coverage

Read coverage for each baited region:

# 
# Create the coverage object
#
cov <- coverage.target(reads, targets, perTarget=T, perBase=T)
## Warning message: failed to set names on the result of unlisting a SimpleRleList object
cov

$avgTargetCoverage [1] 4219

$targetCoverageSD [1] 4293

$targetCoverageQuantiles 0% 25% 50% 75% 100% 0 1192 2840 5686 20803

$targetCoverages RangedData with 65 rows and 2 value columns across 4 spaces space ranges | avgCoverage coverageSD | 1 chr1 [235272661, 235275423] | 3087.1 2991.89 2 chr1 [235277084, 235277225] | 2417.9 1751.99 3 chr1 [235283134, 235283214] | 1352.4 930.77 4 chr1 [235285642, 235285687] | 164.4 96.78 5 chr1 [235291911, 235292256] | 6534.1 5744.63 6 chr11 [ 72975571, 72975694] | 4880.8 3194.49 7 chr11 [ 72980940, 72981200] | 5769.1 4203.49 8 chr11 [ 72983248, 72983511] | 8285.4 5323.56 9 chr11 [ 73006776, 73006860] | 3481.8 2157.29 ... ... ... ... ... ... 57 chr5 [1330396, 1330498] | 4759.3 4468.4 58 chr5 [1331915, 1331998] | 2953.3 1615.3 59 chr5 [1334405, 1334498] | 2166.8 1001.5 60 chr5 [1335173, 1335289] | 6299.8 3852.5 61 chr5 [1338020, 1338097] | 958.2 349.9 62 chr5 [1338976, 1339120] | 7305.4 6260.5 63 chr5 [1341787, 1341975] | 3276.2 3597.7 64 chr5 [1344467, 1344566] | 4976.3 2906.4 65 chr5 [1344796, 1345002] | 3194.4 3809.9

$coverageAll SimpleRleList of length 4 $chr1 'integer' Rle of length 235292330 with 2375 runs Lengths: 235272590 1 1 ... 1 1 1 Values : 0 24 110 ... 270 80 15

$chr11 'integer' Rle of length 73009722 with 2058 runs Lengths: 72975486 1 1 ... 1 1 1 Values : 0 1 15 ... 21 15 8

$chr15 'integer' Rle of length 78952954 with 4816 runs Lengths: 78885337 1 1 ... 1 1 1 Values : 0 45 52 ... 85 17 1

$chr5 'integer' Rle of length 1345002 with 5153 runs Lengths: 1253273 1 2 28 ... 1 7 73 Values : 0 2 3 429 ... 2 1 0

$coverageTarget SimpleRleList of length 4 $chr1 'integer' Rle of length 3378 with 2060 runs Lengths: 1 1 1 1 1 2 ... 1 1 1 1 1 1 Values : 4156 4132 4081 1098 952 939 ... 2650 2094 2078 2067 2074 2158

$chr11 'integer' Rle of length 2860 with 1738 runs Lengths: 1 1 1 8 1 24 ... 1 1 1 1 1 1 Values : 788 1029 1918 1923 1922 1921 ... 1101 1098 1097 1092 1095 1097

$chr15 'integer' Rle of length 6842 with 3520 runs Lengths: 1 1 1 1 1 1 ... 1 1 1 18 1 1 Values : 1911 1941 1943 1974 1976 1979 ... 304 303 382 1140 1142 1208

$chr5 'integer' Rle of length 6132 with 3332 runs Lengths: 22 1 1 1 1 1 ... 1 1 1 1 7 73 Values : 429 430 474 1828 1880 1969 ... 247 45 4 2 1 0

Provide reports on overall coverage and troublesome low-coverage regions:

# Extract target coverage information, export to results
targetCov <- cov$targetCoverages
targetCov <- readsPerTarget(reads, targetCov)
targetCov
## RangedData with 65 rows and 3 value columns across 4 spaces
##        space                 ranges   | avgCoverage coverageSD    nReads
##     <factor>              <IRanges>   |   <numeric>  <numeric> <numeric>
## 1       chr1 [235272661, 235275423]   |      3087.1    2991.89    140739
## 2       chr1 [235277084, 235277225]   |      2417.9    1751.99      7162
## 3       chr1 [235283134, 235283214]   |      1352.4     930.77      3131
## 4       chr1 [235285642, 235285687]   |       164.4      96.78       336
## 5       chr1 [235291911, 235292256]   |      6534.1    5744.63     43140
## 6      chr11 [ 72975571,  72975694]   |      4880.8    3194.49     11763
## 7      chr11 [ 72980940,  72981200]   |      5769.1    4203.49     25031
## 8      chr11 [ 72983248,  72983511]   |      8285.4    5323.56     38391
## 9      chr11 [ 73006776,  73006860]   |      3481.8    2157.29      9114
## ...      ...                    ... ...         ...        ...       ...
## 57      chr5     [1330396, 1330498]   |      4759.3     4468.4     14093
## 58      chr5     [1331915, 1331998]   |      2953.3     1615.3      6349
## 59      chr5     [1334405, 1334498]   |      2166.8     1001.5      7874
## 60      chr5     [1335173, 1335289]   |      6299.8     3852.5     13790
## 61      chr5     [1338020, 1338097]   |       958.2      349.9      1500
## 62      chr5     [1338976, 1339120]   |      7305.4     6260.5     20045
## 63      chr5     [1341787, 1341975]   |      3276.2     3597.7     10248
## 64      chr5     [1344467, 1344566]   |      4976.3     2906.4      9907
## 65      chr5     [1344796, 1345002]   |      3194.4     3809.9     11012
targetFrame <- as.data.frame(targetCov)

# Calculate average coverage for this sample
avgCoverage <- mean(targetFrame$avgCoverage)
avgCoverage
## [1] 3618
# Repeat for targets with low coverage only for troubleshooting
lowCoverage <- max(5, avgCoverage * 0.1)
filterCov <- targetFrame[targetFrame$avgCoverage < lowCoverage, ]
filterCov
##    space     start       end width avgCoverage coverageSD nReads
## 4   chr1 235285642 235285687    46      164.41      96.78    336
## 12 chr15  78886257  78886353    97       23.98      29.42     74
## 15 chr15  78909367  78909475   109      258.69     402.70    978
## 48  chr5   1294887   1295162   276       22.70      40.57    144
## 53  chr5   1322993   1323026    34      287.44      29.00    454

Summary information is stored in /n/home10/ohofmann/hsph/projects/dc_lung_cancer/data/Sample_1002.correct.bwa.header.sorted_targetCoverage.txt and /n/home10/ohofmann/hsph/projects/dc_lung_cancer/data/Sample_1002.correct.bwa.header.sorted_targetCoverageLowCov.txt.

N-fold coverage overview

N-fold coverage overview, using 40 as a threshold for now:

covered.k(cov$coverageTarget, k = c(1, coverage/2, coverage, coverage * 
    2))
## Warning message: failed to set names on the result of unlisting a SimpleRleList object
##      1     20     40     80 
## 0.9438 0.9280 0.9236 0.9172 

Histogram of read coverages for bases within the target. Dashed lines highlight the cumulative fraction of target bases with at least the specified coverage.

covRange <- cov$coverageTarget[cov$coverageTarget <= covMax]
coverage.hist(covRange, covthreshold = coverage, breaks = c(0:(covMax/10)) * 
    10, xlim = c(0, covMax))
## Warning message: failed to set names on the result of unlisting a SimpleRleList object

plot of chunk covhiso

Coverage and read dependency on target size:

# coverage.targetlength.plot(targetCov, plotcolumn='nReads', pch=16,# cex=1.5) coverage.targetlength.plot(targetCov, plotcolumn='avgCoverage',# pch=16, cex=1.5)

Coverage along the target regions (+/- 1kb):

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr1", 
    Start = 235271660, End = 235293256)

plot of chunk chromPlots

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr11", 
    Start = 72974570, End = 73007860)

plot of chunk chromPlots

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr15", 
    Start = 78884397, End = 78914322)

plot of chunk chromPlots

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr15", 
    Start = 78915636, End = 78953874)

plot of chunk chromPlots

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr5", 
    Start = 1252282, End = 1296162)

plot of chunk chromPlots

coverage.plot(cov$coverageAll, targets, Offset = 100, chr = "chr5", 
    Start = 1317900, End = 1346002)

plot of chunk chromPlots