Peak density in transcript features

Calculate peak density across transcript features

.

.

A common step in CLIP-seq data analysis is to visualize the distribution of called peaks across various transcript features, such as exons, introns, and untranslated regions (UTRs). This can be accomplished using the R package RCAS, which integrates genomic annotation (in GTF format) with peak coordinates (in BED format) to generate peak density profiles over defined transcript regions.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
setwd("E:/220625_PC/R workplace/220801_peak_density/")
library(RCAS)

# Import GTF annotation file and save as RDS for faster future loading
gtf <- importGtf("E:/220625_PC/R workplace/220910_annotation/Drosaphila/dmel-all-r6.44.gtf",
overwriteObjectAsRds = TRUE)

# Check column names of the GTF data frame
colnames(as.data.frame(gtf))

# Convert GTF to data frame for genome annotation
genome.anno = as.data.frame(gtf)

# Extract transcript database features (genes, transcripts, exons, etc.) from GRanges
txdbFeatures <- getTxdbFeaturesFromGRanges(gtf)

# Import peak data from BED file, limiting to 10,000 samples;The parameter sampleN = 10000 limits the imported BED file to the first 10,000 peaks (or rows).
peak_bed <- importBed("E:/220625_PC/R workplace/220801_peak_density/peak_bed/peak_bed_10838.txt",
sampleN = 10000)

# Check column names of the peak BED data
colnames(as.data.frame(peak_bed))

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
library(Matrix)
library(genomation) # Required dependency for RCAS

# Define matrix class if needed
setClass("mMatrix", contains="Matrix")

# Compute coverage profiles for peaks across transcript features
# Takes peak regions and transcript features, samples 10,000 peaks
cvgList <- calculateCoverageProfileList(
queryRegions = peak_bed,
targetRegionsList = txdbFeatures,
sampleN = 10000)

# Create density plot with mean coverage and 95% confidence interval
tiff("RNA_density.tiff",units = "in",width = 9,height = 8,res = 600)
ggplot2::ggplot(cvgList, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'lightgreen',
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'black',size=0.2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 3)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())
dev.off()

# Check distribution of peaks across different feature types
table(cvgList$feature)

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
#------Transcripts
#Create transcript-specific coverage plot with custom styling
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Transcripts-coverage.tiff",units = "in",width = 9,height = 9,res = 900)

# Subset data to include only transcript features
list_Transcripts <- cvgList[cvgList$feature=="transcripts",]
ggplot2::ggplot(list_Transcripts, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'green', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'green',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature,ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------3UTR
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("3UTR-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_3UTR <- cvgList[cvgList$feature=="threeUTRs",]
ggplot2::ggplot(list_3UTR, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'blue', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature,ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------5UTR
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("5UTR-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_5UTR <- cvgList[cvgList$feature=="fiveUTRs",]
ggplot2::ggplot(list_5UTR, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'blue', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------CDS
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("CDS-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_CDS <- cvgList[cvgList$feature=="cds",]
ggplot2::ggplot(list_CDS, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'blue', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------Intron
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Intron-2.tiff",units = "in",width = 9,height = 9,res = 900)
list_Intron <- cvgList[cvgList$feature=="introns",]
ggplot2::ggplot(list_Intron, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'red', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------Exon
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Exon-2.tiff",units = "in",width = 9,height = 9,res = 900)
list_Exon <- cvgList[cvgList$feature=="exons",]
ggplot2::ggplot(list_Exon, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'red', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 3) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

.

peaks_density_in_transcripts

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
#------ Split intron coverage plot into two halves for detailed visualization
# Intron devide into two part (half)
setwd("E:/220625_PC/R workplace/20220320_SXL/Fan.data/220801_peak_density/")
colnames(cvgList)

# Subset data to include only intron features
list_Intron <- cvgList[cvgList$feature=="introns",]

tiff("Intron-half-1.tiff",units = "in",width = 6,height = 9,res = 900)

# Extract first 50 bins (first half of intron regions)
list_Intron_half_1 <- list_Intron[1:50,]

# Plot first half with red color scheme
ggplot2::ggplot(list_Intron_half_1, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'red', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

#------Create second half of intron coverage plot (bins 51-100)
tiff("Intron-half-2.tiff",units = "in",width = 6,height = 9,res = 900)

# Extract bins 51-100 (second half of intron regions)
list_Intron_half_2 <- list_Intron[51:100,]

# Plot second half with identical red styling
ggplot2::ggplot(list_Intron_half_2, aes(x = bins, y = meanCoverage)) +
geom_ribbon(fill = 'red', alpha=0.3,
aes(ymin = meanCoverage - standardError * 1.96,
ymax = meanCoverage + standardError * 1.96)) +
geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
facet_wrap( ~ feature, ncol = 1) +
ylim(0,0.1)+
theme(panel.grid =element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank())+
theme(axis.text.x = element_text(size = 15,color = "black"),
axis.text.y = element_text(size = 15,color = "black"),
axis.title.x = element_text(size = 18,color = "black"),
axis.title.y = element_text(size = 18,color = "black"))
dev.off()

Peak density in transcript features
https://www.lianganmin.cn/2025/12/19/20251218-Peak-density-in-transcript-features/
Author
An-min
Posted on
December 19, 2025
Licensed under