Peak density in transcript features

Calculate peak density across transcript features

A common step in CLIP-seq data analysis is to visualize the distribution of called peaks across various transcript features, such as exons, introns, and untranslated regions (UTRs). This can be accomplished using the R package RCAS, which integrates genomic annotation (in GTF format) with peak coordinates (in BED format) to generate peak density profiles over defined transcript regions.

setwd("E:/220625_PC/R workplace/220801_peak_density/")
library(RCAS)

# Import GTF annotation file and save as RDS for faster future loading
gtf <- importGtf("E:/220625_PC/R workplace/220910_annotation/Drosaphila/dmel-all-r6.44.gtf",
                 overwriteObjectAsRds = TRUE)

# Check column names of the GTF data frame
colnames(as.data.frame(gtf))

# Convert GTF to data frame for genome annotation
genome.anno = as.data.frame(gtf) 

# Extract transcript database features (genes, transcripts, exons, etc.) from GRanges
txdbFeatures <- getTxdbFeaturesFromGRanges(gtf)

# Import peak data from BED file, limiting to 10,000 samples;The parameter sampleN = 10000 limits the imported BED file to the first 10,000 peaks (or rows).
peak_bed <- importBed("E:/220625_PC/R workplace/220801_peak_density/peak_bed/peak_bed_10838.txt", 
                      sampleN = 10000)

# Check column names of the peak BED data
colnames(as.data.frame(peak_bed))

library(Matrix)
library(genomation)  # Required dependency for RCAS

# Define matrix class if needed
setClass("mMatrix", contains="Matrix")

# Compute coverage profiles for peaks across transcript features
# Takes peak regions and transcript features, samples 10,000 peaks
cvgList <- calculateCoverageProfileList(
  queryRegions = peak_bed,
  targetRegionsList = txdbFeatures,
  sampleN = 10000)

# Create density plot with mean coverage and 95% confidence interval
tiff("RNA_density.tiff",units = "in",width = 9,height = 8,res = 600)
ggplot2::ggplot(cvgList, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'lightgreen', 
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'black',size=0.2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 3)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())
dev.off()

# Check distribution of peaks across different feature types
table(cvgList$feature)

#------Transcripts
#Create transcript-specific coverage plot with custom styling
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Transcripts-coverage.tiff",units = "in",width = 9,height = 9,res = 900)

# Subset data to include only transcript features
list_Transcripts <- cvgList[cvgList$feature=="transcripts",]
ggplot2::ggplot(list_Transcripts, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'green', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'green',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature,ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------3UTR
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("3UTR-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_3UTR <- cvgList[cvgList$feature=="threeUTRs",]
ggplot2::ggplot(list_3UTR, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'blue', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature,ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------5UTR
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("5UTR-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_5UTR <- cvgList[cvgList$feature=="fiveUTRs",]
ggplot2::ggplot(list_5UTR, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'blue', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------CDS
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("CDS-1.tiff",units = "in",width = 9,height = 9,res = 900)
list_CDS <- cvgList[cvgList$feature=="cds",]
ggplot2::ggplot(list_CDS, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'blue', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'blue',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------Intron
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Intron-2.tiff",units = "in",width = 9,height = 9,res = 900)
list_Intron <- cvgList[cvgList$feature=="introns",]
ggplot2::ggplot(list_Intron, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'red', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------Exon
setwd("E:/220625_PC/R workplace/220801_peak_density/")
colnames(cvgList)
tiff("Exon-2.tiff",units = "in",width = 9,height = 9,res = 900)
list_Exon <- cvgList[cvgList$feature=="exons",]
ggplot2::ggplot(list_Exon, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'red', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 3) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

peaks_density_in_transcripts

#------ Split intron coverage plot into two halves for detailed visualization
# Intron devide into two part (half)
setwd("E:/220625_PC/R workplace/20220320_SXL/Fan.data/220801_peak_density/")
colnames(cvgList)

# Subset data to include only intron features
list_Intron <- cvgList[cvgList$feature=="introns",]

tiff("Intron-half-1.tiff",units = "in",width = 6,height = 9,res = 900)

# Extract first 50 bins (first half of intron regions)
list_Intron_half_1 <- list_Intron[1:50,]

# Plot first half with red color scheme
ggplot2::ggplot(list_Intron_half_1, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'red', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

#------Create second half of intron coverage plot (bins 51-100)
tiff("Intron-half-2.tiff",units = "in",width = 6,height = 9,res = 900)

# Extract bins 51-100 (second half of intron regions)
list_Intron_half_2 <- list_Intron[51:100,]

# Plot second half with identical red styling
ggplot2::ggplot(list_Intron_half_2, aes(x = bins, y = meanCoverage)) + 
  geom_ribbon(fill = 'red', alpha=0.3,
              aes(ymin = meanCoverage - standardError * 1.96, 
                  ymax = meanCoverage + standardError * 1.96)) + 
  geom_line(color = 'red',size=2) + theme_bw(base_size = 28) +
  facet_wrap( ~ feature, ncol = 1) +
  ylim(0,0.1)+
  theme(panel.grid =element_blank(),
        panel.grid.major = element_blank(),
        panel.grid.minor = element_blank())+
  theme(axis.text.x = element_text(size = 15,color = "black"),   
        axis.text.y = element_text(size = 15,color = "black"),   
        axis.title.x = element_text(size = 18,color = "black"),  
        axis.title.y = element_text(size = 18,color = "black")) 
dev.off()

Bioinformatic

Peak density in transcript features

https://www.lianganmin.cn/2025/12/19/20251218-Peak-density-in-transcript-features/

Author

An-min

Posted on

December 19, 2025

Licensed under

2025年总结 Previous

2025年夏天东湖骑行 [1] Next