RNA Binding sites in AS-events

RNA Binding sites in AS-events

.

.

# Gene ID transform: build a gene-level annotation table (gene ID -> gene symbol) from the GTF
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
library(rtracklayer)
library(dplyr)

gtf_file <- "E:/220625_PC/R workplace/220910_annotation/Drosaphila/dmel-all-r6.44.gtf"

# Gene-level annotation table: the GTF records whose type is "gene",
# converted to a data frame and reduced to the identifier (renamed to
# GeneID), the symbol and the coordinate columns.
genome.anno <- local({
  gtf_records <- import(gtf_file)
  gene_records <- gtf_records[gtf_records$type == "gene"] # filtered by gene
  select(
    as.data.frame(gene_records),
    GeneID = gene_id,
    gene_symbol,
    seqnames,
    start,
    end,
    width,
    type
  )
})

.

Universal functions for exporting the splice sites and introns of AS events

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
library(dplyr)

#------ dir.path
# Root folder of the MATS junction-count tables (*.MATS.JC.txt) read further down.
base_raw <- "E:/220625_PC/R workplace/220320_SXL/220421_SXL.S2/AS/230806_AS/"
# Folder intended for the per-splice-site tables (the peak-overlap results for
# splice sites are later read back from this same folder).
out_site <- "E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/splice.site/"
# Folder that receives the intron-interval tables written by export_intron().
out_intron <- "E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/intron/"

#------ function for merge genome.anno and X.MATS.JC.txt
#' Read a MATS junction-count table and attach the gene annotation.
#'
#' The table is left-joined to the global `genome.anno` on `GeneID`, so events
#' whose gene is missing from the annotation are kept (with `NA` annotation
#' columns). Note that `merge()` returns the rows ordered by `GeneID`.
#'
#' @param file_path Path to a whitespace-delimited table with a header row
#'   that contains at least the columns `GeneID` and `chr`.
#' @return A data frame with the annotation columns appended and a leading
#'   "chr" prefix removed from the `chr` column.
read_mats <- function(file_path) {
  events <- read.table(file_path, header = TRUE)
  annotated <- merge(events, genome.anno, by = "GeneID", all.x = TRUE)
  # Only strip a *leading* "chr": an unanchored pattern would also mangle
  # sequence names that merely contain "chr" somewhere inside.
  annotated$chr <- sub("^chr", "", annotated$chr)
  annotated
}

#------ function for splice site write.table
#' Write one headerless, tab-separated table per splice-site column.
#'
#' For every name in `fields` a file `<prefix>_<field>.txt` is written into
#' the global output directory `out_site`. Each file holds the columns
#' GeneID, ID, gene_symbol, chr, strand and the requested field, in that order.
#'
#' @param df Data frame containing the identifier columns and all `fields`.
#' @param fields Character vector of coordinate column names to export.
#' @param prefix File-name prefix shared by all files of this call.
#' @return `NULL`, invisibly; called for its side effect of writing files.
export_sites <- function(df, fields, prefix) {
  id_cols <- c("GeneID", "ID", "gene_symbol", "chr", "strand")

  for (field in fields) {
    write.table(
      df[, c(id_cols, field)],
      # The output directory is defined as `out_site`; the previously used
      # name `out_splice` is never defined, so every call failed.
      file = file.path(out_site, paste0(prefix, "_", field, ".txt")),
      quote = FALSE,
      row.names = FALSE,
      col.names = FALSE,
      sep = "\t"
    )
  }
  invisible(NULL)
}

#------ function for splice-intron write.table
# Write one headerless, tab-separated interval table `<prefix>.txt` into the
# global directory `out_intron`. Columns, in order: GeneID, ID, gene_symbol,
# chr, strand, then the two boundary columns named by `col1` and `col2`.
export_intron <- function(df, col1, col2, prefix) {
  keep <- c("GeneID", "ID", "gene_symbol", "chr", "strand", col1, col2)
  target <- file.path(out_intron, paste0(prefix, ".txt"))

  write.table(
    df[, keep],
    file = target,
    sep = "\t",
    quote = FALSE,
    col.names = FALSE,
    row.names = FALSE
  )
}

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
######################## SE ################################
# SE events: every listed coordinate column becomes its own table.
se_file <- file.path(base_raw, "raw_gtf/novel/SE.MATS.JC.txt")
se_fields <- c("upstreamEE", "exonStart_0base", "exonEnd", "downstreamES")
se_df <- read_mats(se_file)

export_sites(df = se_df, fields = se_fields, prefix = "S2_se")

######################## RI ################################
# RI events are read from the "without_novel" run, unlike the other types.
ri_file <- file.path(base_raw, "new/without_novel/RI.MATS.JC.txt")
ri_fields <- c("upstreamEE", "downstreamES")
ri_df <- read_mats(ri_file)

export_sites(df = ri_df, fields = ri_fields, prefix = "S2_ri")

######################## A5SS ##############################
a5_file <- file.path(base_raw, "raw_gtf/novel/A5SS.MATS.JC.txt")
a5_df <- read_mats(a5_file)

# A different set of coordinate columns is exported for each strand,
# so the table is split by strand first.
a5_p <- a5_df %>% filter(strand == "+")
a5_m <- a5_df %>% filter(strand == "-")

export_sites(
  df = a5_p,
  fields = c("shortEE", "longExonEnd", "flankingES"),
  prefix = "S2_a5ss_p"
)
export_sites(
  df = a5_m,
  fields = c("flankingEE", "longExonStart_0base", "shortES"),
  prefix = "S2_a5ss_m"
)

######################## A3SS ##############################
a3_file <- file.path(base_raw, "raw_gtf/novel/A3SS.MATS.JC.txt")
a3_df <- read_mats(a3_file)

# Same strand split as for A5SS, with the column sets swapped.
a3_p <- a3_df %>% filter(strand == "+")
a3_m <- a3_df %>% filter(strand == "-")

export_sites(
  df = a3_p,
  fields = c("flankingEE", "longExonStart_0base", "shortES"),
  prefix = "S2_a3ss_p"
)
export_sites(
  df = a3_m,
  fields = c("shortEE", "longExonEnd", "flankingES"),
  prefix = "S2_a3ss_m"
)

######################## MXE ###############################
mxe_file <- file.path(base_raw, "raw_gtf/novel/MXE.MATS.JC.txt")
# Column names carry a leading "X" here (as produced when read.table()
# sanitises header names that start with a digit).
mxe_fields <- c(
  "upstreamEE",
  "X1stExonStart_0base", "X1stExonEnd",
  "X2ndExonStart_0base", "X2ndExonEnd",
  "downstreamES"
)
mxe_df <- read_mats(mxe_file)

export_sites(df = mxe_df, fields = mxe_fields, prefix = "S2_mxe")

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
############################ SE ############################
se_df <- file.path(base_raw, "raw_gtf/novel/SE.MATS.JC.txt") %>% read_mats()

# Two intervals per SE event: one on each side of the exon.
export_intron(se_df, col1 = "upstreamEE", col2 = "exonStart_0base", prefix = "S2_se_intron1")
export_intron(se_df, col1 = "exonEnd", col2 = "downstreamES", prefix = "S2_se_intron2")

############################ RI ############################
ri_df <- file.path(base_raw, "new/without_novel/RI.MATS.JC.txt") %>% read_mats()

export_intron(ri_df, col1 = "upstreamEE", col2 = "downstreamES", prefix = "S2_ri_intron")

########################### A5SS ###########################
a5_df <- file.path(base_raw, "raw_gtf/novel/A5SS.MATS.JC.txt") %>% read_mats()

# The interval boundaries come from different columns on each strand.
a5_p <- a5_df %>% filter(strand == "+")
a5_m <- a5_df %>% filter(strand == "-")

export_intron(a5_p, col1 = "longExonEnd", col2 = "flankingES", prefix = "S2_a5ss_p_intron")
export_intron(a5_m, col1 = "flankingEE", col2 = "longExonStart_0base", prefix = "S2_a5ss_m_intron")

########################### A3SS ###########################
a3_df <- file.path(base_raw, "raw_gtf/novel/A3SS.MATS.JC.txt") %>% read_mats()

a3_p <- a3_df %>% filter(strand == "+")
a3_m <- a3_df %>% filter(strand == "-")

export_intron(a3_p, col1 = "flankingEE", col2 = "longExonStart_0base", prefix = "S2_a3ss_p_intron")
export_intron(a3_m, col1 = "longExonEnd", col2 = "flankingES", prefix = "S2_a3ss_m_intron")

############################ MXE ###########################
mxe_df <- file.path(base_raw, "raw_gtf/novel/MXE.MATS.JC.txt") %>% read_mats()

# Three consecutive intervals: before the 1st exon, between the two exons,
# and after the 2nd exon.
export_intron(mxe_df, col1 = "upstreamEE", col2 = "X1stExonStart_0base", prefix = "S2_mxe_intron1")
export_intron(mxe_df, col1 = "X1stExonEnd", col2 = "X2ndExonStart_0base", prefix = "S2_mxe_intron2")
export_intron(mxe_df, col1 = "X2ndExonEnd", col2 = "downstreamES", prefix = "S2_mxe_intron3")

.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
# Intersect the peak set (F.peak.bed) with (a) +/-75 bp windows around every
# exported splice site and (b) the exported intron intervals.
# The *.txt tables exported from R are expected to be present in
# splice.site/ and intron/ respectively before the loops below run.
cd  /data1/amliang/projects/sxl_DC/220906_peak_in_AS.events/20250318_peak_in_S2/
mkdir -p splice.site intron

# splice sites
cd splice.site
ln -s /data1/amliang/projects/sxl_DC/220315_SXL_APA/sxl-bind-DEG/F.peak.bed ./ # peaks

# region up/down 75bp of splice site
# Input columns: GeneID, ID, gene_symbol, chr, strand, position.
# Carriage returns are stripped first (the tables may carry Windows line
# endings), and the window start is clamped at 0 because bedtools rejects
# negative coordinates.
for txt in *.txt; do
    [ -e "${txt}" ] || continue
    tr -d '\r' < "${txt}" |
        awk 'BEGIN { OFS = "\t" }
             { start = $6 - 75; if (start < 0) start = 0
               print $4, start, $6 + 75, $1, $2, $3, $4, $5, $6 }' \
        > "$(basename "${txt}" ".txt").bed"
done

# Only the generated S2_*.bed files are intersected; a bare "*bed" glob would
# also match F.peak.bed itself.
for bed in S2_*.bed; do
    [ -e "${bed}" ] || continue
    bedtools intersect -a ./F.peak.bed -b "${bed}" -wa -wb |
        sort -k4 > "peak_in_$(basename "${bed}" ".bed")"
done

# intron
# The current directory is splice.site, so the sibling folder has to be
# reached through the parent.
cd ../intron
ln -s /data1/amliang/projects/sxl_DC/220315_SXL_APA/sxl-bind-DEG/F.peak.bed ./ # peaks

# Normalise line endings and turn runs of spaces into tabs.
for txt in *.txt; do
    [ -e "${txt}" ] || continue
    tr -d '\r' < "${txt}" | sed 's/ \+/\t/g' > "$(basename "${txt}" ".txt").TXT"
done

# Input columns: GeneID, ID, gene_symbol, chr, strand, start, end.
for tab in *.TXT; do
    [ -e "${tab}" ] || continue
    awk 'BEGIN { OFS = "\t" } { print $4, $6, $7, $1, $2, $3, $5 }' "${tab}" \
        > "$(basename "${tab}" ".TXT").bed"
done

for bed in S2_*.bed; do
    [ -e "${bed}" ] || continue
    bedtools intersect -a ./F.peak.bed -b "${bed}" -wa -wb |
        sort -k4 > "peak_in_$(basename "${bed}" ".bed")"
done

.

Download all the results (the `peak_in_*` files produced by `bedtools intersect`) from the server to the local folders “intron” and “splice.site”.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
#------------------------------------ with peak

# the shell script & data in :
# /data1/amliang/projects/sxl_DC/220906_peak_in_AS.events/20250318_peak_in_S2

# ------ peak in splice site
setwd("E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/splice.site/")

# Read the headerless overlap tables `peak_in_<prefix>_<field>` for all
# `fields` and stack them in the given order.
read_site_peaks <- function(prefix, fields) {
  tables <- lapply(
    paste0("peak_in_", prefix, "_", fields),
    read.table,
    header = FALSE
  )
  do.call(rbind, tables)
}

# One combined table per event type. The A5SS/A3SS tables stack the
# plus-strand files first, then the minus-strand files.
peak_in_S2_se_SS <- read_site_peaks(
  "S2_se",
  c("upstreamEE", "exonStart_0base", "exonEnd", "downstreamES")
)

peak_in_S2_ri_SS <- read_site_peaks(
  "S2_ri",
  c("upstreamEE", "downstreamES")
)

peak_in_S2_a5ss_SS <- rbind(
  read_site_peaks("S2_a5ss_p", c("shortEE", "longExonEnd", "flankingES")),
  read_site_peaks("S2_a5ss_m", c("flankingEE", "longExonStart_0base", "shortES"))
)

peak_in_S2_a3ss_SS <- rbind(
  read_site_peaks("S2_a3ss_p", c("flankingEE", "longExonStart_0base", "shortES")),
  read_site_peaks("S2_a3ss_m", c("shortEE", "longExonEnd", "flankingES"))
)

peak_in_S2_mxe_SS <- read_site_peaks(
  "S2_mxe",
  c(
    "upstreamEE",
    "X1stExonStart_0base", "X1stExonEnd",
    "X2ndExonStart_0base", "X2ndExonEnd",
    "downstreamES"
  )
)

# Columns 1-8 come from the peak file, columns 9-17 from the splice-site
# window file. The window file repeats the chromosome (columns 9 and 15);
# the second copy is named "site_chr" so that column names stay unique and
# selecting "chr" is unambiguous.
col_name_ss <- c(
  "peak_chr", "peak_start", "peak_end", "peak.xuhao", "log10Pv",
  "peak.strand", "siteScore", "V8",
  "chr", "target_start", "target_end", "GeneID", "ID", "gene_symbol",
  "site_chr", "strand", "pos"
)

colnames(peak_in_S2_se_SS) <- col_name_ss
colnames(peak_in_S2_ri_SS) <- col_name_ss
colnames(peak_in_S2_a5ss_SS) <- col_name_ss
colnames(peak_in_S2_a3ss_SS) <- col_name_ss
colnames(peak_in_S2_mxe_SS) <- col_name_ss

# ------ peak in intron
setwd("E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/intron/")

# Read the named headerless overlap tables and stack them in the given order.
read_intron_peaks <- function(file_names) {
  do.call(rbind, lapply(file_names, read.table, header = FALSE))
}

peak_in_S2_se_intron <- read_intron_peaks(
  c("peak_in_S2_se_intron1", "peak_in_S2_se_intron2")
)

peak_in_S2_ri_intron <- read_intron_peaks("peak_in_S2_ri_intron")

peak_in_S2_a5ss_intron <- read_intron_peaks(
  c("peak_in_S2_a5ss_p_intron", "peak_in_S2_a5ss_m_intron")
)

# The minus-strand file is "peak_in_S2_a3ss_m_intron"; the former spelling
# "..._intorn" does not match any file produced by the shell step.
peak_in_S2_a3ss_intron <- read_intron_peaks(
  c("peak_in_S2_a3ss_p_intron", "peak_in_S2_a3ss_m_intron")
)

peak_in_S2_mxe_intron <- read_intron_peaks(
  c("peak_in_S2_mxe_intron1", "peak_in_S2_mxe_intron2", "peak_in_S2_mxe_intron3")
)

# Columns 1-8 come from the peak file, columns 9-15 from the intron
# interval file.
col_name_intron <- c(
  "peak_chr", "peak_start", "peak_end", "peak.xuhao", "log10Pv",
  "peak.strand", "siteScore", "V8",
  "chr", "target_start", "target_end", "GeneID", "ID", "gene_symbol", "strand"
)

colnames(peak_in_S2_se_intron) <- col_name_intron
colnames(peak_in_S2_ri_intron) <- col_name_intron
colnames(peak_in_S2_a5ss_intron) <- col_name_intron
colnames(peak_in_S2_a3ss_intron) <- col_name_intron
colnames(peak_in_S2_mxe_intron) <- col_name_intron




# ------ merge peak in splice site and intron
# The splice-site tables are trimmed to these 15 columns so that they can be
# row-bound with the intron tables.
peak_keep_cols <- c(
  "peak_chr", "peak_start", "peak_end", "peak.xuhao", "log10Pv",
  "peak.strand", "siteScore", "V8",
  "chr", "target_start", "target_end", "GeneID", "ID", "gene_symbol", "strand"
)

peak_in_S2_se_SS <- peak_in_S2_se_SS[, peak_keep_cols]
peak_in_S2_ri_SS <- peak_in_S2_ri_SS[, peak_keep_cols]
peak_in_S2_a5ss_SS <- peak_in_S2_a5ss_SS[, peak_keep_cols]
peak_in_S2_a3ss_SS <- peak_in_S2_a3ss_SS[, peak_keep_cols]
peak_in_S2_mxe_SS <- peak_in_S2_mxe_SS[, peak_keep_cols]

# filter
# Keep only overlaps where the peak lies on the same strand as the event.
keep_same_strand <- function(hits) {
  hits[hits$peak.strand == hits$strand, ]
}

peak_SE_S2 <- keep_same_strand(rbind(peak_in_S2_se_SS, peak_in_S2_se_intron))
peak_RI_S2 <- keep_same_strand(rbind(peak_in_S2_ri_SS, peak_in_S2_ri_intron))
peak_A5SS_S2 <- keep_same_strand(rbind(peak_in_S2_a5ss_SS, peak_in_S2_a5ss_intron))
peak_A3SS_S2 <- keep_same_strand(rbind(peak_in_S2_a3ss_SS, peak_in_S2_a3ss_intron))
peak_MXE_S2 <- keep_same_strand(rbind(peak_in_S2_mxe_SS, peak_in_S2_mxe_intron))

setwd("E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/")
library(openxlsx)

# One worksheet per event type, each named after the table it holds.
peak_sheets <- list(
  peak_SE_S2 = peak_SE_S2,
  peak_RI_S2 = peak_RI_S2,
  peak_A5SS_S2 = peak_A5SS_S2,
  peak_A3SS_S2 = peak_A3SS_S2,
  peak_MXE_S2 = peak_MXE_S2
)

wb <- createWorkbook()
for (sheet_name in names(peak_sheets)) {
  addWorksheet(wb, sheet_name)
  writeData(
    wb, sheet_name, peak_sheets[[sheet_name]],
    startCol = 1, startRow = 1, rowNames = FALSE
  )
}
saveWorkbook(wb, file = 'peak_in_AS_S2.xlsx')


#------ AS-events
library(stringr)

# Split the comma-separated SJC_SAMPLE_1 / SJC_SAMPLE_2 columns into one
# column per replicate (three per sample). The new columns hold the split
# pieces as character strings.
add_sjc_replicates <- function(events) {
  wt_counts <- str_split(events$SJC_SAMPLE_1, "\\,", simplify = TRUE)
  sxl_counts <- str_split(events$SJC_SAMPLE_2, "\\,", simplify = TRUE)
  events$SJC_WT_1 <- wt_counts[, 1]
  events$SJC_WT_2 <- wt_counts[, 2]
  events$SJC_WT_3 <- wt_counts[, 3]
  events$SJC_Sxl_1 <- sxl_counts[, 1]
  events$SJC_Sxl_2 <- sxl_counts[, 2]
  events$SJC_Sxl_3 <- sxl_counts[, 3]
  events
}

# Attach the gene annotation; events without an annotated gene are kept.
annotate_events <- function(events) {
  merge(events, genome.anno, by = "GeneID", all.x = TRUE)
}

setwd("E:/220625_PC/R workplace/220320_SXL/220421_SXL.S2/AS/230806_AS/raw_gtf/novel/")
S2_se_novelSS <- read.table("SE.MATS.JC.txt", header = TRUE)
S2_a3ss_novelSS <- read.table("A3SS.MATS.JC.txt", header = TRUE)
S2_a5ss_novelSS <- read.table("A5SS.MATS.JC.txt", header = TRUE)
S2_mxe_novelSS <- read.table("MXE.MATS.JC.txt", header = TRUE)
# The RI table comes from a different folder than the other four.
setwd("E:/220625_PC/R workplace/220320_SXL/220421_SXL.S2/AS/230806_AS/new/without_novel/")
S2_ri_novelSS <- read.table("RI.MATS.JC.txt", header = TRUE)

#
S2_se_novelSS <- annotate_events(add_sjc_replicates(S2_se_novelSS))
S2_ri_novelSS <- annotate_events(add_sjc_replicates(S2_ri_novelSS))
S2_a5ss_novelSS <- annotate_events(add_sjc_replicates(S2_a5ss_novelSS))
S2_a3ss_novelSS <- annotate_events(add_sjc_replicates(S2_a3ss_novelSS))
S2_mxe_novelSS <- annotate_events(add_sjc_replicates(S2_mxe_novelSS))

# Events with FDR < 0.05 whose IncLevelDifference is above 0.05 (up) ...
pick_up <- function(events) {
  events[events$FDR < 0.05 & events$IncLevelDifference > 0.05, ]
}

# ... or below -0.05 (down).
pick_down <- function(events) {
  events[events$FDR < 0.05 & events$IncLevelDifference < -0.05, ]
}

# Keep events whose mean SJC count exceeds 5 in at least one of the two
# samples.
# NOTE(review): only replicates 1 and 2 enter each mean, although a third
# replicate column exists for both samples - confirm this is intended.
keep_covered <- function(events) {
  wt_mean <- rowMeans(cbind(
    as.numeric(events$SJC_WT_1),
    as.numeric(events$SJC_WT_2)
  ))
  sxl_mean <- rowMeans(cbind(
    as.numeric(events$SJC_Sxl_1),
    as.numeric(events$SJC_Sxl_2)
  ))
  events[wt_mean > 5 | sxl_mean > 5, ]
}

S2_se_diff_up <- pick_up(S2_se_novelSS)
S2_se_diff_down <- pick_down(S2_se_novelSS)
S2_ri_diff_up <- pick_up(S2_ri_novelSS)
S2_ri_diff_down <- pick_down(S2_ri_novelSS)
S2_a3ss_diff_up <- pick_up(S2_a3ss_novelSS)
S2_a3ss_diff_down <- pick_down(S2_a3ss_novelSS)
S2_a5ss_diff_up <- pick_up(S2_a5ss_novelSS)
S2_a5ss_diff_down <- pick_down(S2_a5ss_novelSS)
S2_mxe_diff_up <- pick_up(S2_mxe_novelSS)
S2_mxe_diff_down <- pick_down(S2_mxe_novelSS)

S2_se_diff_up_filter <- keep_covered(S2_se_diff_up)
S2_se_diff_down_filter <- keep_covered(S2_se_diff_down)

S2_ri_diff_up_filter <- keep_covered(S2_ri_diff_up)
S2_ri_diff_down_filter <- keep_covered(S2_ri_diff_down)

S2_a3ss_diff_up_filter <- keep_covered(S2_a3ss_diff_up)
S2_a3ss_diff_down_filter <- keep_covered(S2_a3ss_diff_down)

S2_a5ss_diff_up_filter <- keep_covered(S2_a5ss_diff_up)
S2_a5ss_diff_down_filter <- keep_covered(S2_a5ss_diff_down)

S2_mxe_diff_up_filter <- keep_covered(S2_mxe_diff_up)
S2_mxe_diff_down_filter <- keep_covered(S2_mxe_diff_down)




# ------ peak in AS-events-region
# Events whose ID occurs in the peak-overlap table of the same event type.
events_with_peak <- function(events, peaks) {
  events[events$ID %in% peaks$ID, ]
}

# Peak overlaps that belong to one of the given events.
peaks_of_events <- function(peaks, events) {
  peaks[peaks$ID %in% events$ID, ]
}

# First row per peak identifier (peak.xuhao).
first_per_peak <- function(peaks) {
  peaks[!duplicated(peaks$peak.xuhao), ]
}

S2_diff_se_up_peak <- events_with_peak(S2_se_diff_up_filter, peak_SE_S2)
S2_diff_se_down_peak <- events_with_peak(S2_se_diff_down_filter, peak_SE_S2)

S2_diff_ri_up_peak <- events_with_peak(S2_ri_diff_up_filter, peak_RI_S2)
S2_diff_ri_down_peak <- events_with_peak(S2_ri_diff_down_filter, peak_RI_S2)

S2_diff_a5ss_up_peak <- events_with_peak(S2_a5ss_diff_up_filter, peak_A5SS_S2)
S2_diff_a5ss_down_peak <- events_with_peak(S2_a5ss_diff_down_filter, peak_A5SS_S2)

S2_diff_a3ss_up_peak <- events_with_peak(S2_a3ss_diff_up_filter, peak_A3SS_S2)
S2_diff_a3ss_down_peak <- events_with_peak(S2_a3ss_diff_down_filter, peak_A3SS_S2)

S2_diff_mxe_up_peak <- events_with_peak(S2_mxe_diff_up_filter, peak_MXE_S2)
S2_diff_mxe_down_peak <- events_with_peak(S2_mxe_diff_down_filter, peak_MXE_S2)


#
peak_S2_diff_se_up <- peaks_of_events(peak_SE_S2, S2_se_diff_up_filter)
peak_S2_diff_se_down <- peaks_of_events(peak_SE_S2, S2_se_diff_down_filter)

peak_S2_diff_ri_up <- peaks_of_events(peak_RI_S2, S2_ri_diff_up_filter)
peak_S2_diff_ri_down <- peaks_of_events(peak_RI_S2, S2_ri_diff_down_filter)

peak_S2_diff_a3ss_up <- peaks_of_events(peak_A3SS_S2, S2_a3ss_diff_up_filter)
peak_S2_diff_a3ss_down <- peaks_of_events(peak_A3SS_S2, S2_a3ss_diff_down_filter)

peak_S2_diff_a5ss_up <- peaks_of_events(peak_A5SS_S2, S2_a5ss_diff_up_filter)
peak_S2_diff_a5ss_down <- peaks_of_events(peak_A5SS_S2, S2_a5ss_diff_down_filter)

peak_S2_diff_mxe_up <- peaks_of_events(peak_MXE_S2, S2_mxe_diff_up_filter)
peak_S2_diff_mxe_down <- peaks_of_events(peak_MXE_S2, S2_mxe_diff_down_filter)


peak_S2_se_up <- first_per_peak(peak_S2_diff_se_up)
peak_S2_se_down <- first_per_peak(peak_S2_diff_se_down)

peak_S2_ri_up <- first_per_peak(peak_S2_diff_ri_up)
peak_S2_ri_down <- first_per_peak(peak_S2_diff_ri_down)

peak_S2_a3ss_up <- first_per_peak(peak_S2_diff_a3ss_up)
peak_S2_a3ss_down <- first_per_peak(peak_S2_diff_a3ss_down)

peak_S2_a5ss_up <- first_per_peak(peak_S2_diff_a5ss_up)
peak_S2_a5ss_down <- first_per_peak(peak_S2_diff_a5ss_down)

peak_S2_mxe_up <- first_per_peak(peak_S2_diff_mxe_up)
peak_S2_mxe_down <- first_per_peak(peak_S2_diff_mxe_down)


# All de-duplicated peak tables stacked; a peak can still appear more than
# once here when it overlaps events of several types or directions.
peak_S2_AS <- rbind(
  peak_S2_se_up, peak_S2_se_down,
  peak_S2_ri_up, peak_S2_ri_down,
  peak_S2_a3ss_up, peak_S2_a3ss_down,
  peak_S2_a5ss_up, peak_S2_a5ss_down,
  peak_S2_mxe_up, peak_S2_mxe_down
)

# First row per gene.
peak_S2_AS_gene <- peak_S2_AS[!duplicated(peak_S2_AS$GeneID), ]


setwd("E:/220625_PC/R workplace/220320_SXL/202404_Fig/260225_peak_in_AS/")
library(openxlsx)
wb <- createWorkbook()
addWorksheet(wb, "peak_S2_AS")
writeData(wb, "peak_S2_AS", peak_S2_AS, startCol = 1, startRow = 1, rowNames = FALSE)
saveWorkbook(wb, file = '20260225_peak_in_AS_S2.xlsx')

write.csv(peak_S2_AS_gene, file = "peak_S2_AS_gene.csv")




# ------ peak in AS-events-genes

# peak
setwd("E:/220625_PC/R workplace/220320_SXL/Fan.data/sxl_peak_bind_DEGs/20220328/")
peak_intersect_feature <- read.table("Sxl-bind-feature.txt", header = FALSE)
head(peak_intersect_feature)
# Columns 1-8 describe the peak, columns 9-14 the overlapped feature.
colnames(peak_intersect_feature) <- c(
  "peak.chr", "peak.strat", "peak.end", "peak.xuhao", "log10Pv",
  "peak.strand", "siteScore", "V8",
  "chr", "start", "end", "V12", "ID_feature", "strand"
)
peak_feature <- peak_intersect_feature
library(stringr)
# ID_feature is split on "_" once; its first three pieces are stored as
# GeneID, transID and feature.
id_parts <- str_split(peak_feature$ID_feature, "\\_", simplify = TRUE)
peak_feature$GeneID <- id_parts[, 1]
peak_feature$transID <- id_parts[, 2]
peak_feature$feature <- id_parts[, 3]

# Keep only peaks on the same strand as the overlapped feature.
peak_feature <- peak_feature[peak_feature$strand == peak_feature$peak.strand, ]

# Peak position label "<chr>:<start>-<end>".
peak_feature$pos <- paste0(
  peak_feature$peak.chr, ":", peak_feature$peak.strat, "-", peak_feature$peak.end
)
# First row per gene.
peak_gene <- peak_feature[!duplicated(peak_feature$GeneID), ]



#
# Differential events whose gene carries at least one same-strand peak.
events_in_peak_genes <- function(events) {
  events[events$GeneID %in% peak_gene$GeneID, ]
}

S2_diff_se_up_peak.gene <- events_in_peak_genes(S2_se_diff_up_filter)
S2_diff_se_down_peak.gene <- events_in_peak_genes(S2_se_diff_down_filter)

S2_diff_ri_up_peak.gene <- events_in_peak_genes(S2_ri_diff_up_filter)
S2_diff_ri_down_peak.gene <- events_in_peak_genes(S2_ri_diff_down_filter)

S2_diff_a3ss_up_peak.gene <- events_in_peak_genes(S2_a3ss_diff_up_filter)
S2_diff_a3ss_down_peak.gene <- events_in_peak_genes(S2_a3ss_diff_down_filter)

S2_diff_a5ss_up_peak.gene <- events_in_peak_genes(S2_a5ss_diff_up_filter)
S2_diff_a5ss_down_peak.gene <- events_in_peak_genes(S2_a5ss_diff_down_filter)

S2_diff_mxe_up_peak.gene <- events_in_peak_genes(S2_mxe_diff_up_filter)
S2_diff_mxe_down_peak.gene <- events_in_peak_genes(S2_mxe_diff_down_filter)

setwd("E:/220625_PC/R workplace/220320_SXL/202404_Fig/250318/")
library(openxlsx)
# NOTE(review): this workbook holds the peak_*_S2 tables, not the
# *_peak.gene tables computed just above - confirm which export was intended.
wb <- createWorkbook()
peak_sheets <- list(
  peak_SE_S2 = peak_SE_S2,
  peak_RI_S2 = peak_RI_S2,
  peak_A5SS_S2 = peak_A5SS_S2,
  peak_A3SS_S2 = peak_A3SS_S2,
  peak_MXE_S2 = peak_MXE_S2
)
for (sheet_name in names(peak_sheets)) {
  addWorksheet(wb, sheet_name)
  writeData(
    wb, sheet_name, peak_sheets[[sheet_name]],
    startCol = 1, startRow = 1, rowNames = FALSE
  )
}
# The same file name is written earlier in this script into the same folder;
# without overwrite = TRUE saveWorkbook() stops because the file exists.
saveWorkbook(wb, file = 'peak_in_AS_S2.xlsx', overwrite = TRUE)


RNA Binding sites in AS-events
https://www.lianganmin.cn/2026/02/26/20260226-RNA-Binding-sites-in-AS-events/
Author
An-min
Posted on
February 26, 2026
Licensed under