R数据分析（2）–孟德尔随机化

R数据分析（2）–孟德尔随机化

|

2024-8-05 17:06

本文最后更新于 310 天前，其中的信息可能已经有所发展或是发生改变。

简介：批量两样本孟德尔随机化、中介孟德尔随机化及多变量孟德尔随机化代码

确认GWAS网站令牌

Sys.getenv("HOME") #查找根目录，更新api
ieugwasr::get_opengwas_jwt() #查看令牌
ieugwasr::api_status() #查看api状态

抽取工具变量

# 设定暴露id
exposure_datc <- c('ieu-a-31')# 按需添加或减少更多的outcome

exposure_dats <- list()
for (i in seq_along(exposure_datc)) {
  exposure_dat<-extract_instruments(outcomes=exposure_datc[i],
                                    p1=5e-05, #设定选取的相关性p值，默认为-08
                                    clump=TRUE, r2=0.001, #去掉连锁不平衡
                                    kb=10000,access_token= NULL)
  exposure_dats[[i]] <- exposure_dat}


# 筛除弱工具变量
#查看并剔除暴露1

get_f<-function(dat,F_value=10){
  log<-is.na(dat$eaf.exposure)
  log<-unique(log)
  if(length(log)==1)
  {if(log==TRUE){
    print("数据不包含eaf，无法计算F统计量")
    return(dat)}
  }
  if(is.null(dat$beta.exposure[1])==T || is.na(dat$beta.exposure[1])==T){print("数据不包含beta，无法计算F统计量")
    return(dat)}
  if(is.null(dat$se.exposure[1])==T || is.na(dat$se.exposure[1])==T){print("数据不包含se，无法计算F统计量")
    return(dat)}
  if(is.null(dat$samplesize.exposure[1])==T || is.na(dat$samplesize.exposure[1])==T){print("数据不包含samplesize(样本量)，无法计算F统计量")
    return(dat)}
  
  
  if("FALSE"%in%log && is.null(dat$beta.exposure[1])==F && is.na(dat$beta.exposure[1])==F && is.null(dat$se.exposure[1])==F && is.na(dat$se.exposure[1])==F && is.null(dat$samplesize.exposure[1])==F && is.na(dat$samplesize.exposure[1])==F){
    R2<-(2*(1-dat$eaf.exposure)*dat$eaf.exposure*(dat$beta.exposure^2))/((2*(1-dat$eaf.exposure)*dat$eaf.exposure*(dat$beta.exposure^2))+(2*(1-dat$eaf.exposure)*dat$eaf.exposure*(dat$se.exposure^2)*dat$samplesize.exposure))
    F<- (dat$samplesize.exposure-2)*R2/(1-R2)
    dat$R2<-R2
    dat$F<-F
    dat<-subset(dat,F>F_value)
    delnumber <- nrow(subset(dat,F<=F_value))
    print(paste("删除了",delnumber,"个弱工具变量"))
    return(dat)
  }
}



# 补充样本量（如缺失）
samplesize <- c(2505,2505,2505,2625,2625)
for (i in seq_along(exposure_datc)) {
  exposure_dats[[i]]$samplesize.exposure <- samplesize[i]}


# 过滤弱工具变量
for ( i in seq_along(exposure_dats)){
  exposure_dats[[i]] <- get_f(exposure_dats[[i]],F_value=10)
}

提取结果数据

# 设定结局id

outcome_dats <- c("finn-b-R18_ABNORMAL_SPERMATOZ","finn-b-N14_MALEINFERT")

result_dats <- list()
for (i in seq_along(exposure_dats)) {
  current_results <- list()
  for (n in seq_along(outcome_dats)){
    # 提取当前 exposure_dat 的 SNP 列
    exposure_dat <-  exposure_dats[[i]]
    snps <- exposure_dats[[i]]$SNP
    outcome <- outcome_dats[n]
    # 根据 SNP 列生成对应的 outcome_dat，并将其存储到 outcome_dats 列表中
    result_dat<- extract_outcome_data(snps = snps,
                                      outcome = outcome, 
                                      proxies = FALSE,
                                      maf_threshold = 0.01,
                                      access_token = NULL)
    # 效应等位与效应量保持统一
    if (is.null(result_dat)) {
      next
    }
    adjresult_dat<- harmonise_data(exposure_dat = exposure_dat,
                                   outcome_dat =  result_dat)
    current_results[[paste0("result_dat", i, "_", n)]] <- adjresult_dat
    
  }
  result_dats[[paste0("result_dat", i)]] <- current_results
}

MR分析

result_res <- list()
for (i in names(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    res <- mr(result_dats[[i]][[n]])
    res[,"dataid"] <- i
    res[,"samplesize.exposure"] <- result_dats[[i]][[n]][1,"samplesize.exposure"]
    res[,"samplesize.outcome"] <- result_dats[[i]][[n]][1,"samplesize.outcome"]
    res$lcl <- res$b - (1.96 * res$se)
    res$ucl <- res$b + (1.96 * res$se)
    res$or <- exp(res$b)
    res$orlcl <- exp(res$lcl)
    res$orucl <- exp(res$ucl)
    current_results[[n]] <- res
  }
  result_res[[i]] <- current_results 
}


# 合并列表的结果按照一个暴露数据一张表导出为excel
wb <- createWorkbook()

# 循环遍历 result_dats 中的每个数据框，并将其导出到工作簿的不同工作表中
for (i in names(result_res)) {
  combined_data <- do.call(bind_rows, result_res[[i]])
  
  # 将 combined_data 导出到工作簿的一个工作表中
  addWorksheet(wb, sheetName = i)
  writeData(wb, sheet = i, x = combined_data)
}

# 将工作簿保存为 Excel 文件
saveWorkbook(wb, "res结果.xlsx", overwrite = TRUE)

异质性检验

# 4.1mr_presso来找出和处理离群值（也有检测异质性的功能）
for (i in names(result_dats)){
  for(n in names(result_dats[[i]])){
    if (nrow(result_dats[[i]][[n]]) < 3) {
      next  # 如果观测数小于2则无法进行presso分析跳过当前循环
    }
    t <- run_mr_presso(result_dats[[i]][[n]],NbDistribution = 1000)
    indice <- t[[1]][["MR-PRESSO results"]][["Distortion Test"]][["Outliers Indices"]]
    print(indice)
    if (is.null(indice)) {
      next  # 如果未发现离群值则不输出
    }
    print(result_dats[[i]][[n]][indice,])
  }}


# 删除上面输出提示的异常值
result_dats[["result_dat7"]][["result_dat7_2"]] <- result_dats[["result_dat7"]][["result_dat7_2"]][-indice,]

# 如发现异常SNP剔除异常值后再重新跑上面的mr分析


# 4.2异质性检验 mr_heterogeneity和水平多效性检验
result_heterogeneity <- list()
for (i in names(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    if (nrow(result_dats[[i]][[n]]) < 3) {
      next  # 如果观测数小于3则无法进行异质性检验分析跳过当前循环
    }
    a <- mr_heterogeneity(result_dats[[i]][[n]])# p＜0.05表明存在异质性
    b <- mr_pleiotropy_test(result_dats[[i]][[n]])
    result <- bind_cols(a[1,],b[,c(5:7)])
    current_results[[n]] <- result
  }
  result_heterogeneity[[i]] <- current_results
}



# 合并列表的结果按照一个暴露数据一张表导出为excel

wb <- createWorkbook()

# 循环遍历 result_dats 中的每个数据框，并将其导出到工作簿的不同工作表中
for (i in names(result_heterogeneity)) {
  combined_data <- do.call(bind_rows, result_heterogeneity[[i]])
  
  # 将 combined_data 导出到工作簿的一个工作表中
  addWorksheet(wb, sheetName = i)
  writeData(wb, sheet = i, x = combined_data)
}

# 将工作簿保存为 Excel 文件
saveWorkbook(wb, "暴露结局_heterogeneity.xlsx", overwrite = TRUE)

绘图

# 创建存储文件夹
dir.create(file.path("figures"))
exposures <- c("暴露1") #这里根据实际暴露数量更改
chart_types <- c("散点图", "漏斗图", "留一森林图","森林图")
for (exposure in exposures) {
  # 创建暴露目录
  exposure_dir <- file.path("figures", exposure)
  dir.create(exposure_dir)
  
  # 对于每种图表类型，创建一个子目录
  for (chart_type in chart_types) {
    chart_dir <- file.path(exposure_dir, chart_type)
    dir.create(chart_dir)
  }
}

# 散点图
for (i in seq_along(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    file_name <- paste("figures/暴露", i,"/散点图/",n,".png", sep = "")
    p1 <- mr_scatter_plot(result_res[[i]][[n]], result_dats[[i]][[n]])
    ggsave(file_name, p1[[1]], width = 8, height = 6)
  }
}


# 漏斗图
for (i in seq_along(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    file_name <- paste("figures/暴露", i,"/漏斗图/",n,".png", sep = "")
    p2 <- mr_funnel_plot(singlesnp_results = mr_singlesnp(result_dats[[i]][[n]]))
    ggsave(file_name, p2[[1]], width = 8, height = 6)
  }
}

#5.3森林图
#5.3.1留一分析的森林图

for (i in seq_along(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    file_name <- paste("figures/暴露", i,"/留一森林图/",n,".png", sep = "")
    res_loo <- mr_leaveoneout(result_dats[[i]][[n]])
    p3 <- mr_leaveoneout_plot(res_loo) 
    ggsave(file_name, p3[[1]], width = 8, height = 6)
  }
}

# 森林图
for (i in seq_along(result_dats)){
  current_results <- list()
  for(n in names(result_dats[[i]])){
    file_name <- paste("figures/暴露", i,"/森林图/",n,".png", sep = "")
    res_single <- mr_singlesnp(result_dats[[i]][[n]])
    p4 <- mr_forest_plot(res_single) 
    ggsave(file_name, p4[[1]], width = 8, height = 6)
  }
}
# 森林图美化版

bc <- read.xlsx("肠炎肠菌生殖孟德尔随机化/森林图.xlsx",1)


## 处理数据为数值型并调整小数位数

bc$se<-round(as.numeric(bc$se),8)
bc$or<-round(as.numeric(bc$or),3)
bc$orlcl<-round(as.numeric(bc$orlcl),3)
bc$orucl<-round(as.numeric(bc$orucl),3)
bc$pval<-round(as.numeric(bc$pval),3)
# 调整outcome列的缩进
bc$outcome<- ifelse(!is.na(bc$samplesize.outcome), bc$outcome, paste0("   ", bc$outcome))
########把缺失值设置为空白以避免图形中显示为NA不美观
bc$samplesize.outcome<- ifelse(is.na(bc$samplesize.outcome), "", bc$samplesize.outcome)
bc$pval <- ifelse(is.na(bc$pval), "", bc$pval)
bc$outcome <- ifelse(is.na(bc$outcome), "", bc$outcome)
########创建一个空白列用于放图形
bc$` ` <- paste(rep(" ", 20), collapse = " ")
########创建新列名OR (95% CI)
bc$`OR (95% CI)` <- ifelse(is.na(bc$se), "",
                           sprintf("%.3f (%.3f - %.3f)",
                                   bc$or, bc$orlcl, bc$orucl))#sprintF返回字符和可变量组合

tm <- forest_theme(base_size = 10,  #文本的大小
                   # Confidence interval point shape, line type/color/width
                   ci_pch = 15,   #可信区间点的形状
                   ci_col = "#762a83",    #CI的颜色
                   ci_fill = "#0000FF80",     #ci颜色填充
                   ci_alpha = 0.8,        #ci透明度
                   ci_lty = 1,            #CI的线型
                   ci_lwd = 2,          #CI的线宽
                   ci_Theight = 0.2, # Set an T end at the end of CI  ci的高度，默认是NULL
                   # Reference line width/type/color   参考线默认的参数，中间的竖的虚线
                   refline_lwd = 1,       #中间的竖的虚线
                   refline_lty = "dashed",
                   refline_col = "grey20",
                   # Vertical line width/type/color  垂直线宽/类型/颜色   可以添加一条额外的垂直线，如果没有就不显示
                   vertline_lwd = 1,              #可以添加一条额外的垂直线，如果没有就不显示
                   vertline_lty = "dashed",
                   vertline_col = "grey20",
                   # Change summary color for filling and borders   更改填充和边框的摘要颜色
                   summary_fill = "yellow",       #汇总部分大菱形的颜色
                   summary_col = "#4575b4",
                   # Footnote font size/face/color  脚注字体大小/字体/颜色
                   footnote_cex = 0.6,
                   footnote_fontface = "italic",
                   footnote_col = "red")



forestplot <- forest(bc[,c(1,2,10,11,6)],   #这里分别对应图形1-5列的显示数值，第三列为空白用来画图
                     est = bc$or,       #效应值
                     lower = bc$orlcl,     #可信区间下限
                     upper = bc$orucl,      #可信区间上限
                     # sizes = bc$se,  #(感觉这里可加可不加,如加则取消注释)
                     ci_column = 3.2,   #在那一列画森林图，要选空的那一列
                     ref_line = 1,
                     arrow_lab = c(" ", " "),
                     xlim = c(0, 2),  #森林图轴的范围
                     ticks_at = c(0, 0.5, 1, 1.5, 2), #森林图刻度
                     footnote = "OR, odds ratio; IVX, inverse variance weighting; CI, confidence interval.",
                     theme = tm)


ggsave("森林图.png",forestplot, width = 8, height = 6)

forestploter twosampleMR

暂无评论

发送评论编辑评论

Markdown

|´・ω・)ノ

ヾ(≧∇≦*)ゝ

(☆ω☆)

（╯‵□′）╯︵┴─┴

￣﹃￣

(/ω＼)

∠( ᐛ 」∠)＿

(๑•̀ㅁ•́ฅ)

→_→

୧(๑•̀⌄•́๑)૭

٩(ˊᗜˋ*)و

(ノ°ο°)ノ

(´இ皿இ｀)

⌇●﹏●⌇

(ฅ´ω`ฅ)

(╯°A°)╯︵○○○

φ(￣∇￣o)

ヾ(´･･｀｡)ノ"

( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃

(ó﹏ò｡)

Σ(っ °Д °;)っ

( ,,´･ω･)ﾉ"(´っω･｀｡)

╮(╯▽╰)╭

o(*////▽////*)q

＞﹏＜

( ๑´•ω•) "(ㆆᴗㆆ)

颜文字

Emoji

小恐龙

花!