简介:gtsummary包可实现快速常用论文表
详细参数设置参考官方网址
基线表tbl_summary
1.基线表
baseline_vars <- c("group","a1","a2") #指定要分析的变量
specific_vars <- c("a2") #指定非正态变量
t1 <- data %>% select(baseline_vars) %>% #选择需要分析的变量
tbl_summary(by = group, #指定分组变量
missing="no", #指定是否显示变量的缺失值个数
statistic = list(#连续性变量的统计量设定
all_continuous() ~ "{mean}({sd})", #连续性变量的统计量设定
specific_vars ~ "{median}({p25},{p75})",
all_categorical() ~ "{n}({p}%)"),#分类变量的统计量设定
digits = list(all_continuous() ~ 2,
all_categorical()~c(0,2))) %>% #设定分类统计量的小数位数
add_overall() %>%
add_p(test=list(all_continuous()~"t.test", #这里不设置test参数则为默认的秩和检验
specific_vars ~"wilcox.test"),
pvalue_fun = ~ style_pvalue(.x, digits = 3)) %>%
modify_fmt_fun(statistic ~label_style_number(digits=2)) %>% #调整统计量显示数字位数
modify_header(statistic ~ "**Test Statistic**") #添加检验统计量
t1
2.结果导出
#结果输出为excel表
t1 %>% as_hux_xlsx(file = 'table1.xlsx')
#将结果转为word表
t1 %>% as_gt() %>% gt::gtsave("table1.docx")
#转换为R数据集以便进一步处理
table1 <- t1 %>% as_tibble()
3.拓展(自定义函数)
mytest1 <- function(data, variable, ...) {
shapiro.test(data[[variable]])$p.value
}
add_stat(fns=all_continuous()~mytest1) %>%
modify_header(add_stat_1 = "**shapiro.test p**") #添加正态性检验
#添加另一种输出z值的wilcoxon结果同理
mytest3 <- function(data, variable,by, ...) {
wilcox_test(data[[variable]] ~ data[[by]])@statistic@teststatistic
}
mytest4 <- function(data, variable,by, ...) {
pvalue(wilcox_test(data[[variable]] ~ data[[by]]))
}
语法简介:
“select”选择需要被分析的变量(包括分组变量)
tbl_summary函数语法
“by=”指定分组变量
“statistic=”指定变量的统计值,“all_continuous() ~”指定连续性变量,“all_categorical()”指定分类变量
“digits =”指定显示的统计量的小数位数,如要分别指定可以用向量的形式
“add_p(test=all_continuous()~”t.test”,pvalue_fun = ~ style_pvalue(.x, digits = 3))”添加p值,指定检验方法和p值的小数位数
add_difference(test=all_continuous() ~”t.test”,estimate=all_continuous()~ label_style_sigfig(digits = 4),pvalue_fun = ~ style_pvalue(.x, digits = 3)) #多增加了差值的置信区间
“add_overall()”指定添加汇总列
“add_stat_label”添加统计量显示标签
“modify_header(statistic ~ “Test Statistic”)”添加检验统计量
add_stat(fns=all_continuous()~mytest1) 增加自定义统计量列
modify_header(add_stat_1 = “**statistic Z**”) 定义add_stat增加列的列名
#定义检验统计量和自定义统计量的小数位数(label_style_pvalue表示p值风格小数)
modify_fmt_fun(statistic ~ ~ style_number(digits=2),add_stat_1 ~ ~ style_number(digits=2),add_stat_2 ~ label_style_pvalue(digits = 2))
“as_hux_xlsx(file = ‘table1.xlsx’)” 结果输出为excel表
注意事项:
add_p函数默认添加的检验方法是非参数检验,如需要t检验p值需要使用“test=all_continuous()~”t.test””
有一些离散型变量如年龄或者分类数较多的变量,默认会识别为分类变量,可以用“type=”来指定其为连续性变量(eg:type=list(a3~”continuous”) )
默认会将二分类变量单行显示(可用value参数指定要显示的分类),如需修改为两行显示需指定type = list(all_dichotomous() ~ “categorical”)
有时可能出现添加add_overall() 时汇总列出现在两组中间的情况,这时需设定分组变量为factor格式
数据结果展示:
单因素表tbl_continuous
vars <- c("age_class", "ag117", "sch_c3", "zhuado_pub", "mpcaWI_C3",
"M_edu", "M_job", "parity_c2", "F_edu", "F_job")
t1 <- data %>% select(vars,"si") %>% #选择需要分析的变量
tbl_continuous(
variable = si,
include =c("age_class", "ag117", "sch_c3", "zhuado_pub", "mpcaWI_C3",
"M_edu", "M_job", "parity_c2", "F_edu", "F_job"),
statistic = ~"{mean}({sd})",
digits = list(all_continuous() ~ 2,
all_categorical()~c(2,2))) %>% #设定分类统计量的小数位数
add_p(test=all_categorical()~ "oneway.test",pvalue_fun = ~ style_pvalue(.x, digits = 3)) %>%
modify_header(statistic ~ "**Test Statistic**") %>%
modify_fmt_fun(statistic ~ label_style_sigfig(digits = 4))
t1 %>% as_hux_xlsx("table1.xlsx")
数据结果展示:
回归表tbl_uvregression和tbl_regression
#单因素回归表
t1 <- data %>%
# 选择要分析的变量
select("si","bc07", "ag117", "sch_c3", "zhuado_pub", "mpcaWI_C3",
"M_edu", "M_job", "parity_c2", "F_edu", "F_job") %>%
# 执行单变量回归
tbl_uvregression(
method = glm, # 使用广义线性模型
y = si, # 指定因变量
method.args = list(family = gaussian), # 指定logistic回归
estimate_fun= ~style_number(.x, digits = 2),
exponentiate = F, # 显示OR值而非系数
hide_n = TRUE , # 隐藏每组的样本量
pvalue_fun = ~ style_pvalue(.x, digits = 3)
) %>%
modify_column_merge(
pattern = "{estimate}({conf.low},{conf.high})", # 合并格式:估计值 (置信区间)
rows = !is.na(estimate)
)
#多因素回归表
model <- glm(si ~ bc07+ag117+sch_c3+zhuado_pub+mpcaWI_C3+M_edu+M_job+parity_c2+F_edu+F_job, data =data)
t2 <- tbl_regression(model, estimate_fun=~style_number(.x, digits = 2),pvalue_fun = ~ style_pvalue(.x, digits = 3))%>%
modify_column_merge(
pattern = "{estimate}({conf.low},{conf.high})", # 合并格式:估计值 (置信区间)
rows = !is.na(estimate)
)
#合并结果
table4 <- tbl_merge(
tbls = list(t1, t2),
tab_spanner = c("**single model**", "**multi model**")
)
table4 %>% as_gt() %>% gtsave("table4.docx")
数据结果展示:









