简介:本专题总结了我目前碰到的一些用R代码解决的Excel数据及表格处理中的难点,相比传统excel菜单式操作具有可复现及可批量化的优势。
所用r包:openxlsx、tidyverse、redaxl、rio
Excel或wps功能的R语言实现
Excel日期格式
adam <- read.xlsx("xxxxx.xlsx")
adam$生日 <- as.Date(adam$生日, origin = "1899-12-30")
Excel查询功能的R语言实现
Excel查重功能的R语言实现
Excel数据比较功能的R语言实现
Excel表格拆分
同一数据集表格拆分
data_list <- list()
# 按乡镇筛选adam中的数据,并将其导出到工作簿的不同工作表中
for (i in unique(adam$group)) {
split_data <- bcadam %>% filter(group==i)
data_list[[i]] <- split_data
}
export(data_list,"拆分后数据.xlsx")
同一工作簿表格拆分
excel_file <- "xxxx.xlsx"
sheet_names <- excel_sheets(excel_file)
dir.create("拆分表格") #相对文件夹路径
# 使用map函数循环读取所有表格,并存储在一个列表中
for(sheet_name in sheet_names){
sheet_data=read.xlsx(excel_file,sheet=sheet_name)
locatoin <- paste0("拆分表格/",sheet_name,".xlsx")
write.xlsx(sheet_data,locatoin)
}
Excel表格合并
# 读取Excel文件的所有表格名称
file_names <- list.files(path = "位置pc号信息", pattern = "*.xlsx",full.names = TRUE)
selected_columns <- c("序号","位置","pc","姓名","类型")
# 定义一个函数,用于读取每个表格,并只保留指定的列
all_sheets1=list()
read_and_select1 <- function(file_name) {
sheet_names <- getSheetNames(file_name)
sheets <- tibble()
for (i in sheet_names){
df <- import(file_name, sheet = i)
# 保留指定列,如果缺失则填充NA
df_selected <- df %>% select(any_of(selected_columns)) %>%
mutate(冻存盒编号=i) %>%
mutate(across(everything(), as.character))
sheets <- bind_rows(sheets,df_selected)
}
return(sheets)}
all_sheets1 <- map(file_names,read_and_select1)
# 将所有表格合并为一个数据集adam
adam1 <- bind_rows(all_sheets1)
all_sheets1[["汇总"]] <- adam1
all_sheets1 <- all_sheets1 %>% set_names("白","红","黄","汇总")
export(all_sheets1,"re整理后数据.xlsx")
Excel表格自定义整合
adam1 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=1)
adam2 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=2)
adam3 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=3)
adam4 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=4)
adam5 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=5)
adam6 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=6)
wb <- createWorkbook()
doc <- read_docx()
for (i in 1:nrow(adam1)){
test <- bind_rows(adam4[i,c(2:5)],adam5[i,c(2:5)],adam6[i,c(2:5)])
test[4,1] <- adam1[i,5]
test[4,2] <- adam2[i,5]
test[5,1] <- adam1[i,6]
test[5,2] <- adam2[i,6]
test$组别 <- c("常规组","tDCS组","联合组","F","P")
test <- test %>% select(组别,everything())
# Check the condition and add superscript 'a' if adam3[i,3] < 0.05
if (adam3[3*(i-1)+1, 7] < 0.05) {
# Add superscript 'a' to test[3,3]
test[2, 3] <- paste0(test[2, 3], "a")
}
if ((adam3[3*(i-1)+3, 7] < 0.05)&(adam3[3*(i-1)+2, 7] < 0.05)) {
# Add superscript 'a' to test[3,3]
test[3, 3] <- paste0(test[3, 3], "ab")
} else if (adam3[3*(i-1)+2, 7] < 0.05) {
# Add superscript 'a' to test[3,3]
test[3, 3] <- paste0(test[3, 3], "a")
}
addWorksheet(wb, sheetName = adam1[i,1])
writeData(wb, sheet = adam1[i,1], x = test)}
saveWorkbook(wb, "批量化表格.xlsx", overwrite = TRUE
Excel转word
使用officer包
excel_file <- "xxx.xlsx"
sheet_names <- excel_sheets(excel_file)
doc <- read_docx()
for (i in seq_along(sheet_names)){
test <- read,xlsx(excel_file,sheet=i)
# Convert the table to a flextable for better formatting in Word
ft <- flextable(test)
# Add a title for each table (using the value from adam1[i,1] as sheetName)
title <- paste0("Table",i])
# Add the table, title, and footnote to the Word document
doc <- doc %>%
body_add_par(title, style = "Normal") %>%
body_add_flextable(ft) %>%
body_add_par("注:p<0.05表示差异有统计学意义", style = "Normal") %>%
body_add_par("", style = "Normal") # Blank line between tables
}
print(doc, target = "output_tables.docx")