简介:本专题总结了我目前碰到的一些用R代码解决的Excel数据及表格处理中的难点,相比传统excel菜单式操作具有可复现及可批量化的优势。
所用r包:openxlsx、tidyverse、redaxl
Excel或wps功能的R语言实现
Excel日期格式
adam <- read.xlsx("xxxxx.xlsx")
adam$生日 <- as.Date(adam$生日, origin = "1899-12-30")
Excel查询功能的R语言实现
Excel查重功能的R语言实现
Excel数据比较功能的R语言实现
Excel表格拆分
同一数据集表格拆分
wb <- createWorkbook()
# 按乡镇筛选adam中的数据,并将其导出到工作簿的不同工作表中
for (i in unique(adam$乡镇)) {
split_data <- adam1 %>% filter(乡镇==i)
addWorksheet(wb, sheetName = i)
writeData(wb, sheet = i, x = split_data)
}
同一工作簿表格拆分
excel_file <- "xxxx.xlsx"
sheet_names <- excel_sheets(excel_file)
dir.create("拆分表格") #相对文件夹路径
# 使用map函数循环读取所有表格,并存储在一个列表中
for(sheet_name in sheet_names){
sheet_data=read.xlsx(excel_file,sheet=sheet_name)
locatoin <- paste0("拆分表格/",sheet_name,".xlsx")
write.xlsx(sheet_data,locatoin)
}
Excel表格合并
同一文件夹表格合并
file_names <- list.files(path = "file_location", pattern = "*.xlsx", full.names = TRUE)
wb <- createWorkbook()
data_list <- list()
# 循环读取每个Excel文件并添加到工作簿中
for (file in file_names) {
# 读取Excel文件内容
sheet_data <- read.xlsx(file)
#如列名不一致则需要设置列名 colnames(sheet_data) <- c("英","中")
#提取所需列
selected_columns <- colnames(sheet_data)
sheet_data <- sheet_data %>% select(any_of(selected_columns))
# 提取文件名作为工作表名称
sheet_name <- tools::file_path_sans_ext(basename(file))
# 添加工作表到工作簿中
addWorksheet(wb, sheet_name)
# 写入数据到对应的工作表
writeData(wb, sheet_name, sheet_data)
#将当前数据集添加变量后存入列表用于合并
sheet_data <- sheet_data %>% mutate(name=sheet_name)
data_list[[sheet_name]] <- sheet_data
}
combined_data <- data_list %>% bind_rows() #求汇总表
addWorksheet(wb, "汇总表")
writeData(wb, "汇总表", combined_data)
# 保存工作簿到指定路径
saveWorkbook(wb, "合并工作表.xlsx", overwrite = TRUE)
同一工作蒲表格合并
library(openxlsx)
# 读取Excel文件的所有表格名称
excel_file <- "C:/Users/99405/OneDrive/资料/peppa pig/peppa pig.xlsx"
sheet_names <- getSheetNames(excel_file)
selected_columns <- colnames(all_sheets[[1]])[1:15]
# 定义一个函数,用于读取每个表格,并只保留指定的列
read_and_select <- function(sheet_name) {
df <- read.xlsx(excel_file, sheet = sheet_name)
# 保留指定列,如果缺失则填充NA
df_selected <- df %>% select(any_of(selected_columns)) %>%
mutate(across(everything(), as.character))
return(df_selected)
}
all_sheets <- map(sheet_names, read_and_select)
# 将所有表格合并为一个数据集adam
adam1 <- bind_rows(all_sheets)
write.xlsx(adam,"xxx.xlsx")
Excel表格自定义整合
adam1 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=1)
adam2 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=2)
adam3 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=3)
adam4 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=4)
adam5 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=5)
adam6 <- read.xlsx("r批量化表格处理所用数据.xlsx",sheet=6)
wb <- createWorkbook()
doc <- read_docx()
for (i in 1:nrow(adam1)){
test <- bind_rows(adam4[i,c(2:5)],adam5[i,c(2:5)],adam6[i,c(2:5)])
test[4,1] <- adam1[i,5]
test[4,2] <- adam2[i,5]
test[5,1] <- adam1[i,6]
test[5,2] <- adam2[i,6]
test$组别 <- c("常规组","tDCS组","联合组","F","P")
test <- test %>% select(组别,everything())
# Check the condition and add superscript 'a' if adam3[i,3] < 0.05
if (adam3[3*(i-1)+1, 7] < 0.05) {
# Add superscript 'a' to test[3,3]
test[2, 3] <- paste0(test[2, 3], "a")
}
if ((adam3[3*(i-1)+3, 7] < 0.05)&(adam3[3*(i-1)+2, 7] < 0.05)) {
# Add superscript 'a' to test[3,3]
test[3, 3] <- paste0(test[3, 3], "ab")
} else if (adam3[3*(i-1)+2, 7] < 0.05) {
# Add superscript 'a' to test[3,3]
test[3, 3] <- paste0(test[3, 3], "a")
}
addWorksheet(wb, sheetName = adam1[i,1])
writeData(wb, sheet = adam1[i,1], x = test)}
saveWorkbook(wb, "批量化表格.xlsx", overwrite = TRUE
Excel转word
使用officer包
excel_file <- "xxx.xlsx"
sheet_names <- excel_sheets(excel_file)
doc <- read_docx()
for (i in seq_along(sheet_names)){
test <- read,xlsx(excel_file,sheet=i)
# Convert the table to a flextable for better formatting in Word
ft <- flextable(test)
# Add a title for each table (using the value from adam1[i,1] as sheetName)
title <- paste0("Table",i])
# Add the table, title, and footnote to the Word document
doc <- doc %>%
body_add_par(title, style = "Normal") %>%
body_add_flextable(ft) %>%
body_add_par("注:p<0.05表示差异有统计学意义", style = "Normal") %>%
body_add_par("", style = "Normal") # Blank line between tables
}
print(doc, target = "output_tables.docx")