注:整理了日常工作中,对于数据变量添加标签的需求
所用R包:openxlsx, Hmisc, haven
R语言添加标签(stata导出可用)
# 简单添加
var_labels <- () #给出标签向量
data <- imap(data, ~ {
label(data[[.y]]) <- var_labels[[.y]] # 使用列名作为键获取标签
})
#导入标签数据集,对应变量名查询填加
label_data <- read.csv("青春末期随访24.11.3数据字典.csv")
for(i in colnames(final_data)){
label <- label_data %>% filter(Variable...Field.Name==i)
if(nrow(label)>0){
Hmisc::label(final_data[[i]]) <- paste(label[,5],label[,6])}
}
# 导出stata
write_dta(test,"青春末期数据stata添加标签版.dta")
移除R数据的标签属性(标签属性会在数据处理时导致bug)
# 移除变量标签
data <- as.data.frame(lapply(data, function(x) {
class(x) <- setdiff(class(x), "labelled") # 移除标签属性
return(x)
}))
Excel列名旁添加标注
# 1.示例简化版本
# 创建数据框
data <- data.frame(
id = 1:5,
age = c(25, 30, 35, 40, 45),
gender = c("M", "F", "M", "F", "M")
)
# 定义变量标签
var_labels <- list(
id = "Unique Identifier",
age = "Age of the individual",
gender = "Gender of the individual"
)
# 创建 Excel 工作簿
wb <- createWorkbook()
addWorksheet(wb, "Data")
# 写入数据到 Excel
writeData(wb, sheet = "Data", x = data)
# 添加批注(标签)
writeComment(wb, sheet = "Data", col = 1, row = 1, comment = createComment(var_labels$id))
writeComment(wb, sheet = "Data", col = 2, row = 1, comment = createComment(var_labels$age))
writeComment(wb, sheet = "Data", col = 3, row = 1, comment = createComment(var_labels$gender))
# 保存 Excel 文件
saveWorkbook(wb, "labelled_data_with_comments.xlsx", overwrite = TRUE)
# 2.批量化引入外部数据集版本
label_data <- read.csv("青春末期随访24.11.3数据字典.csv")
wb <- createWorkbook()
addWorksheet(wb, "Data")
# 写入数据到 Excel
writeData(wb, sheet = "Data", x = test)
# 批量为每个列标题添加批注
for(i in colnames(final_data)){
label <- label_data %>% filter(Variable...Field.Name==i)
if(nrow(label)>0){
writeComment(
wb,
sheet = "Data",
col = which(colnames(final_data)==i),
row = 1, # 第一行是标题行
comment = createComment(paste(label[,5],label[,6]), author = "")
)
}
}
# 保存 Excel 文件
saveWorkbook(wb, "青春末期随访数据excel标注.xlsx", overwrite = TRUE)