简介:高效地实现数据集的筛选、转置、合并,解决Excel表格操作的痛点
所用R包:tidyverse(运行下文代码前需先执行 library(tidyverse))
1.取数据集子集
select函数筛选列
# Keep columns by position: the first and the second column.
adam1 <- adam %>%
  select(1, 2)
# Keep columns by name.
adam1 <- adam %>%
  select("x", "y")
# Keep a contiguous range of positions: columns 1 through 4.
adam1 <- adam %>%
  select(1:4)
# A negative position drops that column: everything except the first.
adam1 <- adam %>%
  select(-1)
filter函数筛选行
# Keep only the rows whose x equals "1".
# Comparison needs `==`; a single `=` is read by filter() as a named
# argument and raises an error instead of filtering.
adam1 <- adam %>%
  filter(x == "1")
2.数据转置
宽转长
# pivot_longer: reshape from wide to long.
# Columns to transpose: every 12th column of `pri`, starting at column 15.
vars_to_transpose1 <- colnames(pri)[seq(15, ncol(pri), by = 12)]
# Stack the chosen columns: their names go to `period`, their values to
# `score1`.
pivot_pri1 <- pri %>%
  pivot_longer(
    all_of(vars_to_transpose1),
    names_to = "period",
    values_to = "score1"
  )
长转宽
# pivot_wider: reshape from long to wide.
# One new column per value of `station`, filled from `seen`.
fish_encounters %>%
  pivot_wider(
    names_from = station,
    values_from = seen
  )
# Several value columns at once: each `variable` value is spread into one
# column for `estimate` and one for `moe`.
us_rent_income %>%
  pivot_wider(
    names_from = variable,
    values_from = c(estimate, moe)
  )
3.数据集合并
横向
# Left join: keep every row of daishixiong, matching bc on childid.
daishixiong <- daishixiong %>%
  left_join(bc, by = "childid")
# Right join: keep every row of bc, matching on childid.
daishixiong <- daishixiong %>%
  right_join(bc, by = "childid")
# Bind columns side by side; rows are matched by position, not by a key.
adam <- adam %>%
  bind_cols(adam1)
纵向
# Stack the rows of adam1 underneath adam; columns are matched by name.
adam <- adam %>%
  bind_rows(adam1)
4.数据集的集合运算
并集
# Union of several data sets: fold union() pairwise over the list.
result <- list(data1, data2, data3, data4) %>%
  reduce(union)
交集
result <- list(data1, data2, data3, data4) %>% reduce(intersect) # 取数据集交集
5.数据集排序
横向
# Reordering columns with select() and relocate().
# Put "id" first and keep every other column after it.
adam %>%
  select("id", everything())
# Move all numeric columns so that they come right after the `name` column.
adam %>%
  relocate(where(is.numeric), .after = name)
#进阶,根据变量特征归类排序
# Method 1: order the columns by the number at the end of each column name.
# Column names of the data set.
col_names <- colnames(dataresult)
# Trailing digits of each name as a number; NA when a name has no numeric
# suffix.
num_at_end <- as.numeric(str_extract(col_names, "\\d+$"))
# order() puts NA last and leaves ties in their original order, so columns
# sharing a suffix keep their relative position and unnumbered columns end up
# at the end.
sorted_colnames <- col_names[order(num_at_end)]
# all_of() marks sorted_colnames as an external character vector. A bare
# vector inside select() is deprecated and would be mistaken for a column if
# the data had one with that name.
dataresult <- dataresult %>%
  select(all_of(sorted_colnames))
# Advanced method 2: order the columns by trailing number first, then by
# variable family within the same number.
# One row per column of dataresult; `value` holds the column name.
zz <- tibble(value = colnames(dataresult))
# fz1: number at the end of the column name (NA when there is none).
# Derived from zz$value so it always lines up with the current column order,
# even when dataresult was reordered after col_names was captured.
zz$fz1 <- as.numeric(str_extract(zz$value, "\\d+$"))
# fz2: rank of the variable family; names matching no pattern get NA.
zz <- zz %>%
  mutate(
    fz2 = case_when(
      str_detect(value, "Scr") ~ 1,
      str_detect(value, "eGFR") ~ 2,
      str_detect(value, "年龄") ~ 3,
      str_detect(value, "时间") ~ 4
    )
  )
# arrange() sorts NA last, so unnumbered or unmatched columns go to the end.
zz <- zz %>%
  arrange(fz1, fz2)
sorted_colnames <- zz[[1]]
# all_of() marks sorted_colnames as an external character vector rather than
# a column of dataresult.
dataresult <- dataresult %>%
  select(all_of(sorted_colnames))
纵向
# Sort rows by math in ascending order, breaking ties by sex.
df %>%
  arrange(math, sex)
# Sort rows by math in descending order. desc() gives the same result as
# unary minus for numeric columns and also works for character, factor and
# date columns, where `-math` fails.
df %>%
  arrange(desc(math))