R数据处理(7)-LOOKUP查询填补更新

注:本章内容介绍了excel中常用的查询填补法,该方法相较于Excel中的功能更为强大,自定义程度高,可规定按照查询到的值替换原数据或仅替换原数据的缺失值。

数据集比较

#修改数据格式使对应
bcdata <- bcdata %>% mutate(across(all_of(variables),~as.numeric(as.character(.))))
str(bcdata)
str(df_filled)

#比较两数据集的差异
#方法1简单比较-按列名匹配(适用于行顺序对应的两数据集)
result <- bcdata %>%
  mutate(across(everything(), ~ifelse(. != df_filled2[[cur_column()]]|is.na(.) != is.na(df_filled2[[cur_column()]]), ., NA))) 

# 输出补充数据集和原数据集不同的值
result %>% filter(!is.na(idcard))

#方法2精确比较-按同一id行和列名匹配(两数据集行位置不对应,先根据标识id变量匹配)
result <- bcdata %>%
  left_join(df_filled2, by = "id", suffix = c("_a", "_b")) %>%
  mutate(across(ends_with("_a"), ~ifelse(. != get(sub("a$", "b", cur_column()))|is.na(.) != is.na(get(sub("a$", "b", cur_column()))), ., NA)))
# 输出补充数据集和原数据集不同的值
result %>% filter(!is.na(idcard_a))

# 查看通过标识符如name、id可以匹配多少观测
c1 <- df_filled %>% select("name","idcard") %>% mutate(match=1)
c2 <- bcdata %>% select("name","idcard")
c3 <- c2 %>% left_join(c1,by=c("name","idcard")) 
c3 %>% filter(is.na(match))  #查看未匹配到的观测,检查是否有数据问题

查询填补

#1.简单插补--插补变量在原数据集不存在
filled_data <- df_filled %>%
  left_join(bcdata[,c("idcard", "name","variable")], by = c("idcard", "name")) 

#2.复杂填补--插补变量在原数据集存在,仅需插补原数据集中的缺失值

# 获取需要填补的变量名
variables_to_fill <- setdiff(names(bcdata), c("name","idcard","filename")) #获取填补变量名
filled_data <- df_filled %>%
  left_join(bcdata, by = c("idcard", "name"), suffix = c("", ".ref")) %>%
  mutate(across(
    all_of(variables_to_fill),
    ~ coalesce(., get(paste0(cur_column(),".ref"))))) %>%
  select(-ends_with(".ref"))

数据更新

#数据更新--插补变量在原数据集存在,需要用新数据集中对应变量的非缺失值更新原数据集
variables_to_fill <- setdiff(intersect(names(new_data), names(origin_data)), c("id","name","idcard","pc","j1"))

test <- origin_data %>%
left_join(new_data[c("id",variables_to_fill)], by = "id", suffix = c("", ".ref")) %>%
mutate(across(
all_of(variables_to_fill),
~ ifelse(
!is.na(get(paste0(cur_column(), ".ref"))) & str_trim(get(paste0(cur_column(), ".ref"))) != "",
get(paste0(cur_column(), ".ref")),.))) %>%
select(-ends_with(".ref"))

暂无评论

发送评论 编辑评论


				
|´・ω・)ノ
ヾ(≧∇≦*)ゝ
(☆ω☆)
(╯‵□′)╯︵┴─┴
 ̄﹃ ̄
(/ω\)
∠( ᐛ 」∠)_
(๑•̀ㅁ•́ฅ)
→_→
୧(๑•̀⌄•́๑)૭
٩(ˊᗜˋ*)و
(ノ°ο°)ノ
(´இ皿இ`)
⌇●﹏●⌇
(ฅ´ω`ฅ)
(╯°A°)╯︵○○○
φ( ̄∇ ̄o)
ヾ(´・ ・`。)ノ"
( ง ᵒ̌皿ᵒ̌)ง⁼³₌₃
(ó﹏ò。)
Σ(っ °Д °;)っ
( ,,´・ω・)ノ"(´っω・`。)
╮(╯▽╰)╭
o(*////▽////*)q
>﹏<
( ๑´•ω•) "(ㆆᴗㆆ)
😂
😀
😅
😊
🙂
🙃
😌
😍
😘
😜
😝
😏
😒
🙄
😳
😡
😔
😫
😱
😭
💩
👻
🙌
🖕
👍
👫
👬
👭
🌚
🌝
🙈
💊
😶
🙏
🍦
🍉
😣
Source: github.com/k4yt3x/flowerhd
颜文字
Emoji
小恐龙
花!
上一篇
下一篇