# Date: 2022-09-01
# Purpose: replace outliers in the data with NA. A value x is an outlier when
#   x > Q75 + 3 * (Q75 - Q25)  or  x < Q25 - 3 * (Q75 - Q25).

# Show the current working directory
getwd()

# Switch to the project working directory
setwd("D:/desk/speed/data_1/behavior/R")

# Load the data (first worksheet of the workbook)
library(xlsx)
data0 <- read.xlsx("lapse_speed.xlsx", sheetIndex = 1)
print(data0)

# Descriptive statistics (disabled)
# library(psych)
# myvars= c("norate","corate","meanrt","fastrt")
# describe(data0[myvars])

# Outlier screening ----
# Visual check with box plots; range = 3 puts the whiskers at 3 * IQR, which
# matches the outlier definition stated at the top of this script.
boxplot(
  lapse1 ~ speed,
  data = data0,
  range = 3,
  col = c("red", "blue"),
  main = "lapse1_speed",
  xlab = "speed number",
  ylab = "lapse1"
) # no outliers
boxplot(
  lapse2 ~ speed,
  data = data0,
  range = 3,
  col = c("red", "blue"),
  main = "lapse2_speed",
  xlab = "speed number",
  ylab = "lapse2"
) # no outliers

# To get the whisker limits and box statistics numerically:
# boxplot.stats(data0$meanrt)
#' Replace extreme outliers with NA
#'
#' A value is treated as an outlier when it lies more than `coef` times the
#' interquartile range below the first quartile or above the third quartile.
#'
#' @param x Vector to screen; must be acceptable to `quantile()`.
#' @param na.rm Passed to `quantile()`: drop `NA`s before computing quartiles?
#' @param ... Further arguments forwarded to `quantile()` (e.g. `type`).
#' @param coef Multiplier applied to the IQR to build the fences. It follows
#'   `...`, so it must be supplied by name. Defaults to 3.
#' @return `x` with outlying elements replaced by `NA`; same length as `x`.
remove_outliers <- function(x, na.rm = TRUE, ..., coef = 3) {
  # Derive the IQR from the very quartiles used for the fences, so options
  # forwarded through `...` (such as `type`) affect both consistently.
  qnt <- unname(quantile(x, probs = c(0.25, 0.75), na.rm = na.rm, ...))
  val <- coef * (qnt[2] - qnt[1])
  y <- x
  # Elements of x that are NA compare as NA and simply stay NA in y.
  y[x < (qnt[1] - val)] <- NA
  y[x > (qnt[2] + val)] <- NA
  y
}
# Remove outliers ----
library(dplyr)
# Screen norate and corate separately within each speed group; the cleaned
# values are stored in the new columns norateout and corateout.
data1 <- data0 %>%
  group_by(speed) %>%
  mutate(
    norateout = remove_outliers(norate),
    corateout = remove_outliers(corate)
  ) %>%
  ungroup()

# Inspect the data after outlier removal. View() is read-only, whereas fix()
# opens an editor and writes the result back over data1. Only meaningful in an
# interactive session, so skip it otherwise.
if (interactive()) {
  View(data1)
}

# Box plots to confirm that no outliers remain in the cleaned columns
boxplot(
  data1$norateout ~ speed,
  data = data1,
  range = 3,
  col = c("red", "blue")
) # no outliers
boxplot(
  data1$corateout ~ speed,
  data = data1,
  range = 3,
  col = c("red", "blue")
) # no outliers

# Drop the raw norate and corate columns, keeping the cleaned versions
myvars <- names(data1) %in% c("norate", "corate")
datarw <- data1[!myvars]

# Export the data ----
library(xlsx)
# write.xlsx() is documented for data.frame input and is reported to
# mishandle tibbles, so hand it a plain data.frame.
write.xlsx(as.data.frame(datarw), "datarwout.xlsx")
write.table(datarw, "datarwout.txt", sep = ",")
# 网友评论 ("Reader comments": leftover web-page heading, not part of the script)