Drop Data Frame Rows Greater Than the 95th Percentile in R (Example Code)
In this post, I’ll demonstrate how to drop data frame rows greater than the 95th percentile in the R programming language.
Creating Example Data
# Load the built-in iris data and work on a copy so that the original
# data set stays untouched.
data(iris)
iris_mod <- iris

# Plant two artificial extreme values in Sepal.Length so that a
# percentile-based filter has obvious rows to remove.
iris_mod$Sepal.Length[1] <- 999
iris_mod$Sepal.Length[5] <- 1000

# Inspect the first rows; rows 1 and 5 now hold the extreme values.
head(iris_mod)
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1        999.0         3.5          1.4         0.2  setosa
# 2          4.9         3.0          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 5       1000.0         3.6          1.4         0.2  setosa
# 6          5.4         3.9          1.7         0.4  setosa
Example: Delete Rows in Data Frame Greater Than 95th Percentile
# Compute the 95th percentile of Sepal.Length. quantile() returns a
# length-one numeric vector whose element is named "95%".
SL_perc <- quantile(iris_mod$Sepal.Length, probs = 0.95)

# Print the threshold.
SL_perc
#  95%
# 7.51
# Keep every row whose Sepal.Length does not exceed the 95th percentile.
# `<=` (rather than `<`) retains rows that sit exactly on the threshold,
# so only values strictly greater than the percentile are dropped.
iris_mod_rem <- iris_mod[iris_mod$Sepal.Length <= SL_perc, ]

# Rows 1 and 5 (the planted extreme values) are gone; the original row
# names are preserved.
head(iris_mod_rem)
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 2          4.9         3.0          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 6          5.4         3.9          1.7         0.4  setosa
# 7          4.6         3.4          1.4         0.3  setosa
# 8          5.0         3.4          1.5         0.2  setosa