How to Split a Data Frame Randomly in R Programming (Example Code)
In this article, I’ll explain how to randomly split a data frame into two subsets in the R programming language.
Creating Example Data
# Load the iris data set that ships with R and preview its first six rows.
data(iris)                                     # Load iris
head(iris)                                     # Head of iris
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1          5.1         3.5          1.4         0.2  setosa
# 2          4.9         3.0          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 5          5.0         3.6          1.4         0.2  setosa
# 6          5.4         3.9          1.7         0.4  setosa
Example: Splitting Data into Two Parts Randomly Using the rbinom() Function
# Fix the random seed so the draw below is reproducible.
set.seed(8739465)                              # Random seed

# Draw one 0/1 indicator per row of iris; each row gets a 1 with probability
# 0.3, independently of the others, so the group sizes are only approximately
# 30% and 70%.
# Use nrow(iris), not nrow(data): `data` is the utils function here, not the
# data frame, so nrow(data) is NULL and does not give one indicator per row.
line_indicator <- rbinom(nrow(iris), 1, 0.3)   # 30% - 70% split
# Rows whose indicator is 0 form the first subset.
iris_1 <- iris[line_indicator == 0, ]          # Create first data frame
head(iris_1)                                   # Head of first data frame
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1          5.1         3.5          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 5          5.0         3.6          1.4         0.2  setosa
# 7          4.6         3.4          1.4         0.3  setosa
# 8          5.0         3.4          1.5         0.2  setosa

# Rows whose indicator is 1 form the second subset; original row names are
# kept, so the two subsets can be traced back to iris.
iris_2 <- iris[line_indicator == 1, ]          # Create second data frame
head(iris_2)                                   # Head of second data frame
#    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 2           4.9         3.0          1.4         0.2  setosa
# 6           5.4         3.9          1.7         0.4  setosa
# 12          4.8         3.4          1.6         0.2  setosa
# 16          5.7         4.4          1.5         0.4  setosa
# 22          5.1         3.7          1.5         0.4  setosa
# 26          5.0         3.0          1.6         0.2  setosa