Generate Data Frame Sample with Multiple Probabilities in R (Example Code)
In this article, I’ll explain how to draw a random sample of data frame rows with a different selection probability for each group in the R programming language.
Creating Example Data
# Load the built-in iris data set and preview its first six rows.
data(iris)                                    # Example data
head(iris)
#   Sepal.Length Sepal.Width Petal.Length Petal.Width Species
# 1          5.1         3.5          1.4         0.2  setosa
# 2          4.9         3.0          1.4         0.2  setosa
# 3          4.7         3.2          1.3         0.2  setosa
# 4          4.6         3.1          1.5         0.2  setosa
# 5          5.0         3.6          1.4         0.2  setosa
# 6          5.4         3.9          1.7         0.4  setosa
Example: Drawing a Random Sample of a Data Frame with Different Probabilities
# Build one sampling weight per row of iris, chosen by the row's species.
# The vector starts as numeric NA so any row that is not matched by one of
# the assignments below remains visibly missing instead of silently 0.
iris_prob <- rep(NA_real_, nrow(iris))        # Different probabilities for sample
iris_prob[iris$Species == "setosa"] <- 0.1
iris_prob[iris$Species == "versicolor"] <- 0.3
iris_prob[iris$Species == "virginica"] <- 0.6
iris_prob
#   [1] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
#  [19] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1
#  [37] 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.3 0.3 0.3 0.3
#  [55] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3
#  [73] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3
#  [91] 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.3 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6
# [109] 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6
# [127] 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6 0.6
# [145] 0.6 0.6 0.6 0.6 0.6 0.6
# Fix the random number generator seed so the draw below is reproducible.
set.seed(10957)                               # Set random seed

# Draw 15 row indices from 1..nrow(iris), weighting each row by iris_prob,
# and keep the corresponding rows. sample() draws without replacement by
# default, so no row can appear twice.
iris_samp <- iris[sample(nrow(iris),          # Generate random sample of iris
                         size = 15,
                         prob = iris_prob), ]
iris_samp                                     # Return random sample of iris
#     Sepal.Length Sepal.Width Petal.Length Petal.Width    Species
# 149          6.2         3.4          5.4         2.3  virginica
# 60           5.2         2.7          3.9         1.4 versicolor
# 115          5.8         2.8          5.1         2.4  virginica
# 150          5.9         3.0          5.1         1.8  virginica
# 96           5.7         3.0          4.2         1.2 versicolor
# 47           5.1         3.8          1.6         0.2     setosa
# 117          6.5         3.0          5.5         1.8  virginica
# 112          6.4         2.7          5.3         1.9  virginica
# 103          7.1         3.0          5.9         2.1  virginica
# 52           6.4         3.2          4.5         1.5 versicolor
# 147          6.3         2.5          5.0         1.9  virginica
# 126          7.2         3.2          6.0         1.8  virginica
# 7            4.6         3.4          1.4         0.3     setosa
# 109          6.7         2.5          5.8         1.8  virginica
# 108          7.3         2.9          6.3         1.8  virginica