forked from Mirror/wren
Add sample(_) and sample(_,_) to Random.
This commit is contained in:
@ -94,6 +94,21 @@ Returns an integer between `start` and `end`, including `start` but excluding
|
||||
System.print(random.int(-10, 10)) //> -6
|
||||
System.print(random.int(-4, 2)) //> -2
|
||||
|
||||
### **sample**(list)
|
||||
|
||||
Selects a random element from `list`. It is an error if `list` is empty.
|
||||
|
||||
### **sample**(list, count)
|
||||
|
||||
Samples `count` randomly chosen unique elements from `list`.
|
||||
|
||||
This uses "random without replacement" sampling—no index in the list will
|
||||
be selected more than once.
|
||||
|
||||
Returns a new list of the selected elements.
|
||||
|
||||
It is an error if `count` is greater than the number of elements in the list.
|
||||
|
||||
### **shuffle**(list)
|
||||
|
||||
Randomly shuffles the elements in `list`. The items are randomly re-ordered in
|
||||
|
||||
@ -47,6 +47,67 @@ foreign class Random {
|
||||
int(end) { (float() * end).floor }
|
||||
int(start, end) { (float() * (end - start)).floor + start }
|
||||
|
||||
// Returns a single randomly selected element of [list].
//
// Delegates to the two-argument form with a sample size of one and unwraps
// the resulting one-element list.
sample(list) {
  return sample(list, 1)[0]
}
|
||||
// Returns a new list of [count] randomly chosen elements of [list], in random
// order.
//
// Sampling is without replacement: no index of [list] is selected more than
// once. Aborts the current fiber if [count] is greater than the number of
// elements in [list].
sample(list, count) {
  if (count > list.count) Fiber.abort("Not enough elements to sample.")

  // There are (at least) two simple algorithms for choosing a number of
  // samples from a list without replacement -- where we don't pick the same
  // element more than once.
  //
  // The first is faster when the number of samples is small relative to the
  // size of the collection. In many cases, it avoids scanning the entire
  // list. In the common case of just wanting one sample, it's a single
  // random index lookup.
  //
  // However, its performance degrades badly as the sample size increases.
  // Vitter's algorithm always scans the entire list, but it's also always
  // O(n).
  //
  // The cutoff point between the two follows a quadratic curve on the sample
  // size. Based on some empirical testing, scaling that by 5 seems to fit
  // pretty closely and chooses the fastest one for the given sample and
  // collection size.
  if (count * count * 5 < list.count) {
    // Pick random elements and retry if you hit a previously chosen one.
    var picked = {}
    var result = []
    for (i in 0...count) {
      // Find an index that we haven't already selected. The index has to be
      // drawn from the whole list, not just its first [count] slots, or the
      // elements past that prefix could never be sampled.
      var index
      while (true) {
        index = int(list.count)
        if (!picked.containsKey(index)) break
      }

      picked[index] = true
      result.add(list[index])
    }

    return result
  } else {
    // Jeffrey Vitter's Algorithm R.

    // Fill the reservoir with the first elements in the list.
    var result = list[0...count]

    // We want to ensure the results are always in random order, so shuffle
    // them. In cases where the sample size is the entire collection, this
    // devolves to running Fisher-Yates on a copy of the list.
    shuffle(result)

    // Now walk the rest of the list. For each element, randomly consider
    // replacing one of the reservoir elements with it. The probability here
    // works out such that it does this uniformly.
    for (i in count...list.count) {
      var slot = int(0, i + 1)
      if (slot < count) result[slot] = list[i]
    }

    return result
  }
}
|
||||
|
||||
shuffle(list) {
|
||||
if (list.isEmpty) return
|
||||
|
||||
|
||||
@ -49,6 +49,67 @@ static const char* randomModuleSource =
|
||||
" int(end) { (float() * end).floor }\n"
|
||||
" int(start, end) { (float() * (end - start)).floor + start }\n"
|
||||
"\n"
|
||||
" sample(list) { sample(list, 1)[0] }\n"
|
||||
"  // Returns a new list of [count] randomly chosen elements of [list], in random\n"
"  // order.\n"
"  //\n"
"  // Sampling is without replacement: no index of [list] is selected more than\n"
"  // once. Aborts the current fiber if [count] is greater than the number of\n"
"  // elements in [list].\n"
"  sample(list, count) {\n"
"    if (count > list.count) Fiber.abort(\"Not enough elements to sample.\")\n"
"\n"
"    // There are (at least) two simple algorithms for choosing a number of\n"
"    // samples from a list without replacement -- where we don't pick the same\n"
"    // element more than once.\n"
"    //\n"
"    // The first is faster when the number of samples is small relative to the\n"
"    // size of the collection. In many cases, it avoids scanning the entire\n"
"    // list. In the common case of just wanting one sample, it's a single\n"
"    // random index lookup.\n"
"    //\n"
"    // However, its performance degrades badly as the sample size increases.\n"
"    // Vitter's algorithm always scans the entire list, but it's also always\n"
"    // O(n).\n"
"    //\n"
"    // The cutoff point between the two follows a quadratic curve on the sample\n"
"    // size. Based on some empirical testing, scaling that by 5 seems to fit\n"
"    // pretty closely and chooses the fastest one for the given sample and\n"
"    // collection size.\n"
"    if (count * count * 5 < list.count) {\n"
"      // Pick random elements and retry if you hit a previously chosen one.\n"
"      var picked = {}\n"
"      var result = []\n"
"      for (i in 0...count) {\n"
"        // Find an index that we haven't already selected. The index has to be\n"
"        // drawn from the whole list, not just its first [count] slots, or the\n"
"        // elements past that prefix could never be sampled.\n"
"        var index\n"
"        while (true) {\n"
"          index = int(list.count)\n"
"          if (!picked.containsKey(index)) break\n"
"        }\n"
"\n"
"        picked[index] = true\n"
"        result.add(list[index])\n"
"      }\n"
"\n"
"      return result\n"
"    } else {\n"
"      // Jeffrey Vitter's Algorithm R.\n"
"\n"
"      // Fill the reservoir with the first elements in the list.\n"
"      var result = list[0...count]\n"
"\n"
"      // We want to ensure the results are always in random order, so shuffle\n"
"      // them. In cases where the sample size is the entire collection, this\n"
"      // devolves to running Fisher-Yates on a copy of the list.\n"
"      shuffle(result)\n"
"\n"
"      // Now walk the rest of the list. For each element, randomly consider\n"
"      // replacing one of the reservoir elements with it. The probability here\n"
"      // works out such that it does this uniformly.\n"
"      for (i in count...list.count) {\n"
"        var slot = int(0, i + 1)\n"
"        if (slot < count) result[slot] = list[i]\n"
"      }\n"
"\n"
"      return result\n"
"    }\n"
"  }\n"
|
||||
"\n"
|
||||
" shuffle(list) {\n"
|
||||
" if (list.isEmpty) return\n"
|
||||
"\n"
|
||||
|
||||
19
test/random/sample_count_all.wren
Normal file
19
test/random/sample_count_all.wren
Normal file
@ -0,0 +1,19 @@
|
||||
import "random" for Random
|
||||
|
||||
var rng = Random.new(12345)

// Sampling every element of the list should produce each possible ordering
// with roughly equal probability.
var items = ["a", "b", "c"]
var tally = {}
for (round in 1..5000) {
  var key = rng.sample(items, 3).toString
  tally[key] = tally.containsKey(key) ? tally[key] + 1 : 1
}

System.print(tally.count) // expect: 6

// Report any ordering whose frequency strays too far from the even share.
var expected = 5000 / 6
for (key in tally.keys) {
  var error = (tally[key] / expected - 1).abs
  if (error > 0.1) System.print("!!! %(error)")
}
|
||||
19
test/random/sample_count_multiple.wren
Normal file
19
test/random/sample_count_multiple.wren
Normal file
@ -0,0 +1,19 @@
|
||||
import "random" for Random
|
||||
|
||||
var rng = Random.new(12345)

// Sampling three of four elements should produce each possible ordered
// selection with roughly equal probability.
var items = ["a", "b", "c", "d"]
var tally = {}
for (round in 1..5000) {
  var key = rng.sample(items, 3).toString
  tally[key] = tally.containsKey(key) ? tally[key] + 1 : 1
}

System.print(tally.count) // expect: 24

// Report any selection whose frequency strays too far from the even share.
var expected = 5000 / 24
for (key in tally.keys) {
  var error = (tally[key] / expected - 1).abs
  if (error > 0.2) System.print("!!! %(error)")
}
|
||||
23
test/random/sample_count_one.wren
Normal file
23
test/random/sample_count_one.wren
Normal file
@ -0,0 +1,23 @@
|
||||
import "random" for Random
|
||||
|
||||
var rng = Random.new(12345)

// Single element list.
System.print(rng.sample(["single"], 1)) // expect: [single]

// One-element samples should pick each element with roughly equal
// probability.
var items = ["a", "b", "c", "d", "e"]
var tally = {}
for (round in 1..5000) {
  var key = rng.sample(items, 1).toString
  tally[key] = tally.containsKey(key) ? tally[key] + 1 : 1
}

System.print(tally.count) // expect: 5

// Report any element whose frequency strays too far from the even share.
var expected = 5000 / items.count
for (key in tally.keys) {
  var error = (tally[key] / expected - 1).abs
  if (error > 0.1) System.print("!!! %(error)")
}
|
||||
5
test/random/sample_count_too_many.wren
Normal file
5
test/random/sample_count_too_many.wren
Normal file
@ -0,0 +1,5 @@
|
||||
import "random" for Random
|
||||
|
||||
var random = Random.new(12345)
|
||||
|
||||
// Requesting more elements than the list holds must abort the fiber.
random.sample([1, 2, 3], 4) // expect runtime error: Not enough elements to sample.
|
||||
6
test/random/sample_count_zero.wren
Normal file
6
test/random/sample_count_zero.wren
Normal file
@ -0,0 +1,6 @@
|
||||
import "random" for Random
|
||||
|
||||
var rng = Random.new(12345)

// Asking for zero elements yields an empty list, whether or not the source
// list itself is empty.
var fromEmpty = rng.sample([], 0)
System.print(fromEmpty) // expect: []

var fromFull = rng.sample([1, 2, 3], 0)
System.print(fromFull) // expect: []
|
||||
20
test/random/sample_one.wren
Normal file
20
test/random/sample_one.wren
Normal file
@ -0,0 +1,20 @@
|
||||
import "random" for Random
|
||||
|
||||
var rng = Random.new(12345)

// Single element list.
System.print(rng.sample(["single"])) // expect: single

// Each element should be chosen with roughly equal probability.
var items = ["a", "b", "c", "d", "e"]
var tally = {"a": 0, "b": 0, "c": 0, "d": 0, "e": 0}
for (round in 1..1000) {
  var pick = rng.sample(items)
  tally[pick] = tally[pick] + 1
}

System.print(tally.count) // expect: 5

// Report any element whose frequency strays too far from the even share.
var expected = 1000 / items.count
for (key in tally.keys) {
  var error = (tally[key] / expected - 1).abs
  if (error > 0.2) System.print("!!! %(error)")
}
|
||||
5
test/random/sample_one_empty.wren
Normal file
5
test/random/sample_one_empty.wren
Normal file
@ -0,0 +1,5 @@
|
||||
import "random" for Random
|
||||
|
||||
var random = Random.new(12345)
|
||||
|
||||
// Sampling a single element from an empty list must abort the fiber.
random.sample([]) // expect runtime error: Not enough elements to sample.
|
||||
@ -12,12 +12,21 @@ list = [1]
|
||||
random.shuffle(list)
|
||||
System.print(list) // expect: [1]
|
||||
|
||||
// Given enough tries, should generate all permutations.
|
||||
var hits = {}
|
||||
for (i in 1..200) {
|
||||
// Given enough tries, should generate all permutations with roughly equal
|
||||
// probability.
|
||||
var histogram = {}
|
||||
for (i in 1..5000) {
|
||||
var list = [1, 2, 3, 4]
|
||||
random.shuffle(list)
|
||||
hits[list.toString] = true
|
||||
|
||||
var string = list.toString
|
||||
if (!histogram.containsKey(string)) histogram[string] = 0
|
||||
histogram[string] = histogram[string] + 1
|
||||
}
|
||||
|
||||
System.print(histogram.count) // expect: 24
|
||||
for (key in histogram.keys) {
|
||||
var error = (histogram[key] / (5000 / 24) - 1).abs
|
||||
if (error > 0.2) System.print("!!! %(error)")
|
||||
}
|
||||
|
||||
System.print(hits.count) // expect: 24
|
||||
|
||||
Reference in New Issue
Block a user