1
0
forked from Mirror/wren

Add sample(_) and sample(_,_) to Random.

This commit is contained in:
Bob Nystrom
2016-02-09 07:24:45 -08:00
parent d4a4b26203
commit 8b36e2f00b
11 changed files with 248 additions and 5 deletions

View File

@ -94,6 +94,21 @@ Returns an integer between `start` and `end`, including `start` but excluding
System.print(random.int(-10, 10)) //> -6
System.print(random.int(-4, 2)) //> -2
### **sample**(list)
Selects a random element from `list`.
### **sample**(list, count)
Samples `count` randomly chosen unique elements from `list`.
This uses "random without replacement" sampling—no index in the list will
be selected more than once.
Returns a new list of the selected elements.
It is an error if `count` is greater than the number of elements in the list.
### **shuffle**(list)
Randomly shuffles the elements in `list`. The items are randomly re-ordered in

View File

@ -47,6 +47,67 @@ foreign class Random {
// Returns the floor of `float() * end`.
// NOTE(review): presumably an integer in [0, end), assuming `float()` yields a
// value in [0, 1) -- confirm against the definition of `float()`.
int(end) { (float() * end).floor }
// Returns `start` plus the floor of `float() * (end - start)`. Per the module
// documentation this is an integer between `start` and `end`, including
// `start` but excluding `end`.
int(start, end) { (float() * (end - start)).floor + start }
// Selects one random element from `list` by drawing a one-element sample and
// unwrapping it. An empty list fails the size check in `sample(_,_)`, which
// aborts the fiber.
sample(list) {
  var single = sample(list, 1)
  return single[0]
}
// Returns a new list holding `count` elements of `list`, chosen at random
// without replacement: no index of `list` is used more than once. The result
// is in random order.
//
// Aborts the current fiber if `count` is greater than `list.count`.
sample(list, count) {
  if (count > list.count) Fiber.abort("Not enough elements to sample.")

  // There are (at least) two simple algorithms for choosing a number of
  // samples from a list without replacement -- where we don't pick the same
  // element more than once.
  //
  // The first is faster when the number of samples is small relative to the
  // size of the collection. In many cases, it avoids scanning the entire
  // list. In the common case of just wanting one sample, it's a single
  // random index lookup.
  //
  // However, its performance degrades badly as the sample size increases.
  // Vitter's algorithm always scans the entire list, but it's also always
  // O(n).
  //
  // The cutoff point between the two follows a quadratic curve on the sample
  // size. Based on some empirical testing, scaling that by 5 seems to fit
  // pretty closely and chooses the fastest one for the given sample and
  // collection size.
  if (count * count * 5 < list.count) {
    // Pick random elements and retry if you hit a previously chosen one.
    var picked = {}
    var result = []
    for (i in 0...count) {
      // Find an index that we haven't already selected. The index must be
      // drawn from the whole list; drawing from `0...count` would only ever
      // return a permutation of the first `count` elements.
      var index
      while (true) {
        index = int(list.count)
        if (!picked.containsKey(index)) break
      }

      picked[index] = true
      result.add(list[index])
    }

    return result
  } else {
    // Jeffrey Vitter's Algorithm R.

    // Fill the reservoir with the first elements in the list.
    var result = list[0...count]

    // We want to ensure the results are always in random order, so shuffle
    // them. In cases where the sample size is the entire collection, this
    // devolves to running Fisher-Yates on a copy of the list.
    shuffle(result)

    // Now walk the rest of the list. For each element, randomly consider
    // replacing one of the reservoir elements with it. The probability here
    // works out such that it does this uniformly.
    for (i in count...list.count) {
      var slot = int(0, i + 1)
      if (slot < count) result[slot] = list[i]
    }

    return result
  }
}
shuffle(list) {
if (list.isEmpty) return

View File

@ -49,6 +49,67 @@ static const char* randomModuleSource =
" int(end) { (float() * end).floor }\n"
" int(start, end) { (float() * (end - start)).floor + start }\n"
"\n"
" sample(list) { sample(list, 1)[0] }\n"
"  // Returns a new list holding `count` elements of `list`, chosen at random\n"
"  // without replacement: no index of `list` is used more than once. The\n"
"  // result is in random order.\n"
"  //\n"
"  // Aborts the current fiber if `count` is greater than `list.count`.\n"
"  sample(list, count) {\n"
"    if (count > list.count) Fiber.abort(\"Not enough elements to sample.\")\n"
"\n"
"    // There are (at least) two simple algorithms for choosing a number of\n"
"    // samples from a list without replacement -- where we don't pick the same\n"
"    // element more than once.\n"
"    //\n"
"    // The first is faster when the number of samples is small relative to the\n"
"    // size of the collection. In many cases, it avoids scanning the entire\n"
"    // list. In the common case of just wanting one sample, it's a single\n"
"    // random index lookup.\n"
"    //\n"
"    // However, its performance degrades badly as the sample size increases.\n"
"    // Vitter's algorithm always scans the entire list, but it's also always\n"
"    // O(n).\n"
"    //\n"
"    // The cutoff point between the two follows a quadratic curve on the sample\n"
"    // size. Based on some empirical testing, scaling that by 5 seems to fit\n"
"    // pretty closely and chooses the fastest one for the given sample and\n"
"    // collection size.\n"
"    if (count * count * 5 < list.count) {\n"
"      // Pick random elements and retry if you hit a previously chosen one.\n"
"      var picked = {}\n"
"      var result = []\n"
"      for (i in 0...count) {\n"
"        // Find an index that we haven't already selected. The index must be\n"
"        // drawn from the whole list; drawing from `0...count` would only ever\n"
"        // return a permutation of the first `count` elements.\n"
"        var index\n"
"        while (true) {\n"
"          index = int(list.count)\n"
"          if (!picked.containsKey(index)) break\n"
"        }\n"
"\n"
"        picked[index] = true\n"
"        result.add(list[index])\n"
"      }\n"
"\n"
"      return result\n"
"    } else {\n"
"      // Jeffrey Vitter's Algorithm R.\n"
"\n"
"      // Fill the reservoir with the first elements in the list.\n"
"      var result = list[0...count]\n"
"\n"
"      // We want to ensure the results are always in random order, so shuffle\n"
"      // them. In cases where the sample size is the entire collection, this\n"
"      // devolves to running Fisher-Yates on a copy of the list.\n"
"      shuffle(result)\n"
"\n"
"      // Now walk the rest of the list. For each element, randomly consider\n"
"      // replacing one of the reservoir elements with it. The probability here\n"
"      // works out such that it does this uniformly.\n"
"      for (i in count...list.count) {\n"
"        var slot = int(0, i + 1)\n"
"        if (slot < count) result[slot] = list[i]\n"
"      }\n"
"\n"
"      return result\n"
"    }\n"
"  }\n"
"\n"
" shuffle(list) {\n"
" if (list.isEmpty) return\n"
"\n"

View File

@ -0,0 +1,19 @@
import "random" for Random

var random = Random.new(12345)

// Sampling all three elements of a three-item list yields one of the 3! = 6
// orderings; over many trials each ordering should show up about equally often.
var items = ["a", "b", "c"]
var trials = 5000
var counts = {}
for (i in 1..trials) {
  var key = random.sample(items, 3).toString
  var seen = counts.containsKey(key) ? counts[key] : 0
  counts[key] = seen + 1
}

System.print(counts.count) // expect: 6

// Report any ordering whose frequency is more than 10% off the uniform share.
var uniformShare = trials / 6
for (key in counts.keys) {
  var error = (counts[key] / uniformShare - 1).abs
  if (error > 0.1) System.print("!!! %(error)")
}

View File

@ -0,0 +1,19 @@
import "random" for Random

var random = Random.new(12345)

// Sampling three of four elements yields one of 4 * 3 * 2 = 24 ordered
// results; over many trials each should show up about equally often.
var items = ["a", "b", "c", "d"]
var trials = 5000
var counts = {}
for (i in 1..trials) {
  var key = random.sample(items, 3).toString
  var seen = counts.containsKey(key) ? counts[key] : 0
  counts[key] = seen + 1
}

System.print(counts.count) // expect: 24

// Report any result whose frequency is more than 20% off the uniform share.
var uniformShare = trials / 24
for (key in counts.keys) {
  var error = (counts[key] / uniformShare - 1).abs
  if (error > 0.2) System.print("!!! %(error)")
}

View File

@ -0,0 +1,23 @@
import "random" for Random

var random = Random.new(12345)

// A one-element list can only ever produce that element.
var lonely = ["single"]
System.print(random.sample(lonely, 1)) // expect: [single]

// Drawing a one-element sample from five items should pick each item about
// equally often.
var items = ["a", "b", "c", "d", "e"]
var trials = 5000
var counts = {}
for (i in 1..trials) {
  var key = random.sample(items, 1).toString
  var seen = counts.containsKey(key) ? counts[key] : 0
  counts[key] = seen + 1
}

System.print(counts.count) // expect: 5

// Report any item whose frequency is more than 10% off the uniform share.
var uniformShare = trials / items.count
for (key in counts.keys) {
  var error = (counts[key] / uniformShare - 1).abs
  if (error > 0.1) System.print("!!! %(error)")
}

View File

@ -0,0 +1,5 @@
import "random" for Random

var random = Random.new(12345)

// Asking for more elements than the list holds must abort the fiber.
var tooSmall = [1, 2, 3]
random.sample(tooSmall, 4) // expect runtime error: Not enough elements to sample.

View File

@ -0,0 +1,6 @@
import "random" for Random

var random = Random.new(12345)

// A sample of zero elements is an empty list, whether or not the source list
// has anything in it.
var empty = []
System.print(random.sample(empty, 0)) // expect: []

var nonEmpty = [1, 2, 3]
System.print(random.sample(nonEmpty, 0)) // expect: []

View File

@ -0,0 +1,20 @@
import "random" for Random

var random = Random.new(12345)

// A one-element list can only ever produce that element.
var lonely = ["single"]
System.print(random.sample(lonely)) // expect: single

// Picking a single element from five items should choose each item about
// equally often.
var items = ["a", "b", "c", "d", "e"]
var trials = 1000

// Start every item at zero so the tally below can increment unconditionally.
var counts = {}
for (item in items) counts[item] = 0

for (i in 1..trials) {
  var choice = random.sample(items)
  counts[choice] = counts[choice] + 1
}

System.print(counts.count) // expect: 5

// Report any item whose frequency is more than 20% off the uniform share.
var uniformShare = trials / items.count
for (key in counts.keys) {
  var error = (counts[key] / uniformShare - 1).abs
  if (error > 0.2) System.print("!!! %(error)")
}

View File

@ -0,0 +1,5 @@
import "random" for Random

var random = Random.new(12345)

// There is nothing to pick from an empty list, so the call must abort.
var nothing = []
random.sample(nothing) // expect runtime error: Not enough elements to sample.

View File

@ -12,12 +12,21 @@ list = [1]
random.shuffle(list)
System.print(list) // expect: [1]
// Given enough tries, should generate all permutations.
var hits = {}
for (i in 1..200) {
// Given enough tries, should generate all permutations with roughly equal
// probability.
var histogram = {}
for (i in 1..5000) {
var list = [1, 2, 3, 4]
random.shuffle(list)
hits[list.toString] = true
var string = list.toString
if (!histogram.containsKey(string)) histogram[string] = 0
histogram[string] = histogram[string] + 1
}
System.print(histogram.count) // expect: 24
for (key in histogram.keys) {
var error = (histogram[key] / (5000 / 24) - 1).abs
if (error > 0.2) System.print("!!! %(error)")
}
System.print(hits.count) // expect: 24