LibTest: Add more numeric generators

Rename unsigned_int generator to number_u32. Add generators: - number_u64 - number_f64 - percentage
2025-09-18 22:26:17 +00:00 · 2023-12-30 16:23:59 +01:00 · 2023-12-30 16:23:59 +01:00 · d52ffcd830
commit d52ffcd830
parent dd53f64d2f
15 changed files with 312 additions and 133 deletions
--- a/Userland/Libraries/LibTest/Randomized/Generator.h
+++ b/Userland/Libraries/LibTest/Randomized/Generator.h
@ -15,22 +15,24 @@
 #include <AK/StringView.h>
 #include <AK/Tuple.h>

+#include <math.h>
+
 namespace Test {
 namespace Randomized {

 // Returns a random double value in range 0..1.
-inline double get_random_probability()
+// This is not a generator. It is meant to be used inside RandomnessSource::draw_value().
+// Based on: https://dotat.at/@/2023-06-23-random-double.html
+inline f64 get_random_probability()
 {
-    static constexpr u32 max_u32 = NumericLimits<u32>::max();
-    u32 random_u32 = AK::get_random_uniform(max_u32);
-    return static_cast<double>(random_u32) / static_cast<double>(max_u32);
+    return static_cast<f64>(AK::get_random<u64>() >> 11) * 0x1.0p-53;
 }

 // Generators take random bits from the RandomnessSource and return a value
 // back.
 //
 // Example:
-// - Gen::u32(5,10) --> 9, 7, 5, 10, 8, ...
+// - Gen::number_u64(5,10) --> 9, 7, 5, 10, 8, ...
 namespace Gen {

 // An unsigned integer generator.
@ -38,41 +40,41 @@ namespace Gen {
 // The minimum value will always be 0.
 // The maximum value is given by user in the argument.
 //
-// Gen::unsigned_int(10) -> value 5, RandomRun [5]
-//                       -> value 8, RandomRun [8]
-//                       etc.
+// Gen::number_u64(10) -> value 5, RandomRun [5]
+//                     -> value 8, RandomRun [8]
+//                     etc.
 //
 // Shrinks towards 0.
-inline u32 unsigned_int(u32 max)
+inline u64 number_u64(u64 max)
 {
    if (max == 0)
        return 0;

-    u32 random = Test::randomness_source().draw_value(max, [&]() {
-        // `clamp` to guard against integer overflow and calling get_random_uniform(0).
-        u32 exclusive_bound = AK::clamp(max + 1, max, NumericLimits<u32>::max());
-        return AK::get_random_uniform(exclusive_bound);
+    u64 random = Test::randomness_source().draw_value(max, [&]() {
+        // `clamp` to guard against integer overflow
+        u64 exclusive_bound = AK::clamp(max + 1, max, NumericLimits<u64>::max());
+        return AK::get_random_uniform_64(exclusive_bound);
    });
    return random;
 }

 // An unsigned integer generator in a particular range.
 //
-// Gen::unsigned_int(3,10) -> value 3,  RandomRun [0]
-//                         -> value 8,  RandomRun [5]
-//                         -> value 10, RandomRun [7]
-//                         etc.
+// Gen::number_u64(3,10) -> value 3,  RandomRun [0]
+//                       -> value 8,  RandomRun [5]
+//                       -> value 10, RandomRun [7]
+//                       etc.
 //
 // In case `min == max`, the RandomRun footprint will be smaller: no randomness
 // is needed.
 //
-// Gen::unsigned_int(3,3) -> value 3, RandomRun [] (always)
+// Gen::number_u64(3,3) -> value 3, RandomRun [] (always)
 //
 // Shrinks towards the minimum.
-inline u32 unsigned_int(u32 min, u32 max)
+inline u64 number_u64(u64 min, u64 max)
 {
    VERIFY(max >= min);
-    return unsigned_int(max - min) + min;
+    return number_u64(max - min) + min;
 }

 // Randomly (uniformly) selects a value out of the given arguments.
@ -89,7 +91,7 @@ CommonType<Ts...> one_of(Ts... choices)
    Vector<CommonType<Ts...>> choices_vec { choices... };

    constexpr size_t count = sizeof...(choices);
-    size_t i = unsigned_int(count - 1);
+    size_t i = number_u64(count - 1);
    return choices_vec[i];
 }

@ -118,16 +120,16 @@ CommonType<Ts...> frequency(Choice<Ts>... choices)
 {
    Vector<Choice<CommonType<Ts...>>> choices_vec { choices... };

-    u32 sum = 0;
+    u64 sum = 0;
    for (auto const& choice : choices_vec) {
        VERIFY(choice.weight > 0);
-        sum += static_cast<u32>(choice.weight);
+        sum += static_cast<u64>(choice.weight);
    }

-    u32 target = unsigned_int(sum);
+    u64 target = number_u64(sum);
    size_t i = 0;
    for (auto const& choice : choices_vec) {
-        u32 weight = static_cast<u32>(choice.weight);
+        u64 weight = static_cast<u64>(choice.weight);
        if (weight >= target) {
            return choice.value;
        }
@ -137,46 +139,47 @@ CommonType<Ts...> frequency(Choice<Ts>... choices)
    return choices_vec[i - 1].value;
 }

-// An unsigned integer generator in the full u32 range.
+// An unsigned integer generator in the full u64 range.
 //
-// 8/17 (47%) of the time it will bias towards 8bit numbers,
-// 4/17 (23%) towards 4bit numbers,
-// 2/17 (12%) towards 16bit numbers,
-// 1/17 (6%) towards 32bit numbers,
-// 2/17 (12%) towards edge cases like 0 and NumericLimits::max() of various unsigned int types.
+// Prefers 8bit numbers, then 4bit, 16bit, 32bit and 64bit ones.
+// Around 11% of the time it tries edge cases like 0 and various NumericLimits::max().
 //
-// Gen::unsigned_int() -> value 3,     RandomRun [0,3]
-//                     -> value 8,     RandomRun [1,8]
-//                     -> value 100,   RandomRun [2,100]
-//                     -> value 5,     RandomRun [3,5]
-//                     -> value 255,   RandomRun [4,1]
-//                     -> value 65535, RandomRun [4,2]
-//                     etc.
+// Gen::number_u64() -> value 3,     RandomRun [0,3]
+//                   -> value 8,     RandomRun [1,8]
+//                   -> value 100,   RandomRun [2,100]
+//                   -> value 5,     RandomRun [3,5]
+//                   -> value 255,   RandomRun [4,1]
+//                   -> value 65535, RandomRun [4,2]
+//                   etc.
 //
 // Shrinks towards 0.
-inline u32 unsigned_int()
+inline u64 number_u64()
 {
-    u32 bits = frequency(
+    u64 bits = frequency(
        // weight, bits
        Choice { 4, 4 },
        Choice { 8, 8 },
        Choice { 2, 16 },
        Choice { 1, 32 },
+        Choice { 1, 64 },
        Choice { 2, 0 });

    // The special cases go last as they can be the most extreme (large) values.

    if (bits == 0) {
-        // Special cases, eg. max integers for u8, u16, u32.
+        // Special cases, eg. max integers for u8, u16, u32, u64.
        return one_of(
            0U,
            NumericLimits<u8>::max(),
            NumericLimits<u16>::max(),
-            NumericLimits<u32>::max());
+            NumericLimits<u32>::max(),
+            NumericLimits<u64>::max());
    }

-    u32 max = ((u64)1 << bits) - 1;
-    return unsigned_int(max);
+    u64 max = bits == 64
+        ? NumericLimits<u64>::max()
+        : ((u64)1 << bits) - 1;
+    return number_u64(max);
 }

 // A generator returning `true` with the given `probability` (0..1).
@ -190,15 +193,15 @@ inline u32 unsigned_int()
 //   -> value true,  RandomRun [1]
 //
 // Shrinks towards false.
-inline bool weighted_boolean(double probability)
+inline bool weighted_boolean(f64 probability)
 {
    if (probability <= 0)
        return false;
    if (probability >= 1)
        return true;

-    u32 random_int = Test::randomness_source().draw_value(1, [&]() {
-        double drawn_probability = get_random_probability();
+    u64 random_int = Test::randomness_source().draw_value(1, [&]() {
+        f64 drawn_probability = get_random_probability();
        return drawn_probability <= probability ? 1 : 0;
    });
    bool random_bool = random_int == 1;
@ -219,7 +222,7 @@ inline bool boolean()

 // A vector generator of a random length between the given limits.
 //
-// Gen::vector(2,3,[]() { return Gen::unsigned_int(5); })
+// Gen::vector(2,3,[]() { return Gen::number_u64(5); })
 //   -> value [1,5],      RandomRun [1,1,1,5,0]
 //   -> value [1,5,0],    RandomRun [1,1,1,5,1,0,0]
 //   etc.
@ -227,7 +230,7 @@ inline bool boolean()
 // In case `min == max`, the RandomRun footprint will be smaller, as there will
 // be no randomness involved in figuring out the length:
 //
-// Gen::vector(3,3,[]() { return Gen::unsigned_int(5); })
+// Gen::vector(3,3,[]() { return Gen::number_u64(5); })
 //   -> value [1,3], RandomRun [1,3]
 //   -> value [5,2], RandomRun [5,2]
 //   etc.
@ -266,7 +269,7 @@ inline Vector<InvokeResult<Fn>> vector(size_t min, size_t max, Fn item_gen)
        ++size;
    }

-    double average = static_cast<double>(min + max) / 2.0;
+    f64 average = static_cast<f64>(min + max) / 2.0;
    VERIFY(average > 0);

    // A geometric distribution: https://en.wikipedia.org/wiki/Geometric_distribution#Moments_and_cumulants
@ -279,7 +282,7 @@ inline Vector<InvokeResult<Fn>> vector(size_t min, size_t max, Fn item_gen)
    // That gives us `1 - 1/p`. Then, E(X) also contains the final success, so we
    // need to say `1 + average` instead of `average`, as it will mean "our X
    // items + the final failure that stops the process".
-    double probability = 1.0 - 1.0 / (1.0 + average);
+    f64 probability = 1.0 - 1.0 / (1.0 + average);

    while (size < max) {
        if (weighted_boolean(probability)) {
@ -295,7 +298,7 @@ inline Vector<InvokeResult<Fn>> vector(size_t min, size_t max, Fn item_gen)

 // A vector generator of a given length.
 //
-// Gen::vector_of_length(3,[]() { return Gen::unsigned_int(5); })
+// Gen::vector_of_length(3,[]() { return Gen::number_u64(5); })
 //   -> value [1,5,0],    RandomRun [1,1,1,5,1,0,0]
 //   -> value [2,9,3],    RandomRun [1,2,1,9,1,3,0]
 //   etc.
@ -312,7 +315,7 @@ inline Vector<InvokeResult<Fn>> vector(size_t length, Fn item_gen)
 // If you need a different length, use vector(max,item_gen) or
 // vector(min,max,item_gen).
 //
-// Gen::vector([]() { return Gen::unsigned_int(5); })
+// Gen::vector([]() { return Gen::number_u64(5); })
 //   -> value [],         RandomRun [0]
 //   -> value [1],        RandomRun [1,1,0]
 //   -> value [1,5],      RandomRun [1,1,1,5,0]
@ -327,6 +330,118 @@ inline Vector<InvokeResult<Fn>> vector(Fn item_gen)
    return vector(0, 32, item_gen);
 }

+// A double generator in the [0,1) range.
+//
+// RandomRun footprint: a single number.
+//
+// Shrinks towards 0.
+//
+// Based on: https://dotat.at/@/2023-06-23-random-double.html
+inline f64 percentage()
+{
+    return static_cast<f64>(number_u64() >> 11) * 0x1.0p-53;
+}
+
+// An internal double generator. This one won't make any attempt to shrink nicely.
+// Test writers should use number_f64(f64 min, f64 max) instead.
+inline f64 number_f64_scaled(f64 min, f64 max)
+{
+    VERIFY(max >= min);
+
+    if (min == max)
+        return min;
+
+    f64 p = percentage();
+    return min * (1.0 - p) + max * p;
+}
+
+inline f64 number_f64(f64 min, f64 max)
+{
+    // FIXME: after we figure out how to use frequency() with lambdas,
+    // do edge cases and nicely shrinking float generators here
+
+    return number_f64_scaled(min, max);
+}
+
+inline f64 number_f64()
+{
+    // FIXME: this could be much nicer to the user, at the expense of code complexity
+    // We could follow Hypothesis' lead and remap integers 0..MAXINT to _simple_
+    // floats rather than small floats. Meaning, we would like to prefer integers
+    // over floats with decimal digits, positive numbers over negative numbers etc.
+    // As a result, users would get failures with floats like 0, 1, or 0.5 instead of
+    // ones like 1.175494e-38.
+    // Check the doc comment in Hypothesis: https://github.com/HypothesisWorks/hypothesis/blob/master/hypothesis-python/src/hypothesis/internal/conjecture/floats.py
+
+    return number_f64(NumericLimits<f64>::lowest(), NumericLimits<f64>::max());
+}
+
+// A double generator.
+//
+// The minimum value will always be NumericLimits<f64>::lowest().
+// The maximum value is given by user in the argument.
+//
+// Prefers positive numbers, then negative numbers, then edge cases.
+//
+// Shrinks towards 0.
+inline f64 number_f64(f64 max)
+{
+    // FIXME: after we figure out how to use frequency() with lambdas,
+    // do edge cases and nicely shrinking float generators here
+
+    return number_f64_scaled(NumericLimits<f64>::lowest(), max);
+}
+
+// TODO
+inline u32 number_u32(u32 max)
+{
+    if (max == 0)
+        return 0;
+
+    u32 random = Test::randomness_source().draw_value(max, [&]() {
+        // `clamp` to guard against integer overflow
+        u32 exclusive_bound = AK::clamp(max + 1, max, NumericLimits<u32>::max());
+        return AK::get_random_uniform(exclusive_bound);
+    });
+    return random;
+}
+
+// TODO
+inline u32 number_u32(u32 min, u32 max)
+{
+    VERIFY(max >= min);
+    return number_u32(max - min) + min;
+}
+
+// TODO
+inline u32 number_u32()
+{
+    u32 bits = frequency(
+        // weight, bits
+        Choice { 4, 4 },
+        Choice { 8, 8 },
+        Choice { 2, 16 },
+        Choice { 1, 32 },
+        Choice { 1, 64 },
+        Choice { 2, 0 });
+
+    // The special cases go last as they can be the most extreme (large) values.
+
+    if (bits == 0) {
+        // Special cases, eg. max integers for u8, u16, u32.
+        return one_of(
+            0U,
+            NumericLimits<u8>::max(),
+            NumericLimits<u16>::max(),
+            NumericLimits<u32>::max());
+    }
+
+    u32 max = bits == 32
+        ? NumericLimits<u32>::max()
+        : ((u32)1 << bits) - 1;
+    return number_u32(max);
+}
+
 } // namespace Gen
 } // namespace Randomized
 } // namespace Test
--- a/Userland/Libraries/LibTest/Randomized/RandomRun.h
+++ b/Userland/Libraries/LibTest/Randomized/RandomRun.h
@ -29,24 +29,24 @@ public:
    RandomRun() = default;
    RandomRun(RandomRun const& rhs) = default;
    RandomRun& operator=(RandomRun const& rhs) = default;
-    explicit RandomRun(Vector<u32> const& data)
+    explicit RandomRun(Vector<u64> const& data)
        : m_data(move(data))
    {
    }
    bool is_empty() const { return m_data.is_empty(); }
    bool contains_chunk(Chunk const& c) const { return c.index + c.size <= m_data.size(); }
-    void append(u32 n) { m_data.append(n); }
+    void append(u64 n) { m_data.append(n); }
    size_t size() const { return m_data.size(); }
-    Optional<u32> next()
+    Optional<u64> next()
    {
        if (m_current_index < m_data.size()) {
            return m_data[m_current_index++];
        }
-        return Optional<u32> {};
+        return Optional<u64> {};
    }
-    u32& operator[](size_t index) { return m_data[index]; }
-    u32 const& operator[](size_t index) const { return m_data[index]; }
-    Vector<u32> data() const { return m_data; }
+    u64& operator[](size_t index) { return m_data[index]; }
+    u64 const& operator[](size_t index) const { return m_data[index]; }
+    Vector<u64> data() const { return m_data; }

    // Shortlex sorting
    //
@ -85,7 +85,7 @@ public:

    RandomRun with_sorted(Chunk c) const
    {
-        Vector<u32> new_data = m_data;
+        Vector<u64> new_data = m_data;
        AK::dual_pivot_quick_sort(
            new_data,
            c.index,
@ -95,13 +95,13 @@ public:
    }
    RandomRun with_deleted(Chunk c) const
    {
-        Vector<u32> new_data(m_data);
+        Vector<u64> new_data(m_data);
        new_data.remove(c.index, c.size);
        return RandomRun(move(new_data));
    }

 private:
-    Vector<u32> m_data;
+    Vector<u64> m_data;
    size_t m_current_index = 0;
 };

--- a/Userland/Libraries/LibTest/Randomized/RandomnessSource.h
+++ b/Userland/Libraries/LibTest/Randomized/RandomnessSource.h
@ -26,11 +26,11 @@ public:
    static RandomnessSource live() { return RandomnessSource(RandomRun(), true); }
    static RandomnessSource recorded(RandomRun const& run) { return RandomnessSource(run, false); }
    RandomRun& run() { return m_run; }
-    u32 draw_value(u32 max, Function<u32()> random_generator)
+    u64 draw_value(u64 max, Function<u64()> random_generator)
    {
        // Live: use the random generator and remember the value.
        if (m_is_live) {
-            u32 value = random_generator();
+            u64 value = random_generator();
            m_run.append(value);
            return value;
        }
--- a/Userland/Libraries/LibTest/Randomized/Shrink.h
+++ b/Userland/Libraries/LibTest/Randomized/Shrink.h
@ -77,15 +77,15 @@ ShrinkResult keep_if_better(RandomRun const& new_run, RandomRun const& current_b
 }

 template<typename Fn, typename UpdateRunFn>
-ShrinkResult binary_shrink(u32 orig_low, u32 orig_high, UpdateRunFn update_run, RandomRun const& orig_run, Fn const& test_function)
+ShrinkResult binary_shrink(u64 orig_low, u64 orig_high, UpdateRunFn update_run, RandomRun const& orig_run, Fn const& test_function)
 {
    if (orig_low == orig_high) {
        return no_improvement(orig_run);
    }

    RandomRun current_best = orig_run;
-    u32 low = orig_low;
-    u32 high = orig_high;
+    u64 low = orig_low;
+    u64 high = orig_high;

    // Let's try with the best case (low = most shrunk) first
    RandomRun run_with_low = update_run(low, current_best);
@ -111,7 +111,7 @@ ShrinkResult binary_shrink(u32 orig_low, u32 orig_high, UpdateRunFn update_run,
    // pass/reject/overrun.
    ShrinkResult result = after_low;
    while (low + 1 < high) {
-        u32 mid = low + (high - low) / 2;
+        u64 mid = low + (high - low) / 2;
        RandomRun run_with_mid = update_run(mid, current_best);
        ShrinkResult after_mid = keep_if_better(run_with_mid, current_best, test_function);
        switch (after_mid.was_improvement) {
@ -194,7 +194,7 @@ ShrinkResult shrink_delete(DeleteChunkAndMaybeDecPrevious command, RandomRun con
 template<typename Fn>
 ShrinkResult shrink_minimize(MinimizeChoice command, RandomRun const& run, Fn const& test_function)
 {
-    u32 value = run[command.index];
+    u64 value = run[command.index];

    // We can't minimize 0! Already the best possible case.
    if (value == 0) {
@ -204,7 +204,7 @@ ShrinkResult shrink_minimize(MinimizeChoice command, RandomRun const& run, Fn co
    return binary_shrink(
        0,
        value,
-        [&](u32 new_value, RandomRun const& run) {
+        [&](u64 new_value, RandomRun const& run) {
            RandomRun copied_run = run;
            copied_run[command.index] = new_value;
            return copied_run;
@ -236,12 +236,12 @@ ShrinkResult shrink_redistribute(RedistributeChoicesAndMaybeInc command, RandomR

    ShrinkResult after_swap = keep_if_better(run_after_swap, current_best, test_function);
    current_best = after_swap.run;
-    u32 constant_sum = current_best[command.right_index] + current_best[command.left_index];
+    u64 constant_sum = current_best[command.right_index] + current_best[command.left_index];

    ShrinkResult after_redistribute = binary_shrink(
        0,
        current_best[command.left_index],
-        [&](u32 new_value, RandomRun const& run) {
+        [&](u64 new_value, RandomRun const& run) {
            RandomRun copied_run = run;
            copied_run[command.left_index] = new_value;
            copied_run[command.right_index] = constant_sum - new_value;
@ -275,7 +275,7 @@ ShrinkResult shrink_redistribute(RedistributeChoicesAndMaybeInc command, RandomR
    ShrinkResult after_inc_redistribute = binary_shrink(
        0,
        current_best[command.left_index],
-        [&](u32 new_value, RandomRun const& run) {
+        [&](u64 new_value, RandomRun const& run) {
            RandomRun copied_run = run;
            copied_run[command.left_index] = new_value;
            copied_run[command.right_index] = constant_sum - new_value;