Skip to content

Commit

Permalink
Improved test partitioning
Browse files Browse the repository at this point in the history
  • Loading branch information
camsaul committed Sep 5, 2024
1 parent d2c8235 commit 9577006
Show file tree
Hide file tree
Showing 2 changed files with 145 additions and 59 deletions.
100 changes: 82 additions & 18 deletions src/mb/hawk/core.clj
Original file line number Diff line number Diff line change
Expand Up @@ -121,23 +121,87 @@
[_nil options]
(find-tests (classpath/system-classpath) options))

(defn- partition-all-into-n-partitions
"Split sequence `xs` into `num-partitions` as equally as possible. Guaranteed to return `num-partitions`. This custom
function is used instead of [[partition-all]] or whatever because we want to make sure every partition gets tests,
even with weird combinations like 4 tests with 3 partitions or 29 tests with 10 partitions."
[num-partitions xs]
(defn- namespace*
"Like [[clojure.core/namespace]] but handles vars."
[x]
(cond
(instance? clojure.lang.Named x) (namespace x)
(var? x) (namespace (symbol x))
:else nil))

(defn- ensure-test-namespaces-are-contiguous
"Make sure `test-vars` have all the tests for each namespace next to one another so when we split we can do so without
splitting in the middle of a namespace. Does not otherwise change the order of the tests or namespaces."
[test-vars]
(let [namespace->sort-position (into {}
(map-indexed
(fn [i nmspace]
[nmspace i]))
(distinct (map namespace* test-vars)))
test-var->sort-position (into {}
(map-indexed
(fn [i varr]
[varr i]))
test-vars)]
(sort-by (juxt #(namespace->sort-position (namespace* %))
test-var->sort-position)
test-vars)))

(defn- make-test-var->partition [num-partitions test-vars]
(let [;; first figure out approximately how big each partition should be.
target-partition-size (/ (count test-vars) num-partitions)
;; then for each test var figure out what partition it would live in ideally if we weren't worried about making
;; sure namespaces are grouped together.
test-var->ideal-partition (into {}
(map-indexed (fn [i test-var]
(let [ideal-partition (long (math/floor (/ i target-partition-size)))]
(assert (<= 0 ideal-partition (dec num-partitions)))
[test-var ideal-partition]))
test-vars))
;; For each namespace figure out how many tests are in each and what the possible partitions we can put that
;; namespace into. For most namespaces there should only be one possible partition but for some the ideal split
;; happens in the middle of the namespace which means we have two possible candidate partitions to put it into.
namespace->num-tests (reduce
(fn [m test-var]
(update m (namespace* test-var) (fnil inc 0)))
{}
test-vars)
namespace->possible-partitions (reduce
(fn [m test-var]
(update m (namespace* test-var) #(conj (set %) (test-var->ideal-partition test-var))))
{}
test-vars)
;; Decide the canonical partition for each namespace. Keep track of how many tests are in each partititon. If
;; there are multiple possible candidate partitions for a namespace, choose the one that has the least tests in
;; it.
namespace->partition (:namespace->partition
(reduce
(fn [m nmspace]
(let [partition (first (sort-by (fn [partition]
(get-in m [:partition->size partition]))
(namespace->possible-partitions nmspace)))]
(-> m
(update-in [:partition->size partition] (fnil + 0) (namespace->num-tests nmspace))
(assoc-in [:namespace->partition nmspace] partition))))
{}
;; process namespaces in the order they appear in test-vars
(distinct (map namespace* test-vars))))]
(fn test-var->partition [test-var]
(get namespace->partition (namespace* test-var)))))

(defn- partition-tests-into-n-partitions
"Split a sequence of `test-vars` into `num-partitions` (returning a map of partition number => sequence of tests).
Attempts to divide tests up into partitions that are as equal as possible, but keeps tests in the same namespace
grouped together."
[num-partitions test-vars]
{:post [(= (count %) num-partitions)]}
;; make sure the partitioning is deterministic -- `xs` should always come back in the same order but we should sort
;; just to be safe.
(let [xs (sort-by str xs)
partition-size (/ (count xs) num-partitions)]
(into []
(comp (map-indexed (fn [i x]
[(long (math/floor (/ i partition-size))) x]))
(partition-by first)
(map (fn [partition]
(map second partition))))
xs)))
(let [test-vars (ensure-test-namespaces-are-contiguous test-vars)
test-var->partition (make-test-var->partition num-partitions test-vars)]
(reduce
(fn [m test-var]
(update m (test-var->partition test-var) #(conj (vec %) test-var)))
(sorted-map)
test-vars)))

(defn- partition-tests [tests {num-partitions :partition/total, partition-index :partition/index, :as _options}]
(if (or num-partitions partition-index)
Expand All @@ -152,8 +216,8 @@
"Invalid :partition/index - must be an integer")
(assert (<= 0 partition-index (dec num-partitions))
(format "Invalid :partition/index - must be between 0 and %d" (dec num-partitions)))
(let [partitions (partition-all-into-n-partitions num-partitions tests)
partition (nth partitions partition-index)]
(let [partition-index->tests (partition-tests-into-n-partitions num-partitions tests)
partition (get partition-index->tests partition-index)]
(printf "Running tests in partition %d of %d (%d tests of %d)...\n"
(inc partition-index)
num-partitions
Expand Down
104 changes: 63 additions & 41 deletions test/mb/hawk/core_test.clj
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
(ns ^:exclude-tags-test ^:mic/test mb.hawk.core-test
(:require
[clojure.test :refer :all]
[mb.hawk.core :as hawk]))
[mb.hawk.core :as hawk]
[mb.hawk.parallel-test]
[mb.hawk.speak-test]))

(deftest ^:exclude-this-test find-tests-test
(testing "symbol naming"
Expand Down Expand Up @@ -59,47 +61,67 @@
{:exclude-tags #{:exclude-this-test}}
{:exclude-tags [:exclude-this-test :another/tag]}))

(defn- partition-tests* [num-partitions tests]
(into (sorted-map)
(map (fn [i]
[i (#'hawk/partition-tests
tests
{:partition/index i, :partition/total num-partitions})]))
(range num-partitions)))

(deftest ^:parallel partition-tests-test
(are [i expected] (= expected
(#'hawk/partition-tests
(range 4)
{:partition/index i, :partition/total 3}))
0 [0 1]
1 [2]
2 [3])
(are [i expected] (= expected
(#'hawk/partition-tests
(range 5)
{:partition/index i, :partition/total 3}))
0 [0 1]
1 [2 3]
2 [4]))
(is (= '{0 [a/test b/test]
1 [c/test]
2 [d/test]}
(partition-tests* 3 '[a/test b/test c/test d/test])))
(is (= '{0 [a/test b/test]
1 [c/test d/test]
2 [e/test]}
(partition-tests* 3 '[a/test b/test c/test d/test e/test]))))

(deftest ^:parallel partition-tests-evenly-test
(testing "make sure we divide things roughly evenly"
(is (= '{0 [n00/test n01/test n02/test]
1 [n03/test n04/test n05/test]
2 [n06/test n07/test]
3 [n08/test n09/test n10/test]
4 [n11/test n12/test]
5 [n13/test n14/test n15/test]
6 [n16/test n17/test n18/test]
7 [n19/test n20/test]
8 [n21/test n22/test n23/test]
9 [n24/test n25/test]}
(partition-tests* 10 (map #(symbol (format "n%02d/test" %)) (range 26)))))))

(deftest ^:parallel partition-should-not-split-in-the-middle-of-a-namespace-test
(testing "Partitioning should not split in the middle of a namespace"
(is (= '{0 [a/test-1 a/test-2 a/test-3]
1 [b/test-1]}
(partition-tests* 2 '[a/test-1 a/test-2 a/test-3 b/test-1])))))

(deftest ^:parallel partition-tests-determinism-test
(testing "partitioning should be deterministic even if tests come back in a non-deterministic order for some reason"
(are [i expected] (= expected
(#'hawk/partition-tests
(shuffle (map #(format "%02d" %) (range 26)))
{:partition/index i, :partition/total 10}))
0 ["00" "01" "02"]
1 ["03" "04" "05"]
2 ["06" "07"]
3 ["08" "09" "10"]
4 ["11" "12"]
5 ["13" "14" "15"]
6 ["16" "17" "18"]
7 ["19" "20"]
8 ["21" "22" "23"]
9 ["24" "25"])))
(deftest ^:parallel partition-preserve-order-test
(testing "Partitioning should preserve order of namespaces and vars"
(is (= '{0 [b/test-1 b/test-2 b/test-3]
1 [a/test-1 a/test-3 a/test-2]}
(partition-tests* 2 '[b/test-1 b/test-2 b/test-3 a/test-1 a/test-3 a/test-2])))))

(deftest ^:parallel partition-test
(are [index expected] (= expected
(hawk/find-tests-with-options {:only `[find-tests-test
exclude-tags-test
partition-tests-test
partition-test]
:partition/index index
:partition/total 3}))
0 [#'exclude-tags-test #'find-tests-test]
1 [#'partition-test]
2 [#'partition-tests-test]))
(is (= {0 [#'find-tests-test
#'exclude-tags-test]
1 [#'mb.hawk.speak-test/speak-results-test]
2 [#'mb.hawk.parallel-test/ns-parallel-test
#'mb.hawk.parallel-test/var-not-parallel-test]}
(into (sorted-map)
(map (fn [i]
[i (hawk/find-tests-with-options
{:only [`find-tests-test
'mb.hawk.speak-test/speak-results-test
;; this var intentionally comes after a different var in a different
;; namespace to make sure we partition things in a way that groups
;; namespaces together
`exclude-tags-test
'mb.hawk.parallel-test/ns-parallel-test
'mb.hawk.parallel-test/var-not-parallel-test]
:partition/index i
:partition/total 3})]))
(range 3)))))

0 comments on commit 9577006

Please sign in to comment.