aboutsummaryrefslogtreecommitdiff
path: root/tools/buildman
diff options
context:
space:
mode:
authorSimon Glass2024-06-23 11:55:15 -0600
committerSimon Glass2024-07-03 07:36:33 +0100
commit5d679f801d05fb728678c23d75d0113512e43cca (patch)
treefe50dee2ae983a336c3be6f3c0b937d88b5be543 /tools/buildman
parent8941477e02717a7104f8400363979fc3831a4041 (diff)
buildman: Add a way to limit the number of buildmans
Buildman uses all available CPUs by default, so running more than one or two concurrent processes is not normally useful. However in some CI cases we want to be able to run several jobs at once to save time. For example, in a lab situation we may want to run a test on 20 boards at a time, since only the build step actually takes much CPU. Add an option which allows such a limit. When buildman starts up, it waits until the number of running processes goes below the limit, then claims a spot in the list. The list is maintained with a temporary file. Note that the temp file is user-specific, since it is hard to create a locked temporary file which can be accessed by any user. In most cases, only one user is running jobs on a machine, so this should not matter. Signed-off-by: Simon Glass <sjg@chromium.org>
Diffstat (limited to 'tools/buildman')
-rw-r--r--tools/buildman/buildman.rst5
-rw-r--r--tools/buildman/cmdline.py2
-rw-r--r--tools/buildman/control.py140
-rw-r--r--tools/buildman/pyproject.toml6
-rw-r--r--tools/buildman/test.py121
5 files changed, 272 insertions, 2 deletions
diff --git a/tools/buildman/buildman.rst b/tools/buildman/buildman.rst
index bd0482af5f7..b8ff3bf1ab2 100644
--- a/tools/buildman/buildman.rst
+++ b/tools/buildman/buildman.rst
@@ -1286,6 +1286,11 @@ then buildman hangs. Failing to handle any eventuality is a bug in buildman and
should be reported. But you can use -T0 to disable threading and hopefully
figure out the root cause of the build failure.
+For situations where buildman is invoked from multiple running processes, it is
+sometimes useful to have buildman wait until the others have finished. Use the
+--process-limit option for this: --process-limit 1 will allow only one buildman
+to process jobs at a time.
+
Build summary
-------------
diff --git a/tools/buildman/cmdline.py b/tools/buildman/cmdline.py
index 8dc5a8787b5..544a391a464 100644
--- a/tools/buildman/cmdline.py
+++ b/tools/buildman/cmdline.py
@@ -129,6 +129,8 @@ def add_after_m(parser):
default=False, help="Use an O= (output) directory per board rather than per thread")
parser.add_argument('--print-arch', action='store_true',
default=False, help="Print the architecture for a board (ARCH=)")
+ parser.add_argument('--process-limit', type=int,
+ default=0, help='Limit to number of buildmans running at once')
parser.add_argument('-r', '--reproducible-builds', action='store_true',
help='Set SOURCE_DATE_EPOCH=0 to suuport a reproducible build')
parser.add_argument('-R', '--regen-board-list', type=str,
diff --git a/tools/buildman/control.py b/tools/buildman/control.py
index f2dd87814c3..464835c5be5 100644
--- a/tools/buildman/control.py
+++ b/tools/buildman/control.py
@@ -7,10 +7,13 @@
This holds the main control logic for buildman, when not running tests.
"""
+import getpass
import multiprocessing
import os
import shutil
import sys
+import tempfile
+import time
from buildman import boards
from buildman import bsettings
@@ -21,10 +24,23 @@ from patman import gitutil
from patman import patchstream
from u_boot_pylib import command
from u_boot_pylib import terminal
-from u_boot_pylib.terminal import tprint
+from u_boot_pylib import tools
+from u_boot_pylib.terminal import print_clear, tprint
TEST_BUILDER = None
+# Space-separated list of buildman process IDs currently running jobs
+RUNNING_FNAME = f'buildmanq.{getpass.getuser()}'
+
+# Lock file for access to RUNNING_FILE
+LOCK_FNAME = f'{RUNNING_FNAME}.lock'
+
+# Wait time for access to lock (seconds)
+LOCK_WAIT_S = 10
+
+# Wait time to start running
+RUN_WAIT_S = 300
+
def get_plural(count):
"""Returns a plural 's' if count is not 1"""
return 's' if count != 1 else ''
@@ -578,6 +594,125 @@ def calc_adjust_cfg(adjust_cfg, reproducible_builds):
return adjust_cfg
+def read_procs(tmpdir=tempfile.gettempdir()):
+ """Read the list of running buildman processes
+
+ If the list is corrupted, returns an empty list
+
+ Args:
+ tmpdir (str): Temporary directory to use (for testing only)
+ """
+ running_fname = os.path.join(tmpdir, RUNNING_FNAME)
+ procs = []
+ if os.path.exists(running_fname):
+ items = tools.read_file(running_fname, binary=False).split()
+ try:
+ procs = [int(x) for x in items]
+ except ValueError: # Handle invalid format
+ pass
+ return procs
+
+
+def check_pid(pid):
+ """Check for existence of a unix PID
+
+ https://stackoverflow.com/questions/568271/how-to-check-if-there-exists-a-process-with-a-given-pid-in-python
+
+ Args:
+ pid (int): PID to check
+
+ Returns:
+ True if it exists, else False
+ """
+ try:
+ os.kill(pid, 0)
+ except OSError:
+ return False
+ else:
+ return True
+
+
+def write_procs(procs, tmpdir=tempfile.gettempdir()):
+ """Write the list of running buildman processes
+
+ Args:
+ tmpdir (str): Temporary directory to use (for testing only)
+ """
+ running_fname = os.path.join(tmpdir, RUNNING_FNAME)
+ tools.write_file(running_fname, ' '.join([str(p) for p in procs]),
+ binary=False)
+
+ # Allow another user to access the file
+ os.chmod(running_fname, 0o666)
+
+def wait_for_process_limit(limit, tmpdir=tempfile.gettempdir(),
+ pid=os.getpid()):
+ """Wait until the number of buildman processes drops to the limit
+
+ This uses FileLock to protect a 'running' file, which contains a list of
+ PIDs of running buildman processes. The number of PIDs in the file indicates
+ the number of running processes.
+
+ When buildman starts up, it calls this function to wait until it is OK to
+ start the build.
+
+ On exit, no attempt is made to remove the PID from the file, since other
+ buildman processes will notice that the PID is no-longer valid, and ignore
+ it.
+
+ Two timeouts are provided:
+ LOCK_WAIT_S: length of time to wait for the lock; if this occurs, the
+ lock is busted / removed before trying again
+ RUN_WAIT_S: length of time to wait to be allowed to run; if this occurs,
+ the build starts, with the PID being added to the file.
+
+ Args:
+ limit (int): Maximum number of buildman processes, including this one;
+ must be > 0
+ tmpdir (str): Temporary directory to use (for testing only)
+ pid (int): Current process ID (for testing only)
+ """
+ from filelock import Timeout, FileLock
+
+ running_fname = os.path.join(tmpdir, RUNNING_FNAME)
+ lock_fname = os.path.join(tmpdir, LOCK_FNAME)
+ lock = FileLock(lock_fname)
+
+ # Allow another user to access the file
+ col = terminal.Color()
+ tprint('Waiting for other buildman processes...', newline=False,
+ colour=col.RED)
+
+ claimed = False
+ deadline = time.time() + RUN_WAIT_S
+ while True:
+ try:
+ with lock.acquire(timeout=LOCK_WAIT_S):
+ os.chmod(lock_fname, 0o666)
+ procs = read_procs(tmpdir)
+
+ # Drop PIDs which are not running
+ procs = list(filter(check_pid, procs))
+
+ # If we haven't hit the limit, add ourself
+ if len(procs) < limit:
+ tprint('done...', newline=False)
+ claimed = True
+ if time.time() >= deadline:
+ tprint('timeout...', newline=False)
+ claimed = True
+ if claimed:
+ write_procs(procs + [pid], tmpdir)
+ break
+
+ except Timeout:
+ tprint('failed to get lock: busting...', newline=False)
+ os.remove(lock_fname)
+
+ time.sleep(1)
+ tprint('starting build', newline=False)
+ print_clear()
+
def do_buildman(args, toolchains=None, make_func=None, brds=None,
clean_dir=False, test_thread_exceptions=False):
"""The main control code for buildman
@@ -677,5 +812,8 @@ def do_buildman(args, toolchains=None, make_func=None, brds=None,
TEST_BUILDER = builder
+ if args.process_limit:
+ wait_for_process_limit(args.process_limit)
+
return run_builder(builder, series.commits if series else None,
brds.get_selected_dict(), args)
diff --git a/tools/buildman/pyproject.toml b/tools/buildman/pyproject.toml
index fe0f6421b53..68bfa45c3f4 100644
--- a/tools/buildman/pyproject.toml
+++ b/tools/buildman/pyproject.toml
@@ -8,7 +8,11 @@ version = "0.0.6"
authors = [
{ name="Simon Glass", email="sjg@chromium.org" },
]
-dependencies = ["u_boot_pylib >= 0.0.6", "patch-manager >= 0.0.6"]
+dependencies = [
+ "filelock >= 3.0.12",
+ "u_boot_pylib >= 0.0.6",
+ "patch-manager >= 0.0.6"
+]
description = "Buildman build tool for U-Boot"
readme = "README.rst"
requires-python = ">=3.7"
diff --git a/tools/buildman/test.py b/tools/buildman/test.py
index 79164bd1993..bfad3093030 100644
--- a/tools/buildman/test.py
+++ b/tools/buildman/test.py
@@ -2,12 +2,14 @@
# Copyright (c) 2012 The Chromium OS Authors.
#
+from filelock import FileLock
import os
import shutil
import sys
import tempfile
import time
import unittest
+from unittest.mock import patch
from buildman import board
from buildman import boards
@@ -156,6 +158,11 @@ class TestBuild(unittest.TestCase):
if not os.path.isdir(self.base_dir):
os.mkdir(self.base_dir)
+ self.cur_time = 0
+ self.valid_pids = []
+ self.finish_time = None
+ self.finish_pid = None
+
def tearDown(self):
shutil.rmtree(self.base_dir)
@@ -747,6 +754,120 @@ class TestBuild(unittest.TestCase):
self.assertEqual([
['MARY="mary"', 'Missing expected line: CONFIG_MARY="mary"']], result)
+ def get_procs(self):
+ running_fname = os.path.join(self.base_dir, control.RUNNING_FNAME)
+ items = tools.read_file(running_fname, binary=False).split()
+ return [int(x) for x in items]
+
+ def get_time(self):
+ return self.cur_time
+
+ def inc_time(self, amount):
+ self.cur_time += amount
+
+ # Handle a process exiting
+ if self.finish_time == self.cur_time:
+ self.valid_pids = [pid for pid in self.valid_pids
+ if pid != self.finish_pid]
+
+ def kill(self, pid, signal):
+ if pid not in self.valid_pids:
+ raise OSError('Invalid PID')
+
+ def test_process_limit(self):
+ """Test wait_for_process_limit() function"""
+ tmpdir = self.base_dir
+
+ with (patch('time.time', side_effect=self.get_time),
+ patch('time.sleep', side_effect=self.inc_time),
+ patch('os.kill', side_effect=self.kill)):
+ # Grab the process. Since there is no other profcess, this should
+ # immediately succeed
+ control.wait_for_process_limit(1, tmpdir=tmpdir, pid=1)
+ lines = terminal.get_print_test_lines()
+ self.assertEqual(0, self.cur_time)
+ self.assertEqual('Waiting for other buildman processes...',
+ lines[0].text)
+ self.assertEqual(self._col.RED, lines[0].colour)
+ self.assertEqual(False, lines[0].newline)
+ self.assertEqual(True, lines[0].bright)
+
+ self.assertEqual('done...', lines[1].text)
+ self.assertEqual(None, lines[1].colour)
+ self.assertEqual(False, lines[1].newline)
+ self.assertEqual(True, lines[1].bright)
+
+ self.assertEqual('starting build', lines[2].text)
+ self.assertEqual([1], control.read_procs(tmpdir))
+ self.assertEqual(None, lines[2].colour)
+ self.assertEqual(False, lines[2].newline)
+ self.assertEqual(True, lines[2].bright)
+
+ # Try again, with a different PID...this should eventually timeout
+ # and start the build anyway
+ self.cur_time = 0
+ self.valid_pids = [1]
+ control.wait_for_process_limit(1, tmpdir=tmpdir, pid=2)
+ lines = terminal.get_print_test_lines()
+ self.assertEqual('Waiting for other buildman processes...',
+ lines[0].text)
+ self.assertEqual('timeout...', lines[1].text)
+ self.assertEqual(None, lines[1].colour)
+ self.assertEqual(False, lines[1].newline)
+ self.assertEqual(True, lines[1].bright)
+ self.assertEqual('starting build', lines[2].text)
+ self.assertEqual([1, 2], control.read_procs(tmpdir))
+ self.assertEqual(control.RUN_WAIT_S, self.cur_time)
+
+ # Check lock-busting
+ self.cur_time = 0
+ self.valid_pids = [1, 2]
+ lock_fname = os.path.join(tmpdir, control.LOCK_FNAME)
+ lock = FileLock(lock_fname)
+ lock.acquire(timeout=1)
+ control.wait_for_process_limit(1, tmpdir=tmpdir, pid=3)
+ lines = terminal.get_print_test_lines()
+ self.assertEqual('Waiting for other buildman processes...',
+ lines[0].text)
+ self.assertEqual('failed to get lock: busting...', lines[1].text)
+ self.assertEqual(None, lines[1].colour)
+ self.assertEqual(False, lines[1].newline)
+ self.assertEqual(True, lines[1].bright)
+ self.assertEqual('timeout...', lines[2].text)
+ self.assertEqual('starting build', lines[3].text)
+ self.assertEqual([1, 2, 3], control.read_procs(tmpdir))
+ self.assertEqual(control.RUN_WAIT_S, self.cur_time)
+ lock.release()
+
+ # Check handling of dead processes. Here we have PID 2 as a running
+ # process, even though the PID file contains 1, 2 and 3. So we can
+ # add one more PID, to make 2 and 4
+ self.cur_time = 0
+ self.valid_pids = [2]
+ control.wait_for_process_limit(2, tmpdir=tmpdir, pid=4)
+ lines = terminal.get_print_test_lines()
+ self.assertEqual('Waiting for other buildman processes...',
+ lines[0].text)
+ self.assertEqual('done...', lines[1].text)
+ self.assertEqual('starting build', lines[2].text)
+ self.assertEqual([2, 4], control.read_procs(tmpdir))
+ self.assertEqual(0, self.cur_time)
+
+ # Try again, with PID 2 quitting at time 50. This allows the new
+ # build to start
+ self.cur_time = 0
+ self.valid_pids = [2, 4]
+ self.finish_pid = 2
+ self.finish_time = 50
+ control.wait_for_process_limit(2, tmpdir=tmpdir, pid=5)
+ lines = terminal.get_print_test_lines()
+ self.assertEqual('Waiting for other buildman processes...',
+ lines[0].text)
+ self.assertEqual('done...', lines[1].text)
+ self.assertEqual('starting build', lines[2].text)
+ self.assertEqual([4, 5], control.read_procs(tmpdir))
+ self.assertEqual(self.finish_time, self.cur_time)
+
if __name__ == "__main__":
unittest.main()