diff options
author | Joshua Watt <jpewhacker@gmail.com> | 2019-09-17 08:37:11 -0500 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-09-18 17:52:01 +0100 |
commit | 20f032338ff3b4b25f2cbb7f975b5fd1c105004d (patch) | |
tree | 84c1a4693fdbaac0823e99d70ed7c814b890244c /bitbake/bin | |
parent | 34923e4f772fc57c29421741d2f622eb4009961c (diff) | |
download | poky-20f032338ff3b4b25f2cbb7f975b5fd1c105004d.tar.gz |
bitbake: bitbake: Rework hash equivalence
Reworks the hash equivalence server to address performance issues that
were encountered with the REST mechanism used previously, particularly
during the heavy request load encountered during signature generation.
Notable changes are:
1) The server protocol is no longer HTTP-based. Instead, it uses a
simpler protocol that sends JSON over a streaming link. This protocol
has much lower overhead than HTTP since it eliminates the HTTP headers.
2) The hash equivalence server can either bind to a TCP port, or a Unix
domain socket. Unix domain sockets are more efficient for local
communication, and so are preferred if the user enables hash
equivalence only for the local build. The arguments to the
'bitbake-hashserv' command have been updated accordingly.
3) The value to which BB_HASHSERVE should be set to enable a local hash
equivalence server is changed to "auto" instead of "localhost:0". The
latter didn't make sense when the local server was using a Unix
domain socket.
4) For optimal performance, clients are expected to keep a persistent
connection to the server instead of creating a new connection each
time a request is made.
5) Most of the client logic has been moved to the hashserve module in
bitbake. This makes it easier to share the client code.
6) A new bitbake command has been added called 'bitbake-hashclient'.
This command can be used to query a hash equivalence server, including
fetching the statistics and running a performance stress test.
7) The table indexes in the SQLite database have been updated to
optimize hash lookups. This change is backward compatible, as the
database will delete the old indexes first if they exist.
8) The server has been reworked to use Python async to maximize
performance with persistently connected clients. This requires Python
3.5 or later.
(Bitbake rev: 2124eec3a5830afe8e07ffb6f2a0df6a417ac973)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/bin')
-rwxr-xr-x | bitbake/bin/bitbake-hashclient | 170 | ||||
-rwxr-xr-x | bitbake/bin/bitbake-hashserv | 24 | ||||
-rwxr-xr-x | bitbake/bin/bitbake-worker | 2 |
3 files changed, 186 insertions, 10 deletions
diff --git a/bitbake/bin/bitbake-hashclient b/bitbake/bin/bitbake-hashclient new file mode 100755 index 0000000000..29ab65f177 --- /dev/null +++ b/bitbake/bin/bitbake-hashclient | |||
@@ -0,0 +1,170 @@ | |||
1 | #! /usr/bin/env python3 | ||
2 | # | ||
3 | # Copyright (C) 2019 Garmin Ltd. | ||
4 | # | ||
5 | # SPDX-License-Identifier: GPL-2.0-only | ||
6 | # | ||
7 | |||
8 | import argparse | ||
9 | import hashlib | ||
10 | import logging | ||
11 | import os | ||
12 | import pprint | ||
13 | import sys | ||
14 | import threading | ||
15 | import time | ||
16 | |||
17 | try: | ||
18 | import tqdm | ||
19 | ProgressBar = tqdm.tqdm | ||
20 | except ImportError: | ||
21 | class ProgressBar(object): | ||
22 | def __init__(self, *args, **kwargs): | ||
23 | pass | ||
24 | |||
25 | def __enter__(self): | ||
26 | return self | ||
27 | |||
28 | def __exit__(self, *args, **kwargs): | ||
29 | pass | ||
30 | |||
31 | def update(self): | ||
32 | pass | ||
33 | |||
34 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), 'lib')) | ||
35 | |||
36 | import hashserv | ||
37 | |||
38 | DEFAULT_ADDRESS = 'unix://./hashserve.sock' | ||
39 | METHOD = 'stress.test.method' | ||
40 | |||
41 | |||
42 | def main(): | ||
43 | def handle_stats(args, client): | ||
44 | if args.reset: | ||
45 | s = client.reset_stats() | ||
46 | else: | ||
47 | s = client.get_stats() | ||
48 | pprint.pprint(s) | ||
49 | return 0 | ||
50 | |||
51 | def handle_stress(args, client): | ||
52 | def thread_main(pbar, lock): | ||
53 | nonlocal found_hashes | ||
54 | nonlocal missed_hashes | ||
55 | nonlocal max_time | ||
56 | |||
57 | client = hashserv.create_client(args.address) | ||
58 | |||
59 | for i in range(args.requests): | ||
60 | taskhash = hashlib.sha256() | ||
61 | taskhash.update(args.taskhash_seed.encode('utf-8')) | ||
62 | taskhash.update(str(i).encode('utf-8')) | ||
63 | |||
64 | start_time = time.perf_counter() | ||
65 | l = client.get_unihash(METHOD, taskhash.hexdigest()) | ||
66 | elapsed = time.perf_counter() - start_time | ||
67 | |||
68 | with lock: | ||
69 | if l: | ||
70 | found_hashes += 1 | ||
71 | else: | ||
72 | missed_hashes += 1 | ||
73 | |||
74 | max_time = max(elapsed, max_time) | ||
75 | pbar.update() | ||
76 | |||
77 | max_time = 0 | ||
78 | found_hashes = 0 | ||
79 | missed_hashes = 0 | ||
80 | lock = threading.Lock() | ||
81 | total_requests = args.clients * args.requests | ||
82 | start_time = time.perf_counter() | ||
83 | with ProgressBar(total=total_requests) as pbar: | ||
84 | threads = [threading.Thread(target=thread_main, args=(pbar, lock), daemon=False) for _ in range(args.clients)] | ||
85 | for t in threads: | ||
86 | t.start() | ||
87 | |||
88 | for t in threads: | ||
89 | t.join() | ||
90 | |||
91 | elapsed = time.perf_counter() - start_time | ||
92 | with lock: | ||
93 | print("%d requests in %.1fs. %.1f requests per second" % (total_requests, elapsed, total_requests / elapsed)) | ||
94 | print("Average request time %.8fs" % (elapsed / total_requests)) | ||
95 | print("Max request time was %.8fs" % max_time) | ||
96 | print("Found %d hashes, missed %d" % (found_hashes, missed_hashes)) | ||
97 | |||
98 | if args.report: | ||
99 | with ProgressBar(total=args.requests) as pbar: | ||
100 | for i in range(args.requests): | ||
101 | taskhash = hashlib.sha256() | ||
102 | taskhash.update(args.taskhash_seed.encode('utf-8')) | ||
103 | taskhash.update(str(i).encode('utf-8')) | ||
104 | |||
105 | outhash = hashlib.sha256() | ||
106 | outhash.update(args.outhash_seed.encode('utf-8')) | ||
107 | outhash.update(str(i).encode('utf-8')) | ||
108 | |||
109 | client.report_unihash(taskhash.hexdigest(), METHOD, outhash.hexdigest(), taskhash.hexdigest()) | ||
110 | |||
111 | with lock: | ||
112 | pbar.update() | ||
113 | |||
114 | parser = argparse.ArgumentParser(description='Hash Equivalence Client') | ||
115 | parser.add_argument('--address', default=DEFAULT_ADDRESS, help='Server address (default "%(default)s")') | ||
116 | parser.add_argument('--log', default='WARNING', help='Set logging level') | ||
117 | |||
118 | subparsers = parser.add_subparsers() | ||
119 | |||
120 | stats_parser = subparsers.add_parser('stats', help='Show server stats') | ||
121 | stats_parser.add_argument('--reset', action='store_true', | ||
122 | help='Reset server stats') | ||
123 | stats_parser.set_defaults(func=handle_stats) | ||
124 | |||
125 | stress_parser = subparsers.add_parser('stress', help='Run stress test') | ||
126 | stress_parser.add_argument('--clients', type=int, default=10, | ||
127 | help='Number of simultaneous clients') | ||
128 | stress_parser.add_argument('--requests', type=int, default=1000, | ||
129 | help='Number of requests each client will perform') | ||
130 | stress_parser.add_argument('--report', action='store_true', | ||
131 | help='Report new hashes') | ||
132 | stress_parser.add_argument('--taskhash-seed', default='', | ||
133 | help='Include string in taskhash') | ||
134 | stress_parser.add_argument('--outhash-seed', default='', | ||
135 | help='Include string in outhash') | ||
136 | stress_parser.set_defaults(func=handle_stress) | ||
137 | |||
138 | args = parser.parse_args() | ||
139 | |||
140 | logger = logging.getLogger('hashserv') | ||
141 | |||
142 | level = getattr(logging, args.log.upper(), None) | ||
143 | if not isinstance(level, int): | ||
144 | raise ValueError('Invalid log level: %s' % args.log) | ||
145 | |||
146 | logger.setLevel(level) | ||
147 | console = logging.StreamHandler() | ||
148 | console.setLevel(level) | ||
149 | logger.addHandler(console) | ||
150 | |||
151 | func = getattr(args, 'func', None) | ||
152 | if func: | ||
153 | client = hashserv.create_client(args.address) | ||
154 | # Try to establish a connection to the server now to detect failures | ||
155 | # early | ||
156 | client.connect() | ||
157 | |||
158 | return func(args, client) | ||
159 | |||
160 | return 0 | ||
161 | |||
162 | |||
163 | if __name__ == '__main__': | ||
164 | try: | ||
165 | ret = main() | ||
166 | except Exception: | ||
167 | ret = 1 | ||
168 | import traceback | ||
169 | traceback.print_exc() | ||
170 | sys.exit(ret) | ||
diff --git a/bitbake/bin/bitbake-hashserv b/bitbake/bin/bitbake-hashserv index 6c911c098a..1bc1f91f38 100755 --- a/bitbake/bin/bitbake-hashserv +++ b/bitbake/bin/bitbake-hashserv | |||
@@ -11,20 +11,26 @@ import logging | |||
11 | import argparse | 11 | import argparse |
12 | import sqlite3 | 12 | import sqlite3 |
13 | 13 | ||
14 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)),'lib')) | 14 | sys.path.insert(0, os.path.join(os.path.dirname(os.path.dirname(__file__)), 'lib')) |
15 | 15 | ||
16 | import hashserv | 16 | import hashserv |
17 | 17 | ||
18 | VERSION = "1.0.0" | 18 | VERSION = "1.0.0" |
19 | 19 | ||
20 | DEFAULT_HOST = '' | 20 | DEFAULT_BIND = 'unix://./hashserve.sock' |
21 | DEFAULT_PORT = 8686 | 21 | |
22 | 22 | ||
23 | def main(): | 23 | def main(): |
24 | parser = argparse.ArgumentParser(description='HTTP Equivalence Reference Server. Version=%s' % VERSION) | 24 | parser = argparse.ArgumentParser(description='Hash Equivalence Reference Server. Version=%s' % VERSION, |
25 | parser.add_argument('--address', default=DEFAULT_HOST, help='Bind address (default "%(default)s")') | 25 | epilog='''The bind address is the path to a unix domain socket if it is |
26 | parser.add_argument('--port', type=int, default=DEFAULT_PORT, help='Bind port (default %(default)d)') | 26 | prefixed with "unix://". Otherwise, it is an IP address |
27 | parser.add_argument('--prefix', default='', help='HTTP path prefix (default "%(default)s")') | 27 | and port in form ADDRESS:PORT. To bind to all addresses, leave |
28 | the ADDRESS empty, e.g. "--bind :8686". To bind to a specific | ||
29 | IPv6 address, enclose the address in "[]", e.g. | ||
30 | "--bind [::1]:8686"''' | ||
31 | ) | ||
32 | |||
33 | parser.add_argument('--bind', default=DEFAULT_BIND, help='Bind address (default "%(default)s")') | ||
28 | parser.add_argument('--database', default='./hashserv.db', help='Database file (default "%(default)s")') | 34 | parser.add_argument('--database', default='./hashserv.db', help='Database file (default "%(default)s")') |
29 | parser.add_argument('--log', default='WARNING', help='Set logging level') | 35 | parser.add_argument('--log', default='WARNING', help='Set logging level') |
30 | 36 | ||
@@ -41,10 +47,11 @@ def main(): | |||
41 | console.setLevel(level) | 47 | console.setLevel(level) |
42 | logger.addHandler(console) | 48 | logger.addHandler(console) |
43 | 49 | ||
44 | server = hashserv.create_server((args.address, args.port), args.database, args.prefix) | 50 | server = hashserv.create_server(args.bind, args.database) |
45 | server.serve_forever() | 51 | server.serve_forever() |
46 | return 0 | 52 | return 0 |
47 | 53 | ||
54 | |||
48 | if __name__ == '__main__': | 55 | if __name__ == '__main__': |
49 | try: | 56 | try: |
50 | ret = main() | 57 | ret = main() |
@@ -53,4 +60,3 @@ if __name__ == '__main__': | |||
53 | import traceback | 60 | import traceback |
54 | traceback.print_exc() | 61 | traceback.print_exc() |
55 | sys.exit(ret) | 62 | sys.exit(ret) |
56 | |||
diff --git a/bitbake/bin/bitbake-worker b/bitbake/bin/bitbake-worker index 96369199f2..6776cadda3 100755 --- a/bitbake/bin/bitbake-worker +++ b/bitbake/bin/bitbake-worker | |||
@@ -418,7 +418,7 @@ class BitbakeWorker(object): | |||
418 | bb.msg.loggerDefaultDomains = self.workerdata["logdefaultdomain"] | 418 | bb.msg.loggerDefaultDomains = self.workerdata["logdefaultdomain"] |
419 | for mc in self.databuilder.mcdata: | 419 | for mc in self.databuilder.mcdata: |
420 | self.databuilder.mcdata[mc].setVar("PRSERV_HOST", self.workerdata["prhost"]) | 420 | self.databuilder.mcdata[mc].setVar("PRSERV_HOST", self.workerdata["prhost"]) |
421 | self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.workerdata["hashservport"]) | 421 | self.databuilder.mcdata[mc].setVar("BB_HASHSERVE", self.workerdata["hashservaddr"]) |
422 | 422 | ||
423 | def handle_newtaskhashes(self, data): | 423 | def handle_newtaskhashes(self, data): |
424 | self.workerdata["newhashes"] = pickle.loads(data) | 424 | self.workerdata["newhashes"] = pickle.loads(data) |