diff options
author | Joshua Watt <jpewhacker@gmail.com> | 2019-07-23 09:16:37 -0500 |
---|---|---|
committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2019-08-06 11:21:31 +0100 |
commit | 9802b2e6509bfc67f979f742e93b35340af62af8 (patch) | |
tree | 6e664fc7b1fdcf1132ff9f547341530104a23c04 /bitbake/lib | |
parent | 6c7c0cefd34067311144a1d4c01986fe0a4aef26 (diff) | |
download | poky-9802b2e6509bfc67f979f742e93b35340af62af8.tar.gz |
bitbake: hashserv: SQL Optimizations
Implements a number of optimizations to the SQL used in the hash
equivalence server:
1) Two indexes are created for the two methods, (method, taskhash) and
(method, outhash), by which rows are found in order to speed up the
lookup
2) An extra SELECT to look up the just-inserted row was removed. This
SELECT is unnecessary since all of the information about the newly
inserted row is already available.
3) A uniqueness constraint was added to the table. This should allow
the server to be multithreaded in the future since duplicate inserts
can be detected (and ignored). This change requires bumping the
database version to '2', since a uniqueness constraint can't be
added to an existing table.
4) Some comments are added to clarify the tricky SELECT statement used
when inserting new equivalent hashes
(Bitbake rev: 7aec8632e67b4f0ab7b72692c40a42f6926608c3)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib')
-rw-r--r-- | bitbake/lib/hashserv/__init__.py | 37 |
1 files changed, 27 insertions, 10 deletions
diff --git a/bitbake/lib/hashserv/__init__.py b/bitbake/lib/hashserv/__init__.py index fdc9ced9f2..768c5504cf 100644 --- a/bitbake/lib/hashserv/__init__.py +++ b/bitbake/lib/hashserv/__init__.py | |||
@@ -1,4 +1,4 @@ | |||
1 | # Copyright (C) 2018 Garmin Ltd. | 1 | # Copyright (C) 2018-2019 Garmin Ltd. |
2 | # | 2 | # |
3 | # SPDX-License-Identifier: GPL-2.0-only | 3 | # SPDX-License-Identifier: GPL-2.0-only |
4 | # | 4 | # |
@@ -32,7 +32,7 @@ class HashEquivalenceServer(BaseHTTPRequestHandler): | |||
32 | 32 | ||
33 | d = None | 33 | d = None |
34 | with contextlib.closing(self.db.cursor()) as cursor: | 34 | with contextlib.closing(self.db.cursor()) as cursor: |
35 | cursor.execute('SELECT taskhash, method, unihash FROM tasks_v1 WHERE method=:method AND taskhash=:taskhash ORDER BY created ASC LIMIT 1', | 35 | cursor.execute('SELECT taskhash, method, unihash FROM tasks_v2 WHERE method=:method AND taskhash=:taskhash ORDER BY created ASC LIMIT 1', |
36 | {'method': method, 'taskhash': taskhash}) | 36 | {'method': method, 'taskhash': taskhash}) |
37 | 37 | ||
38 | row = cursor.fetchone() | 38 | row = cursor.fetchone() |
@@ -63,15 +63,29 @@ class HashEquivalenceServer(BaseHTTPRequestHandler): | |||
63 | 63 | ||
64 | with contextlib.closing(self.db.cursor()) as cursor: | 64 | with contextlib.closing(self.db.cursor()) as cursor: |
65 | cursor.execute(''' | 65 | cursor.execute(''' |
66 | SELECT taskhash, method, unihash FROM tasks_v1 WHERE method=:method AND outhash=:outhash | 66 | -- Find tasks with a matching outhash (that is, tasks that |
67 | -- are equivalent) | ||
68 | SELECT taskhash, method, unihash FROM tasks_v2 WHERE method=:method AND outhash=:outhash | ||
69 | |||
70 | -- If there is an exact match on the taskhash, return it. | ||
71 | -- Otherwise return the oldest matching outhash of any | ||
72 | -- taskhash | ||
67 | ORDER BY CASE WHEN taskhash=:taskhash THEN 1 ELSE 2 END, | 73 | ORDER BY CASE WHEN taskhash=:taskhash THEN 1 ELSE 2 END, |
68 | created ASC | 74 | created ASC |
75 | |||
76 | -- Only return one row | ||
69 | LIMIT 1 | 77 | LIMIT 1 |
70 | ''', {k: data[k] for k in ('method', 'outhash', 'taskhash')}) | 78 | ''', {k: data[k] for k in ('method', 'outhash', 'taskhash')}) |
71 | 79 | ||
72 | row = cursor.fetchone() | 80 | row = cursor.fetchone() |
73 | 81 | ||
82 | # If no matching outhash was found, or one *was* found but it | ||
83 | # wasn't an exact match on the taskhash, a new entry for this | ||
84 | # taskhash should be added | ||
74 | if row is None or row['taskhash'] != data['taskhash']: | 85 | if row is None or row['taskhash'] != data['taskhash']: |
86 | # If a row matching the outhash was found, the unihash for | ||
87 | # the new taskhash should be the same as that one. | ||
88 | # Otherwise the caller provided unihash is used. | ||
75 | unihash = data['unihash'] | 89 | unihash = data['unihash'] |
76 | if row is not None: | 90 | if row is not None: |
77 | unihash = row['unihash'] | 91 | unihash = row['unihash'] |
@@ -88,18 +102,17 @@ class HashEquivalenceServer(BaseHTTPRequestHandler): | |||
88 | if k in data: | 102 | if k in data: |
89 | insert_data[k] = data[k] | 103 | insert_data[k] = data[k] |
90 | 104 | ||
91 | cursor.execute('''INSERT INTO tasks_v1 (%s) VALUES (%s)''' % ( | 105 | cursor.execute('''INSERT INTO tasks_v2 (%s) VALUES (%s)''' % ( |
92 | ', '.join(sorted(insert_data.keys())), | 106 | ', '.join(sorted(insert_data.keys())), |
93 | ', '.join(':' + k for k in sorted(insert_data.keys()))), | 107 | ', '.join(':' + k for k in sorted(insert_data.keys()))), |
94 | insert_data) | 108 | insert_data) |
95 | 109 | ||
96 | logger.info('Adding taskhash %s with unihash %s', data['taskhash'], unihash) | 110 | logger.info('Adding taskhash %s with unihash %s', data['taskhash'], unihash) |
97 | cursor.execute('SELECT taskhash, method, unihash FROM tasks_v1 WHERE id=:id', {'id': cursor.lastrowid}) | ||
98 | row = cursor.fetchone() | ||
99 | 111 | ||
100 | self.db.commit() | 112 | self.db.commit() |
101 | 113 | d = {'taskhash': data['taskhash'], 'method': data['method'], 'unihash': unihash} | |
102 | d = {k: row[k] for k in ('taskhash', 'method', 'unihash')} | 114 | else: |
115 | d = {k: row[k] for k in ('taskhash', 'method', 'unihash')} | ||
103 | 116 | ||
104 | self.send_response(200) | 117 | self.send_response(200) |
105 | self.send_header('Content-Type', 'application/json; charset=utf-8') | 118 | self.send_header('Content-Type', 'application/json; charset=utf-8') |
@@ -120,7 +133,7 @@ def create_server(addr, db, prefix=''): | |||
120 | 133 | ||
121 | with contextlib.closing(db.cursor()) as cursor: | 134 | with contextlib.closing(db.cursor()) as cursor: |
122 | cursor.execute(''' | 135 | cursor.execute(''' |
123 | CREATE TABLE IF NOT EXISTS tasks_v1 ( | 136 | CREATE TABLE IF NOT EXISTS tasks_v2 ( |
124 | id INTEGER PRIMARY KEY AUTOINCREMENT, | 137 | id INTEGER PRIMARY KEY AUTOINCREMENT, |
125 | method TEXT NOT NULL, | 138 | method TEXT NOT NULL, |
126 | outhash TEXT NOT NULL, | 139 | outhash TEXT NOT NULL, |
@@ -134,9 +147,13 @@ def create_server(addr, db, prefix=''): | |||
134 | PV TEXT, | 147 | PV TEXT, |
135 | PR TEXT, | 148 | PR TEXT, |
136 | task TEXT, | 149 | task TEXT, |
137 | outhash_siginfo TEXT | 150 | outhash_siginfo TEXT, |
151 | |||
152 | UNIQUE(method, outhash, taskhash) | ||
138 | ) | 153 | ) |
139 | ''') | 154 | ''') |
155 | cursor.execute('CREATE INDEX IF NOT EXISTS taskhash_lookup ON tasks_v2 (method, taskhash)') | ||
156 | cursor.execute('CREATE INDEX IF NOT EXISTS outhash_lookup ON tasks_v2 (method, outhash)') | ||
140 | 157 | ||
141 | logger.info('Starting server on %s', addr) | 158 | logger.info('Starting server on %s', addr) |
142 | return HTTPServer(addr, Handler) | 159 | return HTTPServer(addr, Handler) |