summaryrefslogtreecommitdiffstats
path: root/bitbake/lib/hashserv/sqlite.py
diff options
context:
space:
mode:
authorJoshua Watt <JPEWhacker@gmail.com>2024-02-18 15:59:46 -0700
committerRichard Purdie <richard.purdie@linuxfoundation.org>2024-02-19 11:58:12 +0000
commit1effd1014d9140905093efe25eeefedb28a10875 (patch)
treeb34fb1d26f020b361d22904695cab6b9a7c1ea50 /bitbake/lib/hashserv/sqlite.py
parent324c9fd666117afb0dd689eaa8551bb02d6a042b (diff)
downloadpoky-1effd1014d9140905093efe25eeefedb28a10875.tar.gz
bitbake: hashserv: Add Unihash Garbage Collection
Adds support for removing unused unihashes from the database. This is done using a "mark and sweep" style of garbage collection where a collection is started by marking which unihashes should be kept in the database, then performing a sweep to remove any unmarked hashes. (Bitbake rev: 433d4a075a1acfbd2a2913061739353a84bb01ed) Signed-off-by: Joshua Watt <JPEWhacker@gmail.com> Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/hashserv/sqlite.py')
-rw-r--r--bitbake/lib/hashserv/sqlite.py205
1 files changed, 169 insertions, 36 deletions
diff --git a/bitbake/lib/hashserv/sqlite.py b/bitbake/lib/hashserv/sqlite.py
index f93cb2c1dd..608490730d 100644
--- a/bitbake/lib/hashserv/sqlite.py
+++ b/bitbake/lib/hashserv/sqlite.py
@@ -15,6 +15,7 @@ UNIHASH_TABLE_DEFINITION = (
15 ("method", "TEXT NOT NULL", "UNIQUE"), 15 ("method", "TEXT NOT NULL", "UNIQUE"),
16 ("taskhash", "TEXT NOT NULL", "UNIQUE"), 16 ("taskhash", "TEXT NOT NULL", "UNIQUE"),
17 ("unihash", "TEXT NOT NULL", ""), 17 ("unihash", "TEXT NOT NULL", ""),
18 ("gc_mark", "TEXT NOT NULL", ""),
18) 19)
19 20
20UNIHASH_TABLE_COLUMNS = tuple(name for name, _, _ in UNIHASH_TABLE_DEFINITION) 21UNIHASH_TABLE_COLUMNS = tuple(name for name, _, _ in UNIHASH_TABLE_DEFINITION)
@@ -44,6 +45,14 @@ USERS_TABLE_DEFINITION = (
44USERS_TABLE_COLUMNS = tuple(name for name, _, _ in USERS_TABLE_DEFINITION) 45USERS_TABLE_COLUMNS = tuple(name for name, _, _ in USERS_TABLE_DEFINITION)
45 46
46 47
48CONFIG_TABLE_DEFINITION = (
49 ("name", "TEXT NOT NULL", "UNIQUE"),
50 ("value", "TEXT", ""),
51)
52
53CONFIG_TABLE_COLUMNS = tuple(name for name, _, _ in CONFIG_TABLE_DEFINITION)
54
55
47def _make_table(cursor, name, definition): 56def _make_table(cursor, name, definition):
48 cursor.execute( 57 cursor.execute(
49 """ 58 """
@@ -71,6 +80,35 @@ def map_user(row):
71 ) 80 )
72 81
73 82
83def _make_condition_statement(columns, condition):
84 where = {}
85 for c in columns:
86 if c in condition and condition[c] is not None:
87 where[c] = condition[c]
88
89 return where, " AND ".join("%s=:%s" % (k, k) for k in where.keys())
90
91
92def _get_sqlite_version(cursor):
93 cursor.execute("SELECT sqlite_version()")
94
95 version = []
96 for v in cursor.fetchone()[0].split("."):
97 try:
98 version.append(int(v))
99 except ValueError:
100 version.append(v)
101
102 return tuple(version)
103
104
105def _schema_table_name(version):
106 if version >= (3, 33):
107 return "sqlite_schema"
108
109 return "sqlite_master"
110
111
74class DatabaseEngine(object): 112class DatabaseEngine(object):
75 def __init__(self, dbname, sync): 113 def __init__(self, dbname, sync):
76 self.dbname = dbname 114 self.dbname = dbname
@@ -82,9 +120,10 @@ class DatabaseEngine(object):
82 db.row_factory = sqlite3.Row 120 db.row_factory = sqlite3.Row
83 121
84 with closing(db.cursor()) as cursor: 122 with closing(db.cursor()) as cursor:
85 _make_table(cursor, "unihashes_v2", UNIHASH_TABLE_DEFINITION) 123 _make_table(cursor, "unihashes_v3", UNIHASH_TABLE_DEFINITION)
86 _make_table(cursor, "outhashes_v2", OUTHASH_TABLE_DEFINITION) 124 _make_table(cursor, "outhashes_v2", OUTHASH_TABLE_DEFINITION)
87 _make_table(cursor, "users", USERS_TABLE_DEFINITION) 125 _make_table(cursor, "users", USERS_TABLE_DEFINITION)
126 _make_table(cursor, "config", CONFIG_TABLE_DEFINITION)
88 127
89 cursor.execute("PRAGMA journal_mode = WAL") 128 cursor.execute("PRAGMA journal_mode = WAL")
90 cursor.execute( 129 cursor.execute(
@@ -96,17 +135,38 @@ class DatabaseEngine(object):
96 cursor.execute("DROP INDEX IF EXISTS outhash_lookup") 135 cursor.execute("DROP INDEX IF EXISTS outhash_lookup")
97 cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v2") 136 cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v2")
98 cursor.execute("DROP INDEX IF EXISTS outhash_lookup_v2") 137 cursor.execute("DROP INDEX IF EXISTS outhash_lookup_v2")
138 cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v3")
99 139
100 # TODO: Upgrade from tasks_v2? 140 # TODO: Upgrade from tasks_v2?
101 cursor.execute("DROP TABLE IF EXISTS tasks_v2") 141 cursor.execute("DROP TABLE IF EXISTS tasks_v2")
102 142
103 # Create new indexes 143 # Create new indexes
104 cursor.execute( 144 cursor.execute(
105 "CREATE INDEX IF NOT EXISTS taskhash_lookup_v3 ON unihashes_v2 (method, taskhash)" 145 "CREATE INDEX IF NOT EXISTS taskhash_lookup_v4 ON unihashes_v3 (method, taskhash)"
106 ) 146 )
107 cursor.execute( 147 cursor.execute(
108 "CREATE INDEX IF NOT EXISTS outhash_lookup_v3 ON outhashes_v2 (method, outhash)" 148 "CREATE INDEX IF NOT EXISTS outhash_lookup_v3 ON outhashes_v2 (method, outhash)"
109 ) 149 )
150 cursor.execute("CREATE INDEX IF NOT EXISTS config_lookup ON config (name)")
151
152 sqlite_version = _get_sqlite_version(cursor)
153
154 cursor.execute(
155 f"""
156 SELECT name FROM {_schema_table_name(sqlite_version)} WHERE type = 'table' AND name = 'unihashes_v2'
157 """
158 )
159 if cursor.fetchone():
160 self.logger.info("Upgrading Unihashes V2 -> V3...")
161 cursor.execute(
162 """
163 INSERT INTO unihashes_v3 (id, method, unihash, taskhash, gc_mark)
164 SELECT id, method, unihash, taskhash, '' FROM unihashes_v2
165 """
166 )
167 cursor.execute("DROP TABLE unihashes_v2")
168 db.commit()
169 self.logger.info("Upgrade complete")
110 170
111 def connect(self, logger): 171 def connect(self, logger):
112 return Database(logger, self.dbname, self.sync) 172 return Database(logger, self.dbname, self.sync)
@@ -126,16 +186,7 @@ class Database(object):
126 "PRAGMA synchronous = %s" % ("NORMAL" if sync else "OFF") 186 "PRAGMA synchronous = %s" % ("NORMAL" if sync else "OFF")
127 ) 187 )
128 188
129 cursor.execute("SELECT sqlite_version()") 189 self.sqlite_version = _get_sqlite_version(cursor)
130
131 version = []
132 for v in cursor.fetchone()[0].split("."):
133 try:
134 version.append(int(v))
135 except ValueError:
136 version.append(v)
137
138 self.sqlite_version = tuple(version)
139 190
140 async def __aenter__(self): 191 async def __aenter__(self):
141 return self 192 return self
@@ -143,6 +194,30 @@ class Database(object):
143 async def __aexit__(self, exc_type, exc_value, traceback): 194 async def __aexit__(self, exc_type, exc_value, traceback):
144 await self.close() 195 await self.close()
145 196
197 async def _set_config(self, cursor, name, value):
198 cursor.execute(
199 """
200 INSERT OR REPLACE INTO config (id, name, value) VALUES
201 ((SELECT id FROM config WHERE name=:name), :name, :value)
202 """,
203 {
204 "name": name,
205 "value": value,
206 },
207 )
208
209 async def _get_config(self, cursor, name):
210 cursor.execute(
211 "SELECT value FROM config WHERE name=:name",
212 {
213 "name": name,
214 },
215 )
216 row = cursor.fetchone()
217 if row is None:
218 return None
219 return row["value"]
220
146 async def close(self): 221 async def close(self):
147 self.db.close() 222 self.db.close()
148 223
@@ -150,8 +225,8 @@ class Database(object):
150 with closing(self.db.cursor()) as cursor: 225 with closing(self.db.cursor()) as cursor:
151 cursor.execute( 226 cursor.execute(
152 """ 227 """
153 SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2 228 SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2
154 INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash 229 INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
155 WHERE outhashes_v2.method=:method AND outhashes_v2.taskhash=:taskhash 230 WHERE outhashes_v2.method=:method AND outhashes_v2.taskhash=:taskhash
156 ORDER BY outhashes_v2.created ASC 231 ORDER BY outhashes_v2.created ASC
157 LIMIT 1 232 LIMIT 1
@@ -167,8 +242,8 @@ class Database(object):
167 with closing(self.db.cursor()) as cursor: 242 with closing(self.db.cursor()) as cursor:
168 cursor.execute( 243 cursor.execute(
169 """ 244 """
170 SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2 245 SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2
171 INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash 246 INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
172 WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash 247 WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash
173 ORDER BY outhashes_v2.created ASC 248 ORDER BY outhashes_v2.created ASC
174 LIMIT 1 249 LIMIT 1
@@ -200,8 +275,8 @@ class Database(object):
200 with closing(self.db.cursor()) as cursor: 275 with closing(self.db.cursor()) as cursor:
201 cursor.execute( 276 cursor.execute(
202 """ 277 """
203 SELECT outhashes_v2.taskhash AS taskhash, unihashes_v2.unihash AS unihash FROM outhashes_v2 278 SELECT outhashes_v2.taskhash AS taskhash, unihashes_v3.unihash AS unihash FROM outhashes_v2
204 INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash 279 INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash
205 -- Select any matching output hash except the one we just inserted 280 -- Select any matching output hash except the one we just inserted
206 WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash AND outhashes_v2.taskhash!=:taskhash 281 WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash AND outhashes_v2.taskhash!=:taskhash
207 -- Pick the oldest hash 282 -- Pick the oldest hash
@@ -219,7 +294,7 @@ class Database(object):
219 async def get_equivalent(self, method, taskhash): 294 async def get_equivalent(self, method, taskhash):
220 with closing(self.db.cursor()) as cursor: 295 with closing(self.db.cursor()) as cursor:
221 cursor.execute( 296 cursor.execute(
222 "SELECT taskhash, method, unihash FROM unihashes_v2 WHERE method=:method AND taskhash=:taskhash", 297 "SELECT taskhash, method, unihash FROM unihashes_v3 WHERE method=:method AND taskhash=:taskhash",
223 { 298 {
224 "method": method, 299 "method": method,
225 "taskhash": taskhash, 300 "taskhash": taskhash,
@@ -229,15 +304,9 @@ class Database(object):
229 304
230 async def remove(self, condition): 305 async def remove(self, condition):
231 def do_remove(columns, table_name, cursor): 306 def do_remove(columns, table_name, cursor):
232 where = {} 307 where, clause = _make_condition_statement(columns, condition)
233 for c in columns:
234 if c in condition and condition[c] is not None:
235 where[c] = condition[c]
236
237 if where: 308 if where:
238 query = ("DELETE FROM %s WHERE " % table_name) + " AND ".join( 309 query = f"DELETE FROM {table_name} WHERE {clause}"
239 "%s=:%s" % (k, k) for k in where.keys()
240 )
241 cursor.execute(query, where) 310 cursor.execute(query, where)
242 return cursor.rowcount 311 return cursor.rowcount
243 312
@@ -246,17 +315,80 @@ class Database(object):
246 count = 0 315 count = 0
247 with closing(self.db.cursor()) as cursor: 316 with closing(self.db.cursor()) as cursor:
248 count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor) 317 count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor)
249 count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v2", cursor) 318 count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v3", cursor)
250 self.db.commit() 319 self.db.commit()
251 320
252 return count 321 return count
253 322
323 async def get_current_gc_mark(self):
324 with closing(self.db.cursor()) as cursor:
325 return await self._get_config(cursor, "gc-mark")
326
327 async def gc_status(self):
328 with closing(self.db.cursor()) as cursor:
329 cursor.execute(
330 """
331 SELECT COUNT() FROM unihashes_v3 WHERE
332 gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
333 """
334 )
335 keep_rows = cursor.fetchone()[0]
336
337 cursor.execute(
338 """
339 SELECT COUNT() FROM unihashes_v3 WHERE
340 gc_mark!=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
341 """
342 )
343 remove_rows = cursor.fetchone()[0]
344
345 current_mark = await self._get_config(cursor, "gc-mark")
346
347 return (keep_rows, remove_rows, current_mark)
348
349 async def gc_mark(self, mark, condition):
350 with closing(self.db.cursor()) as cursor:
351 await self._set_config(cursor, "gc-mark", mark)
352
353 where, clause = _make_condition_statement(UNIHASH_TABLE_COLUMNS, condition)
354
355 new_rows = 0
356 if where:
357 cursor.execute(
358 f"""
359 UPDATE unihashes_v3 SET
360 gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
361 WHERE {clause}
362 """,
363 where,
364 )
365 new_rows = cursor.rowcount
366
367 self.db.commit()
368 return new_rows
369
370 async def gc_sweep(self):
371 with closing(self.db.cursor()) as cursor:
372 # NOTE: COALESCE is not used in this query so that if the current
373 # mark is NULL, nothing will happen
374 cursor.execute(
375 """
376 DELETE FROM unihashes_v3 WHERE
377 gc_mark!=(SELECT value FROM config WHERE name='gc-mark')
378 """
379 )
380 count = cursor.rowcount
381 await self._set_config(cursor, "gc-mark", None)
382
383 self.db.commit()
384 return count
385
254 async def clean_unused(self, oldest): 386 async def clean_unused(self, oldest):
255 with closing(self.db.cursor()) as cursor: 387 with closing(self.db.cursor()) as cursor:
256 cursor.execute( 388 cursor.execute(
257 """ 389 """
258 DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS ( 390 DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS (
259 SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1 391 SELECT unihashes_v3.id FROM unihashes_v3 WHERE unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash LIMIT 1
260 ) 392 )
261 """, 393 """,
262 { 394 {
@@ -271,7 +403,13 @@ class Database(object):
271 prevrowid = cursor.lastrowid 403 prevrowid = cursor.lastrowid
272 cursor.execute( 404 cursor.execute(
273 """ 405 """
274 INSERT OR IGNORE INTO unihashes_v2 (method, taskhash, unihash) VALUES(:method, :taskhash, :unihash) 406 INSERT OR IGNORE INTO unihashes_v3 (method, taskhash, unihash, gc_mark) VALUES
407 (
408 :method,
409 :taskhash,
410 :unihash,
411 COALESCE((SELECT value FROM config WHERE name='gc-mark'), '')
412 )
275 """, 413 """,
276 { 414 {
277 "method": method, 415 "method": method,
@@ -383,14 +521,9 @@ class Database(object):
383 async def get_usage(self): 521 async def get_usage(self):
384 usage = {} 522 usage = {}
385 with closing(self.db.cursor()) as cursor: 523 with closing(self.db.cursor()) as cursor:
386 if self.sqlite_version >= (3, 33):
387 table_name = "sqlite_schema"
388 else:
389 table_name = "sqlite_master"
390
391 cursor.execute( 524 cursor.execute(
392 f""" 525 f"""
393 SELECT name FROM {table_name} WHERE type = 'table' AND name NOT LIKE 'sqlite_%' 526 SELECT name FROM {_schema_table_name(self.sqlite_version)} WHERE type = 'table' AND name NOT LIKE 'sqlite_%'
394 """ 527 """
395 ) 528 )
396 for row in cursor.fetchall(): 529 for row in cursor.fetchall():