diff options
| author | Joshua Watt <JPEWhacker@gmail.com> | 2024-02-18 15:59:46 -0700 |
|---|---|---|
| committer | Richard Purdie <richard.purdie@linuxfoundation.org> | 2024-02-19 11:58:12 +0000 |
| commit | 1effd1014d9140905093efe25eeefedb28a10875 (patch) | |
| tree | b34fb1d26f020b361d22904695cab6b9a7c1ea50 /bitbake/lib/hashserv/sqlite.py | |
| parent | 324c9fd666117afb0dd689eaa8551bb02d6a042b (diff) | |
| download | poky-1effd1014d9140905093efe25eeefedb28a10875.tar.gz | |
bitbake: hashserv: Add Unihash Garbage Collection
Adds support for removing unused unihashes from the database. This is
done using a "mark and sweep" style of garbage collection where a
collection is started by marking which unihashes should be kept in the
database, then performing a sweep to remove any unmarked hashes.
(Bitbake rev: 433d4a075a1acfbd2a2913061739353a84bb01ed)
Signed-off-by: Joshua Watt <JPEWhacker@gmail.com>
Signed-off-by: Richard Purdie <richard.purdie@linuxfoundation.org>
Diffstat (limited to 'bitbake/lib/hashserv/sqlite.py')
| -rw-r--r-- | bitbake/lib/hashserv/sqlite.py | 205 |
1 files changed, 169 insertions, 36 deletions
diff --git a/bitbake/lib/hashserv/sqlite.py b/bitbake/lib/hashserv/sqlite.py index f93cb2c1dd..608490730d 100644 --- a/bitbake/lib/hashserv/sqlite.py +++ b/bitbake/lib/hashserv/sqlite.py | |||
| @@ -15,6 +15,7 @@ UNIHASH_TABLE_DEFINITION = ( | |||
| 15 | ("method", "TEXT NOT NULL", "UNIQUE"), | 15 | ("method", "TEXT NOT NULL", "UNIQUE"), |
| 16 | ("taskhash", "TEXT NOT NULL", "UNIQUE"), | 16 | ("taskhash", "TEXT NOT NULL", "UNIQUE"), |
| 17 | ("unihash", "TEXT NOT NULL", ""), | 17 | ("unihash", "TEXT NOT NULL", ""), |
| 18 | ("gc_mark", "TEXT NOT NULL", ""), | ||
| 18 | ) | 19 | ) |
| 19 | 20 | ||
| 20 | UNIHASH_TABLE_COLUMNS = tuple(name for name, _, _ in UNIHASH_TABLE_DEFINITION) | 21 | UNIHASH_TABLE_COLUMNS = tuple(name for name, _, _ in UNIHASH_TABLE_DEFINITION) |
| @@ -44,6 +45,14 @@ USERS_TABLE_DEFINITION = ( | |||
| 44 | USERS_TABLE_COLUMNS = tuple(name for name, _, _ in USERS_TABLE_DEFINITION) | 45 | USERS_TABLE_COLUMNS = tuple(name for name, _, _ in USERS_TABLE_DEFINITION) |
| 45 | 46 | ||
| 46 | 47 | ||
| 48 | CONFIG_TABLE_DEFINITION = ( | ||
| 49 | ("name", "TEXT NOT NULL", "UNIQUE"), | ||
| 50 | ("value", "TEXT", ""), | ||
| 51 | ) | ||
| 52 | |||
| 53 | CONFIG_TABLE_COLUMNS = tuple(name for name, _, _ in CONFIG_TABLE_DEFINITION) | ||
| 54 | |||
| 55 | |||
| 47 | def _make_table(cursor, name, definition): | 56 | def _make_table(cursor, name, definition): |
| 48 | cursor.execute( | 57 | cursor.execute( |
| 49 | """ | 58 | """ |
| @@ -71,6 +80,35 @@ def map_user(row): | |||
| 71 | ) | 80 | ) |
| 72 | 81 | ||
| 73 | 82 | ||
| 83 | def _make_condition_statement(columns, condition): | ||
| 84 | where = {} | ||
| 85 | for c in columns: | ||
| 86 | if c in condition and condition[c] is not None: | ||
| 87 | where[c] = condition[c] | ||
| 88 | |||
| 89 | return where, " AND ".join("%s=:%s" % (k, k) for k in where.keys()) | ||
| 90 | |||
| 91 | |||
| 92 | def _get_sqlite_version(cursor): | ||
| 93 | cursor.execute("SELECT sqlite_version()") | ||
| 94 | |||
| 95 | version = [] | ||
| 96 | for v in cursor.fetchone()[0].split("."): | ||
| 97 | try: | ||
| 98 | version.append(int(v)) | ||
| 99 | except ValueError: | ||
| 100 | version.append(v) | ||
| 101 | |||
| 102 | return tuple(version) | ||
| 103 | |||
| 104 | |||
| 105 | def _schema_table_name(version): | ||
| 106 | if version >= (3, 33): | ||
| 107 | return "sqlite_schema" | ||
| 108 | |||
| 109 | return "sqlite_master" | ||
| 110 | |||
| 111 | |||
| 74 | class DatabaseEngine(object): | 112 | class DatabaseEngine(object): |
| 75 | def __init__(self, dbname, sync): | 113 | def __init__(self, dbname, sync): |
| 76 | self.dbname = dbname | 114 | self.dbname = dbname |
| @@ -82,9 +120,10 @@ class DatabaseEngine(object): | |||
| 82 | db.row_factory = sqlite3.Row | 120 | db.row_factory = sqlite3.Row |
| 83 | 121 | ||
| 84 | with closing(db.cursor()) as cursor: | 122 | with closing(db.cursor()) as cursor: |
| 85 | _make_table(cursor, "unihashes_v2", UNIHASH_TABLE_DEFINITION) | 123 | _make_table(cursor, "unihashes_v3", UNIHASH_TABLE_DEFINITION) |
| 86 | _make_table(cursor, "outhashes_v2", OUTHASH_TABLE_DEFINITION) | 124 | _make_table(cursor, "outhashes_v2", OUTHASH_TABLE_DEFINITION) |
| 87 | _make_table(cursor, "users", USERS_TABLE_DEFINITION) | 125 | _make_table(cursor, "users", USERS_TABLE_DEFINITION) |
| 126 | _make_table(cursor, "config", CONFIG_TABLE_DEFINITION) | ||
| 88 | 127 | ||
| 89 | cursor.execute("PRAGMA journal_mode = WAL") | 128 | cursor.execute("PRAGMA journal_mode = WAL") |
| 90 | cursor.execute( | 129 | cursor.execute( |
| @@ -96,17 +135,38 @@ class DatabaseEngine(object): | |||
| 96 | cursor.execute("DROP INDEX IF EXISTS outhash_lookup") | 135 | cursor.execute("DROP INDEX IF EXISTS outhash_lookup") |
| 97 | cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v2") | 136 | cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v2") |
| 98 | cursor.execute("DROP INDEX IF EXISTS outhash_lookup_v2") | 137 | cursor.execute("DROP INDEX IF EXISTS outhash_lookup_v2") |
| 138 | cursor.execute("DROP INDEX IF EXISTS taskhash_lookup_v3") | ||
| 99 | 139 | ||
| 100 | # TODO: Upgrade from tasks_v2? | 140 | # TODO: Upgrade from tasks_v2? |
| 101 | cursor.execute("DROP TABLE IF EXISTS tasks_v2") | 141 | cursor.execute("DROP TABLE IF EXISTS tasks_v2") |
| 102 | 142 | ||
| 103 | # Create new indexes | 143 | # Create new indexes |
| 104 | cursor.execute( | 144 | cursor.execute( |
| 105 | "CREATE INDEX IF NOT EXISTS taskhash_lookup_v3 ON unihashes_v2 (method, taskhash)" | 145 | "CREATE INDEX IF NOT EXISTS taskhash_lookup_v4 ON unihashes_v3 (method, taskhash)" |
| 106 | ) | 146 | ) |
| 107 | cursor.execute( | 147 | cursor.execute( |
| 108 | "CREATE INDEX IF NOT EXISTS outhash_lookup_v3 ON outhashes_v2 (method, outhash)" | 148 | "CREATE INDEX IF NOT EXISTS outhash_lookup_v3 ON outhashes_v2 (method, outhash)" |
| 109 | ) | 149 | ) |
| 150 | cursor.execute("CREATE INDEX IF NOT EXISTS config_lookup ON config (name)") | ||
| 151 | |||
| 152 | sqlite_version = _get_sqlite_version(cursor) | ||
| 153 | |||
| 154 | cursor.execute( | ||
| 155 | f""" | ||
| 156 | SELECT name FROM {_schema_table_name(sqlite_version)} WHERE type = 'table' AND name = 'unihashes_v2' | ||
| 157 | """ | ||
| 158 | ) | ||
| 159 | if cursor.fetchone(): | ||
| 160 | self.logger.info("Upgrading Unihashes V2 -> V3...") | ||
| 161 | cursor.execute( | ||
| 162 | """ | ||
| 163 | INSERT INTO unihashes_v3 (id, method, unihash, taskhash, gc_mark) | ||
| 164 | SELECT id, method, unihash, taskhash, '' FROM unihashes_v2 | ||
| 165 | """ | ||
| 166 | ) | ||
| 167 | cursor.execute("DROP TABLE unihashes_v2") | ||
| 168 | db.commit() | ||
| 169 | self.logger.info("Upgrade complete") | ||
| 110 | 170 | ||
| 111 | def connect(self, logger): | 171 | def connect(self, logger): |
| 112 | return Database(logger, self.dbname, self.sync) | 172 | return Database(logger, self.dbname, self.sync) |
| @@ -126,16 +186,7 @@ class Database(object): | |||
| 126 | "PRAGMA synchronous = %s" % ("NORMAL" if sync else "OFF") | 186 | "PRAGMA synchronous = %s" % ("NORMAL" if sync else "OFF") |
| 127 | ) | 187 | ) |
| 128 | 188 | ||
| 129 | cursor.execute("SELECT sqlite_version()") | 189 | self.sqlite_version = _get_sqlite_version(cursor) |
| 130 | |||
| 131 | version = [] | ||
| 132 | for v in cursor.fetchone()[0].split("."): | ||
| 133 | try: | ||
| 134 | version.append(int(v)) | ||
| 135 | except ValueError: | ||
| 136 | version.append(v) | ||
| 137 | |||
| 138 | self.sqlite_version = tuple(version) | ||
| 139 | 190 | ||
| 140 | async def __aenter__(self): | 191 | async def __aenter__(self): |
| 141 | return self | 192 | return self |
| @@ -143,6 +194,30 @@ class Database(object): | |||
| 143 | async def __aexit__(self, exc_type, exc_value, traceback): | 194 | async def __aexit__(self, exc_type, exc_value, traceback): |
| 144 | await self.close() | 195 | await self.close() |
| 145 | 196 | ||
| 197 | async def _set_config(self, cursor, name, value): | ||
| 198 | cursor.execute( | ||
| 199 | """ | ||
| 200 | INSERT OR REPLACE INTO config (id, name, value) VALUES | ||
| 201 | ((SELECT id FROM config WHERE name=:name), :name, :value) | ||
| 202 | """, | ||
| 203 | { | ||
| 204 | "name": name, | ||
| 205 | "value": value, | ||
| 206 | }, | ||
| 207 | ) | ||
| 208 | |||
| 209 | async def _get_config(self, cursor, name): | ||
| 210 | cursor.execute( | ||
| 211 | "SELECT value FROM config WHERE name=:name", | ||
| 212 | { | ||
| 213 | "name": name, | ||
| 214 | }, | ||
| 215 | ) | ||
| 216 | row = cursor.fetchone() | ||
| 217 | if row is None: | ||
| 218 | return None | ||
| 219 | return row["value"] | ||
| 220 | |||
| 146 | async def close(self): | 221 | async def close(self): |
| 147 | self.db.close() | 222 | self.db.close() |
| 148 | 223 | ||
| @@ -150,8 +225,8 @@ class Database(object): | |||
| 150 | with closing(self.db.cursor()) as cursor: | 225 | with closing(self.db.cursor()) as cursor: |
| 151 | cursor.execute( | 226 | cursor.execute( |
| 152 | """ | 227 | """ |
| 153 | SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2 | 228 | SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2 |
| 154 | INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash | 229 | INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash |
| 155 | WHERE outhashes_v2.method=:method AND outhashes_v2.taskhash=:taskhash | 230 | WHERE outhashes_v2.method=:method AND outhashes_v2.taskhash=:taskhash |
| 156 | ORDER BY outhashes_v2.created ASC | 231 | ORDER BY outhashes_v2.created ASC |
| 157 | LIMIT 1 | 232 | LIMIT 1 |
| @@ -167,8 +242,8 @@ class Database(object): | |||
| 167 | with closing(self.db.cursor()) as cursor: | 242 | with closing(self.db.cursor()) as cursor: |
| 168 | cursor.execute( | 243 | cursor.execute( |
| 169 | """ | 244 | """ |
| 170 | SELECT *, unihashes_v2.unihash AS unihash FROM outhashes_v2 | 245 | SELECT *, unihashes_v3.unihash AS unihash FROM outhashes_v2 |
| 171 | INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash | 246 | INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash |
| 172 | WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash | 247 | WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash |
| 173 | ORDER BY outhashes_v2.created ASC | 248 | ORDER BY outhashes_v2.created ASC |
| 174 | LIMIT 1 | 249 | LIMIT 1 |
| @@ -200,8 +275,8 @@ class Database(object): | |||
| 200 | with closing(self.db.cursor()) as cursor: | 275 | with closing(self.db.cursor()) as cursor: |
| 201 | cursor.execute( | 276 | cursor.execute( |
| 202 | """ | 277 | """ |
| 203 | SELECT outhashes_v2.taskhash AS taskhash, unihashes_v2.unihash AS unihash FROM outhashes_v2 | 278 | SELECT outhashes_v2.taskhash AS taskhash, unihashes_v3.unihash AS unihash FROM outhashes_v2 |
| 204 | INNER JOIN unihashes_v2 ON unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash | 279 | INNER JOIN unihashes_v3 ON unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash |
| 205 | -- Select any matching output hash except the one we just inserted | 280 | -- Select any matching output hash except the one we just inserted |
| 206 | WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash AND outhashes_v2.taskhash!=:taskhash | 281 | WHERE outhashes_v2.method=:method AND outhashes_v2.outhash=:outhash AND outhashes_v2.taskhash!=:taskhash |
| 207 | -- Pick the oldest hash | 282 | -- Pick the oldest hash |
| @@ -219,7 +294,7 @@ class Database(object): | |||
| 219 | async def get_equivalent(self, method, taskhash): | 294 | async def get_equivalent(self, method, taskhash): |
| 220 | with closing(self.db.cursor()) as cursor: | 295 | with closing(self.db.cursor()) as cursor: |
| 221 | cursor.execute( | 296 | cursor.execute( |
| 222 | "SELECT taskhash, method, unihash FROM unihashes_v2 WHERE method=:method AND taskhash=:taskhash", | 297 | "SELECT taskhash, method, unihash FROM unihashes_v3 WHERE method=:method AND taskhash=:taskhash", |
| 223 | { | 298 | { |
| 224 | "method": method, | 299 | "method": method, |
| 225 | "taskhash": taskhash, | 300 | "taskhash": taskhash, |
| @@ -229,15 +304,9 @@ class Database(object): | |||
| 229 | 304 | ||
| 230 | async def remove(self, condition): | 305 | async def remove(self, condition): |
| 231 | def do_remove(columns, table_name, cursor): | 306 | def do_remove(columns, table_name, cursor): |
| 232 | where = {} | 307 | where, clause = _make_condition_statement(columns, condition) |
| 233 | for c in columns: | ||
| 234 | if c in condition and condition[c] is not None: | ||
| 235 | where[c] = condition[c] | ||
| 236 | |||
| 237 | if where: | 308 | if where: |
| 238 | query = ("DELETE FROM %s WHERE " % table_name) + " AND ".join( | 309 | query = f"DELETE FROM {table_name} WHERE {clause}" |
| 239 | "%s=:%s" % (k, k) for k in where.keys() | ||
| 240 | ) | ||
| 241 | cursor.execute(query, where) | 310 | cursor.execute(query, where) |
| 242 | return cursor.rowcount | 311 | return cursor.rowcount |
| 243 | 312 | ||
| @@ -246,17 +315,80 @@ class Database(object): | |||
| 246 | count = 0 | 315 | count = 0 |
| 247 | with closing(self.db.cursor()) as cursor: | 316 | with closing(self.db.cursor()) as cursor: |
| 248 | count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor) | 317 | count += do_remove(OUTHASH_TABLE_COLUMNS, "outhashes_v2", cursor) |
| 249 | count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v2", cursor) | 318 | count += do_remove(UNIHASH_TABLE_COLUMNS, "unihashes_v3", cursor) |
| 250 | self.db.commit() | 319 | self.db.commit() |
| 251 | 320 | ||
| 252 | return count | 321 | return count |
| 253 | 322 | ||
| 323 | async def get_current_gc_mark(self): | ||
| 324 | with closing(self.db.cursor()) as cursor: | ||
| 325 | return await self._get_config(cursor, "gc-mark") | ||
| 326 | |||
| 327 | async def gc_status(self): | ||
| 328 | with closing(self.db.cursor()) as cursor: | ||
| 329 | cursor.execute( | ||
| 330 | """ | ||
| 331 | SELECT COUNT() FROM unihashes_v3 WHERE | ||
| 332 | gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '') | ||
| 333 | """ | ||
| 334 | ) | ||
| 335 | keep_rows = cursor.fetchone()[0] | ||
| 336 | |||
| 337 | cursor.execute( | ||
| 338 | """ | ||
| 339 | SELECT COUNT() FROM unihashes_v3 WHERE | ||
| 340 | gc_mark!=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '') | ||
| 341 | """ | ||
| 342 | ) | ||
| 343 | remove_rows = cursor.fetchone()[0] | ||
| 344 | |||
| 345 | current_mark = await self._get_config(cursor, "gc-mark") | ||
| 346 | |||
| 347 | return (keep_rows, remove_rows, current_mark) | ||
| 348 | |||
| 349 | async def gc_mark(self, mark, condition): | ||
| 350 | with closing(self.db.cursor()) as cursor: | ||
| 351 | await self._set_config(cursor, "gc-mark", mark) | ||
| 352 | |||
| 353 | where, clause = _make_condition_statement(UNIHASH_TABLE_COLUMNS, condition) | ||
| 354 | |||
| 355 | new_rows = 0 | ||
| 356 | if where: | ||
| 357 | cursor.execute( | ||
| 358 | f""" | ||
| 359 | UPDATE unihashes_v3 SET | ||
| 360 | gc_mark=COALESCE((SELECT value FROM config WHERE name='gc-mark'), '') | ||
| 361 | WHERE {clause} | ||
| 362 | """, | ||
| 363 | where, | ||
| 364 | ) | ||
| 365 | new_rows = cursor.rowcount | ||
| 366 | |||
| 367 | self.db.commit() | ||
| 368 | return new_rows | ||
| 369 | |||
| 370 | async def gc_sweep(self): | ||
| 371 | with closing(self.db.cursor()) as cursor: | ||
| 372 | # NOTE: COALESCE is not used in this query so that if the current | ||
| 373 | # mark is NULL, nothing will happen | ||
| 374 | cursor.execute( | ||
| 375 | """ | ||
| 376 | DELETE FROM unihashes_v3 WHERE | ||
| 377 | gc_mark!=(SELECT value FROM config WHERE name='gc-mark') | ||
| 378 | """ | ||
| 379 | ) | ||
| 380 | count = cursor.rowcount | ||
| 381 | await self._set_config(cursor, "gc-mark", None) | ||
| 382 | |||
| 383 | self.db.commit() | ||
| 384 | return count | ||
| 385 | |||
| 254 | async def clean_unused(self, oldest): | 386 | async def clean_unused(self, oldest): |
| 255 | with closing(self.db.cursor()) as cursor: | 387 | with closing(self.db.cursor()) as cursor: |
| 256 | cursor.execute( | 388 | cursor.execute( |
| 257 | """ | 389 | """ |
| 258 | DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS ( | 390 | DELETE FROM outhashes_v2 WHERE created<:oldest AND NOT EXISTS ( |
| 259 | SELECT unihashes_v2.id FROM unihashes_v2 WHERE unihashes_v2.method=outhashes_v2.method AND unihashes_v2.taskhash=outhashes_v2.taskhash LIMIT 1 | 391 | SELECT unihashes_v3.id FROM unihashes_v3 WHERE unihashes_v3.method=outhashes_v2.method AND unihashes_v3.taskhash=outhashes_v2.taskhash LIMIT 1 |
| 260 | ) | 392 | ) |
| 261 | """, | 393 | """, |
| 262 | { | 394 | { |
| @@ -271,7 +403,13 @@ class Database(object): | |||
| 271 | prevrowid = cursor.lastrowid | 403 | prevrowid = cursor.lastrowid |
| 272 | cursor.execute( | 404 | cursor.execute( |
| 273 | """ | 405 | """ |
| 274 | INSERT OR IGNORE INTO unihashes_v2 (method, taskhash, unihash) VALUES(:method, :taskhash, :unihash) | 406 | INSERT OR IGNORE INTO unihashes_v3 (method, taskhash, unihash, gc_mark) VALUES |
| 407 | ( | ||
| 408 | :method, | ||
| 409 | :taskhash, | ||
| 410 | :unihash, | ||
| 411 | COALESCE((SELECT value FROM config WHERE name='gc-mark'), '') | ||
| 412 | ) | ||
| 275 | """, | 413 | """, |
| 276 | { | 414 | { |
| 277 | "method": method, | 415 | "method": method, |
| @@ -383,14 +521,9 @@ class Database(object): | |||
| 383 | async def get_usage(self): | 521 | async def get_usage(self): |
| 384 | usage = {} | 522 | usage = {} |
| 385 | with closing(self.db.cursor()) as cursor: | 523 | with closing(self.db.cursor()) as cursor: |
| 386 | if self.sqlite_version >= (3, 33): | ||
| 387 | table_name = "sqlite_schema" | ||
| 388 | else: | ||
| 389 | table_name = "sqlite_master" | ||
| 390 | |||
| 391 | cursor.execute( | 524 | cursor.execute( |
| 392 | f""" | 525 | f""" |
| 393 | SELECT name FROM {table_name} WHERE type = 'table' AND name NOT LIKE 'sqlite_%' | 526 | SELECT name FROM {_schema_table_name(self.sqlite_version)} WHERE type = 'table' AND name NOT LIKE 'sqlite_%' |
| 394 | """ | 527 | """ |
| 395 | ) | 528 | ) |
| 396 | for row in cursor.fetchall(): | 529 | for row in cursor.fetchall(): |
