Resolve collisions by prioritising groups from the same source

This doesn't fix all cases, as a master index can have multiple sources,
each with a distinct copy of the same (archive, group, checksum,
version) tuple. However, it's probably as good as we'll be able to do
automatically - and it'll work particularly well for master indexes
downloaded directly over JS5, where we won't have done multiple imports
of the same cache.

Signed-off-by: Graham <gpe@openrs2.org>
Graham 4 years ago
parent 1f735f4b99
commit 18d7fdc343
  1. 4
      archive/src/main/kotlin/org/openrs2/archive/cache/CacheImporter.kt
  2. 41
      archive/src/main/resources/org/openrs2/archive/migrations/V1__init.sql

@ -247,7 +247,7 @@ public class CacheImporter @Inject constructor(
FROM master_index_archives a FROM master_index_archives a
LEFT JOIN master_index_archives a2 ON a2.master_index_id = ? AND a2.archive_id = a.archive_id AND LEFT JOIN master_index_archives a2 ON a2.master_index_id = ? AND a2.archive_id = a.archive_id AND
a2.crc32 = a.crc32 AND a2.version = a.version a2.crc32 = a.crc32 AND a2.version = a.version
LEFT JOIN LATERAL resolve_index(a2.archive_id, a2.crc32, a2.version) c ON TRUE LEFT JOIN LATERAL resolve_index(a2.master_index_id, a2.archive_id, a2.crc32, a2.version) c ON TRUE
WHERE a.master_index_id = ? WHERE a.master_index_id = ?
ORDER BY a.archive_id ASC ORDER BY a.archive_id ASC
""".trimIndent() """.trimIndent()
@ -308,7 +308,7 @@ public class CacheImporter @Inject constructor(
i.archive_id = ? i.archive_id = ?
LEFT JOIN index_groups ig2 ON ig2.container_id = i.container_id AND ig2.group_id = ig.group_id AND LEFT JOIN index_groups ig2 ON ig2.container_id = i.container_id AND ig2.group_id = ig.group_id AND
ig2.crc32 = ig.crc32 AND ig2.version = ig.version ig2.crc32 = ig.crc32 AND ig2.version = ig.version
LEFT JOIN LATERAL resolve_group(i.archive_id, ig2.group_id, ig2.crc32, ig2.version) c ON TRUE LEFT JOIN LATERAL resolve_group(i.master_index_id, i.archive_id, ig2.group_id, ig2.crc32, ig2.version) c ON TRUE
WHERE ig.container_id = ? AND c.id IS NULL WHERE ig.container_id = ? AND c.id IS NULL
ORDER BY ig.group_id ASC ORDER BY ig.group_id ASC
""".trimIndent() """.trimIndent()

@ -163,11 +163,26 @@ CREATE TABLE names (
CREATE UNIQUE INDEX ON names (hash, name); CREATE UNIQUE INDEX ON names (hash, name);
CREATE FUNCTION resolve_index(archive_id uint1, crc32 INTEGER, version INTEGER) RETURNS containers AS $$ CREATE FUNCTION resolve_index(master_index_id INTEGER, archive_id uint1, crc32 INTEGER, version INTEGER) RETURNS containers AS $$
#variable_conflict use_variable #variable_conflict use_variable
DECLARE DECLARE
resolved containers%ROWTYPE; resolved containers%ROWTYPE;
BEGIN BEGIN
SELECT c.*
INTO resolved
FROM source_groups g
JOIN containers c ON c.id = g.container_id
JOIN indexes i ON i.container_id = c.id
JOIN sources s ON s.id = g.source_id
WHERE g.archive_id = 255 AND g.group_id = archive_id::INTEGER AND c.crc32 = crc32 AND g.version = version AND
NOT g.version_truncated AND i.version = version AND s.master_index_id = master_index_id
ORDER BY c.id ASC
LIMIT 1;
IF FOUND THEN
RETURN resolved;
END IF;
SELECT c.* SELECT c.*
INTO resolved INTO resolved
FROM groups g FROM groups g
@ -182,11 +197,25 @@ BEGIN
END; END;
$$ LANGUAGE plpgsql STABLE STRICT PARALLEL SAFE; $$ LANGUAGE plpgsql STABLE STRICT PARALLEL SAFE;
CREATE FUNCTION resolve_group(archive_id uint1, group_id INTEGER, crc32 INTEGER, version INTEGER) RETURNS containers AS $$ CREATE FUNCTION resolve_group(master_index_id INTEGER, archive_id uint1, group_id INTEGER, crc32 INTEGER, version INTEGER) RETURNS containers AS $$
#variable_conflict use_variable #variable_conflict use_variable
DECLARE DECLARE
resolved containers%ROWTYPE; resolved containers%ROWTYPE;
BEGIN BEGIN
SELECT c.*
INTO resolved
FROM source_groups g
JOIN containers c ON c.id = g.container_id
JOIN sources s ON s.id = g.source_id
WHERE g.archive_id = archive_id AND g.group_id = group_id AND c.crc32 = crc32 AND g.version = version AND
NOT g.version_truncated AND s.master_index_id = master_index_id
ORDER BY c.id ASC
LIMIT 1;
IF FOUND THEN
RETURN resolved;
END IF;
SELECT c.* SELECT c.*
INTO resolved INTO resolved
FROM groups g FROM groups g
@ -217,7 +246,7 @@ CREATE VIEW resolved_indexes AS
SELECT m.id AS master_index_id, a.archive_id, c.data, c.id AS container_id SELECT m.id AS master_index_id, a.archive_id, c.data, c.id AS container_id
FROM master_indexes m FROM master_indexes m
JOIN master_index_archives a ON a.master_index_id = m.id JOIN master_index_archives a ON a.master_index_id = m.id
JOIN LATERAL resolve_index(a.archive_id, a.crc32, a.version) c ON TRUE; JOIN LATERAL resolve_index(m.id, a.archive_id, a.crc32, a.version) c ON TRUE;
CREATE VIEW resolved_groups (master_index_id, archive_id, group_id, name_hash, version, data, key_id) AS CREATE VIEW resolved_groups (master_index_id, archive_id, group_id, name_hash, version, data, key_id) AS
WITH i AS NOT MATERIALIZED ( WITH i AS NOT MATERIALIZED (
@ -230,7 +259,7 @@ UNION ALL
SELECT i.master_index_id, i.archive_id, ig.group_id, ig.name_hash, ig.version, c.data, c.key_id SELECT i.master_index_id, i.archive_id, ig.group_id, ig.name_hash, ig.version, c.data, c.key_id
FROM i FROM i
JOIN index_groups ig ON ig.container_id = i.container_id JOIN index_groups ig ON ig.container_id = i.container_id
JOIN LATERAL resolve_group(i.archive_id, ig.group_id, ig.crc32, ig.version) c ON TRUE; JOIN LATERAL resolve_group(i.master_index_id, i.archive_id, ig.group_id, ig.crc32, ig.version) c ON TRUE;
CREATE MATERIALIZED VIEW master_index_stats ( CREATE MATERIALIZED VIEW master_index_stats (
master_index_id, master_index_id,
@ -256,7 +285,7 @@ LEFT JOIN (
COUNT(*) FILTER (WHERE c.id IS NOT NULL OR (a.version = 0 AND a.crc32 = 0)) AS valid_indexes, COUNT(*) FILTER (WHERE c.id IS NOT NULL OR (a.version = 0 AND a.crc32 = 0)) AS valid_indexes,
COUNT(*) AS indexes COUNT(*) AS indexes
FROM master_index_archives a FROM master_index_archives a
LEFT JOIN LATERAL resolve_index(a.archive_id, a.crc32, a.version) c ON TRUE LEFT JOIN LATERAL resolve_index(a.master_index_id, a.archive_id, a.crc32, a.version) c ON TRUE
GROUP BY a.master_index_id GROUP BY a.master_index_id
) a ON a.master_index_id = m.id ) a ON a.master_index_id = m.id
LEFT JOIN ( LEFT JOIN (
@ -268,7 +297,7 @@ LEFT JOIN (
COUNT(*) FILTER (WHERE c.encrypted) AS keys COUNT(*) FILTER (WHERE c.encrypted) AS keys
FROM resolved_indexes i FROM resolved_indexes i
JOIN index_groups ig ON ig.container_id = i.container_id JOIN index_groups ig ON ig.container_id = i.container_id
LEFT JOIN LATERAL resolve_group(i.archive_id, ig.group_id, ig.crc32, ig.version) c ON TRUE LEFT JOIN LATERAL resolve_group(i.master_index_id, i.archive_id, ig.group_id, ig.crc32, ig.version) c ON TRUE
LEFT JOIN keys k ON k.id = c.key_id LEFT JOIN keys k ON k.id = c.key_id
GROUP BY i.master_index_id GROUP BY i.master_index_id
) g ON g.master_index_id = m.id; ) g ON g.master_index_id = m.id;

Loading…
Cancel
Save