User:Difool/WikidataCleanup.js

// <nowiki>

(function (mw) {
  "use strict";

  // Bail out unless this is a "view" of a main-namespace (ns 0) page with a
  // non-zero article ID (an ID of 0 presumably means the page does not exist).
  // Nothing below this guard runs in any other context.
  if (
    mw.config.get("wgNamespaceNumber") !== 0 ||
    mw.config.get("wgAction") !== "view" ||
    mw.config.get("wgArticleId") === 0
  ) {
    return;
  }

  // ==== 01 Constants and Config =============================================

  // Tool name; also used as a prefix for console messages and storage names.
  const TOOL_NAME = "WikidataCleanup";

  // Wikidata property IDs (PID_*) and item IDs (QID_*), kept alphabetical.
  const PID_ARCHIVE_DATE = "P2960";
  const PID_ARCHIVE_URL = "P1065";
  const PID_CITES_WORK = "P2860";
  const PID_CITIZENSHIP = "P27";
  const PID_DATE_OF_BIRTH = "P569";
  const PID_DATE_OF_DEATH = "P570";
  const PID_END_TIME = "P582";
  const PID_IMPORTED_FROM = "P143";
  const PID_OCCUPATION = "P106";
  const PID_REASON_FOR_DEPRECATED_RANK = "P2241";
  const PID_REASON_FOR_PREFERRED_RANK = "P7452";
  const PID_REFERENCE_URL = "P854";
  const PID_RETRIEVED = "P813";
  const PID_START_TIME = "P580";
  const PID_STATED_IN = "P248";
  const PID_SUBCLASS_OF = "P279";
  const PID_SUBJECT_NAMED_AS = "P1810";
  const PID_TITLE = "P1476";
  const PID_URL = "P2699";
  const PID_WIKIMEDIA_IMPORT_URL = "P4656";
  const QID_LESS_PRECISE = "Q42727519"; // item/value with less precision and/or accuracy
  const QID_MOST_PRECISE = "Q71536040"; // most precise value (reason for preferred rank)

  // Calendar models are identified by full entity URLs (note the http://
  // scheme of the entity base), not by bare QIDs.
  const WIKIDATA_ENTITY_BASE_URL = "http://www.wikidata.org/entity/";
  const URL_PROLEPTIC_JULIAN_CALENDAR = WIKIDATA_ENTITY_BASE_URL + "Q1985786";
  const URL_PROLEPTIC_GREGORIAN_CALENDAR =
    WIKIDATA_ENTITY_BASE_URL + "Q1985727";

  // German ID sources
  const PID_GND_ID = "P227";
  const PID_DEUTSCHE_BIOGRAPHIE_GND_ID = "P7902";

  // Properties that mark a reference as inferred / heuristic
  const PID_INFERRED = "P3452";
  const PID_DETERMINATION_METHOD = "P459";
  const PID_MATCHED_BY_IDENTIFIER_FROM = "P11797";
  const PID_BASED_ON_HEURISTIC = "P887";

  // Described-by-source / absorb-url detector
  const PID_DESCRIBED_BY_SOURCE = "P1343";

  // ABSORB_URL_CLAIM_SKIP: ext-id properties that should never be absorption
  // targets for certain item types, because they identify content items rather
  // than the subject itself. If the item's P31 matches any listed QID, the
  // URL claim match is skipped entirely (no diff emitted).
  //
  // Example: P1651 (YouTube video ID) identifies a video, not the person who
  // uploaded it — so a YouTube URL on a human item should not be absorbed into
  // a video ID statement, even if one happens to exist on the item.
  // Shape: ext-id PID -> array of P31 QIDs for which the match is skipped.
  // NOTE(review): P2969 is listed here for Q5 and is also a key of
  // ABSORB_URL_CLAIM_PLATFORM_PAIRS with itemTypes ["Q5"]; which table takes
  // precedence depends on the detector's evaluation order — confirm there.
  const ABSORB_URL_CLAIM_SKIP = {
    P1651: ["Q5"], // YouTube video ID (P1651) — skip for humans
    P2969: ["Q5"], // Goodreads version/edition ID (P2969) — skip for humans
    P10304: ["Q5"], // Apple Podcasts episode ID (P10304) — skip for humans
    P9882: ["Q5"], // Spotify episode ID (P9882) — skip for humans
    P5842: ["Q5"], // Apple Podcasts podcast ID (P5842) — skip for humans

    // Add further entries as new cases are found:
    // "PXXX": ["QYYY", "QZZZ"],
  };

  // ABSORB_URL_CLAIM_PLATFORM_PAIRS: when a URL resolves to a "content item"
  // ext-id property (e.g. Amazon book ID P5749) but the item already has a
  // related "creator/author" property on the same platform (e.g. Amazon author
  // ID P4862), the URL claim is redundant and should be removed — with the
  // creator property cited as the reason.
  //
  // Structure: contentPid -> { creatorPid, itemTypes: [P31 QIDs] }
  // The removal only fires when the item's P31 matches one of the itemTypes
  // AND the item has at least one non-deprecated claim for creatorPid.
  // NOTE(review): P2969 with item type Q5 is also listed in
  // ABSORB_URL_CLAIM_SKIP; if the skip table is consulted first, the P2969
  // pair below may never fire for humans — confirm against the detector.
  const ABSORB_URL_CLAIM_PLATFORM_PAIRS = {
    P5749: { creatorPid: "P4862", itemTypes: ["Q5"] }, // Amazon book ID -> Amazon author ID
    P2969: { creatorPid: "P2963", itemTypes: ["Q5"] }, // Goodreads version/edition ID -> Goodreads author ID
    // Add further platform pairs as needed:
    // "PXXX": { creatorPid: "PYYY", itemTypes: ["QZZZ"] },
  };

  // ==== Developer flags =====================================================
  // All flags below are false in production builds.

  // Set to true in test builds only — allows detectAbsorbUrlClaim to propose
  // converting a URL claim into a new ext-id statement when the ext-id does not
  // yet exist on the item.  Never enable in production.
  const DEV_ABSORB_URL_CLAIM_CREATE_MISSING = false;
  // Extra checks for Wikimedia import references with no sitelinks (only
  // enabled in dev mode).
  const DEV_WIKIMEDIA_NO_SITELINKS = false;
  // When true, detectFixCopiedLabels runs on all item types; when false (production),
  // only humans (P31=Q5) are checked to avoid false positives from Wikipedia's
  // capitalisation conventions and comma-based geographic qualifications.
  const DEV_FIX_COPIED_LABELS_ALL_TYPES = false;

  // Actions — string identifiers for the kinds of cleanup edit. Each value is
  // the camelCase form of its constant name without the ACTION_ prefix.
  const ACTION_REMOVE_REFS = "removeRefs";
  const ACTION_REMOVE_CLAIM = "removeClaim";
  const ACTION_MERGE_CLAIM = "mergeClaim";
  const ACTION_NORMALIZE = "normalize";
  const ACTION_REMOVE_ALIAS = "removeAlias";
  const ACTION_DOWNGRADE_PREFERRED = "downgradePreferred";
  const ACTION_CHANGE_PROPERTY = "changeProperty";
  const ACTION_CHANGE_VALUE = "changeValue";
  const ACTION_REMOVE_QUALIFIER = "removeQualifier";
  const ACTION_CLEAN_URL = "cleanUrl";
  const ACTION_MOVE_QUALIFIER_TO_REFERENCE = "moveQualifierToReference";
  const ACTION_SPLIT_REFERENCE_URLS = "splitReferenceUrls";
  const ACTION_ADD_EXTERNAL_ID_TO_REFERENCE = "addExternalIdToReference";
  const ACTION_ABSORB_CLAIM = "absorbClaim";
  const ACTION_ABSORB_URL_CLAIM = "absorbUrlClaim";
  const ACTION_CONVERT_URL_TO_EXT_ID = "convertUrlToExtId";
  const ACTION_MERGE_DUPLICATE_URL_CLAIMS = "mergeDuplicateUrlClaims";
  const ACTION_UPGRADE_PRECISE_DATE = "upgradePreciseDate";
  const ACTION_REMOVE_REDUNDANT_REF_URL = "removeRedundantRefUrl";
  const ACTION_DEPRECATE_URL_CLAIM = "deprecateUrlClaim";
  const ACTION_SET_MUL_LABEL = "setMulLabel";
  const ACTION_ADD_MUL_ALIAS = "addMulAlias";
  const ACTION_REMOVE_OBSOLETE_SNAKS = "removeObsoleteSnaks";

  // Timing / cache
  const ONE_DAY = 1000 * 60 * 60 * 24; // milliseconds

  // Each cache has a storage key and a time-to-live in milliseconds (all
  // currently 7 days). The _vN suffix in a key is a version tag.
  const OCC_CACHE_KEY = "wd_cleanup_occupationParents_v1";
  const OCC_CACHE_TTL_MS = ONE_DAY * 7;
  const WIKIPEDIA_EDITIONS_CACHE_KEY = "wd_cleanup_wikipediaEditions_v3";
  const WIKIPEDIA_EDITIONS_TTL_MS = ONE_DAY * 7;
  const LANG_NAMES_CACHE_KEY = "wd_cleanup_langNames_v1";
  const LANG_NAMES_TTL_MS = ONE_DAY * 7;
  const OBSOLETE_IDS_CACHE_KEY = "wd_cleanup_obsoleteIds_v1";
  const OBSOLETE_IDS_TTL_MS = ONE_DAY * 7;
  const REGEX_CACHE_KEY = "wd_cleanup_propertyRegex_v1";
  const REGEX_CACHE_TTL_MS = ONE_DAY * 7;
  const URL_PATTERNS_CACHE_KEY = "wd_cleanup_propertyUrlPatterns_v1";
  const URL_PATTERNS_TTL_MS = ONE_DAY * 7;
  const STATED_IN_CACHE_KEY = "wd_cleanup_propertyStatedIn_v3";
  const STATED_IN_TTL_MS = ONE_DAY * 7;
  // The three caches below each also name a wiki page title.
  const URL_STRIP_CACHE_KEY = "wd_cleanup_urlStrip_v1";
  const URL_STRIP_CACHE_TTL_MS = ONE_DAY * 7;
  const URL_STRIP_PAGE = "User:Difool/url_tracking_params";

  const URL_BLOCKLIST_CACHE_KEY = "wd_cleanup_urlBlocklist_v1";
  const URL_BLOCKLIST_CACHE_TTL_MS = ONE_DAY * 7;
  const URL_BLOCKLIST_PAGE = "User:Difool/URL-deprecation-blocklist";

  const SOURCE_CATEGORIES_CACHE_KEY = "wd_cleanup_sourceCategories_v1";
  const SOURCE_CATEGORIES_TTL_MS = ONE_DAY * 7;
  const SOURCE_CATEGORIES_PAGE = "User:Difool/reference-source-categories";

  const CHUNK_SIZE = 50; // wbgetentities page size
  const MAX_TRAVERSAL_DEPTH = 3; // P279 BFS depth limit
  const FETCH_BACKOFF_MS = 2 * 60 * 60 * 1000; // 2 h
  const FETCH_FAILURE_TIMES_KEY = `${TOOL_NAME}_fetchFailureTimes`;

  // Database name and schema version passed to indexedDB.open().
  const INDEXEDDB_NAME = `${TOOL_NAME}_DB`;
  const INDEXEDDB_VERSION = 2;

  // Numeric precision code -> English label (presumably the Wikibase time
  // "precision" field). Only codes 6–11 are mapped.
  const precisionLabels = {
    6: "millennium",
    7: "century",
    8: "decade",
    9: "year",
    10: "month",
    11: "day",
  };

  // ==== 02 Internationalization =============================================

  // UI message catalogue: language code -> { message key -> text }.
  // `en` is the complete reference set; other languages may be partial, and
  // getMsg() falls back along the user's language chain and finally to `en`.
  // Keep every key unique within a language (a repeated key silently
  // overrides the earlier one).
  const i18n = {
    en: {
      cleanupTitle: "🧹 Cleanup Tool",
      cleanupIntro: "The following cleanups are possible:",
      runButton: "Run cleanup",
      settingsButton: "⚙ Settings",
      noCleanups: "No cleanups found",
      settingsTitle: "Cleanup Tool Settings",
      close: "Close",
      // detector labels
      obsolete: "Remove obsolete external-id references",
      wikimedia: "Remove 'imported from Wikimedia project' references",
      aggregator: "Remove aggregator references",
      community: "Remove community references",
      redundant: "Remove FAST if LC is present",
      inferred: "Remove inferred-only references",
      invalid: "Remove references with invalid external-id values",
      normalizeLabels: "Normalize label/description/alias text",
      removeAliasEqualsLabel: "Remove alias = label (same language)",
      removeLowPrecisionDates:
        "Remove redundant low-precision birth/death dates",
      upgradePreciseDate:
        "Upgrade precise date to preferred rank (demote deprecated less-precise duplicate to normal)",
      removeRedundantPreferred:
        "Downgrade redundant 'preferred' ranks (remove 'reason for preferred rank' (P7452) if present)",
      removeExpiredPreferred:
        "Downgrade preferred rank on a statement that has an end time in the past",
      mergeSameDateClaims: "Merge date claims with same normalized value",
      replaceWrongProperty: "Replace wrong property in claims/references",
      moveRetrievedFromExternalId: "Move qualifiers to references",
      duplicateValues: "Merge duplicate values",
      dupRetrieved: "Remove duplicate references",
      removeRedundantOccupation: "Remove redundant occupations",
      removeJulianGregorianDates:
        "Remove unreferenced Julian/Gregorian duplicate dates",
      convertWikipediaStatedIn:
        "Replace 'stated in' (P248) with 'imported from Wikimedia project' (P143) for Wikipedia editions",
      convertInvalidStatedInReference:
        "Fix invalid 'stated in' (P248) in external ID references",
      mismatchedWikimediaImport:
        "Fix mismatched 'imported from' (P143) vs Wikimedia import URL (P4656)",
      removeIdDescriptions: "Remove ID-style descriptions",
      removeEmptyEndTime: "Remove empty 'end time' (P582) qualifiers",
      addExternalIdToReference:
        "Add external ID to reference based on URL match",
      splitMultipleReferenceUrls:
        "Split multiple reference URLs into separate references",
      cleanUrls: "Clean up reference URLs",
      removeSelfCite: "Remove self-citations",
      absorbDescribedBySource:
        "Remove 'described by source' (P1343) redundant with an external ID's applicable 'stated in'",
      absorbUrlClaim:
        "Remove URL claim redundant with a matching external ID on the same item",
      convertUrlToExtId: "Convert URL claim to external ID (dev mode)",
      duplicateUrlClaims:
        "Merge duplicate URL property values (same URL on multiple properties)",
      removeRedundantRefUrl:
        "Remove reference URL (P854) redundant with the statement's own URL value",
      blocklistedUrlClaims:
        "Deprecate or remove URL claims matching the blocklist",
      partial_obsolete: "Remove obsolete identifiers from references",
      blocklistAction: "Action",
      blocklistReason: "Reason",
      extractedId: "Extracted ID",
      urlClaim: "URL claim",
      addMulLabel:
        "Add default (mul) label for humans with identical en/de/fr labels",
      mulLabelValue: "Label value",
      mulLabelLanguages: "Matching languages",
      addMulAlias:
        "Add default (mul) alias and remove per-language duplicates (≥6 languages)",
      mulAliasLangCount: "Language count",
      self_stated_in:
        "Remove tautological 'stated in' (P248) references on external-ID claims",
      redundantCitizenshipDates:
        "Remove redundant start/end time on country of citizenship (P27) matching date of birth/death",
      fixCopiedLabel:
        "Replace label copied from English with the local Wikipedia title",
      fixCopiedLabelLang: "Language",
      fixCopiedLabelBefore: "Current label (= English)",
      fixCopiedLabelAfter: "Wikipedia title",
      // settings sections
      generalSettings: "General",
      detectorSettings: "Options",
      cacheSettings: "Cache & Buffers",
      // column headers
      property: "Property",
      removedValues: "Removed values",
      field: "Field",
      lang: "Language",
      original: "Original text",
      normalized: "Normalized text",
      aliasLabel: "Alias identical to label",
      value: "Value",
      precision: "Precision",
      p7452: "P7452 present?",
      mergedInto: "Merged into",
      context: "Context",
      oldProperty: "Old property",
      newProperty: "New property",
      reference: "Reference",
      removed: "Removed",
      becauseOf: "Because of",
      description: "Description text",
      idPresent: "ID property present",
      suggestedProperty: "Suggested Property",
      referenceUrl: "Reference URL",
      externalIdProperty: "External ID property",
      externalIdClaim: "Matching external ID",
      oldValue: "Old value",
      newValue: "New value",
      count: "Count",
      // menu
      startPreview: "Preview cleanup",
      runPreview: "Run a preview of all cleanup changes",
      autoStart: "Auto-start cleanup preview on page load",
      applyFailed: "Failed to apply cleanup.",
      cleanupFailed: "Cleanup failed.",
      yes: "yes",
      no: "no",
    },
    nl: {
      cleanupTitle: "🧹 Opschoontool",
      cleanupIntro: "De volgende opschoonacties zijn mogelijk:",
      runButton: "Opschonen uitvoeren",
      settingsButton: "⚙ Instellingen",
      noCleanups: "Geen opschoonacties gevonden",
      settingsTitle: "Instellingen voor opschoonacties",
      save: "Opslaan",
      cancel: "Annuleren",
      wikimedia: "Verwijder 'geïmporteerd uit Wikimedia-project'-referenties",
      aggregator: "Verwijder aggregatorreferenties",
      community: "Verwijder communityreferenties",
      redundant: "Verwijder FAST als LC aanwezig is",
      inferred: "Verwijder alleen-afgeleide referenties",
      normalizeLabels: "Normaliseer label-/beschrijving-/alias-tekst",
      removeAliasEqualsLabel: "Verwijder alias = label (zelfde taal)",
      removeLowPrecisionDates:
        "Verwijder onnauwkeurige dubbele geboortedata/sterfdata",
      removeRedundantPreferred:
        "Verlaag overbodige 'voorkeurs'-rangen (verwijder 'reden voor voorkeursrang' (P7452) indien aanwezig)",
      mergeSameDateClaims:
        "Datumclaims met dezelfde genormaliseerde waarde samenvoegen",
      property: "Eigenschap",
      removedValues: "Verwijderde waarden",
      field: "Veld",
      lang: "Taal",
      original: "Oorspronkelijke tekst",
      normalized: "Genormaliseerde tekst",
      aliasLabel: "Alias identiek aan label",
      value: "Waarde",
      precision: "Precisie",
      p7452: "P7452 aanwezig?",
      mergedInto: "Samengevoegd met",
      startPreview: "Opschoonpreview",
      runPreview: "Bekijk de mogelijke opschoonacties",
      autoStart: "Preview automatisch starten",
      applyFailed: "Uitvoeren mislukt",
      cleanupFailed: "Opschonen mislukt",
      yes: "Ja",
      no: "Nee",
    },
  };

  /**
   * Look up the UI message for `key`.
   *
   * Walks the user's fallback language chain and returns the first non-empty
   * translation; if none is found, the English text is used, and if that is
   * missing too the key itself is returned.
   */
  function getMsg(key) {
    const chain = mw.language.getFallbackLanguageChain();
    for (const code of chain) {
      const translated = i18n[code]?.[key];
      if (translated) {
        return translated;
      }
    }
    const english = i18n.en[key];
    return english ? english : key;
  }

  // ==== 03 Utilities ========================================================

  /** True when `id` is a string of the form "Q" followed by one or more digits. */
  function isQid(id) {
    if (typeof id !== "string") {
      return false;
    }
    return /^Q\d+$/.test(id);
  }

  /**
   * Return the distinct valid QIDs of `arr`, in first-seen order.
   * Anything that fails isQid() is dropped.
   */
  function uniq(arr) {
    const seen = new Set();
    for (const candidate of arr) {
      if (isQid(candidate)) {
        seen.add(candidate);
      }
    }
    return Array.from(seen);
  }

  /**
   * Normalise display text.
   *
   * Steps, in order: U+2010 becomes an ASCII hyphen, U+00A0 becomes a plain
   * space, leading/trailing commas and whitespace are stripped, and every
   * remaining whitespace run collapses to one space. Falsy input (empty
   * string, null, undefined) is returned unchanged.
   */
  function normalizeText(str) {
    if (!str) return str;
    const asciiHyphens = str.replace(/\u2010/g, "-");
    const plainSpaces = asciiHyphens.replace(/\u00A0/g, " ");
    const edgesTrimmed = plainSpaces.replace(/^[,\s]+|[,\s]+$/g, "");
    return edgesTrimmed.replace(/\s+/g, " ");
  }

  /**
   * Trim and percent-decode a URL string.
   *
   * A malformed escape sequence (e.g. a stray "%") makes decodeURIComponent
   * throw a URIError; in that case the trimmed string is returned undecoded
   * so one bad URL cannot abort the caller.
   *
   * @param {string} urlValue
   * @returns {string}
   */
  function normalizeUrl(urlValue) {
    const trimmed = urlValue.trim();
    try {
      return decodeURIComponent(trimmed);
    } catch (e) {
      if (e instanceof URIError) return trimmed;
      throw e;
    }
  }

  /**
   * Drop one trailing "/" from a string. Non-string values, and strings that
   * do not end in "/", are returned as-is.
   */
  function removeTrailingSlash(url) {
    if (typeof url !== "string") {
      return url;
    }
    if (!url.endsWith("/")) {
      return url;
    }
    return url.slice(0, -1);
  }

  /**
   * Clean tracking/functional parameters from a URL.
   *
   * Strip rules are the hardcoded defaults below, shallow-merged with the
   * rule maps held in `urlStripCache` (a cache entry for a key replaces the
   * hardcoded list for that same key).
   *
   * Rule-map key conventions:
   *   "*"             – applies to every hostname
   *   "example.com"   – exact match after stripping a leading "www."
   *   ".example.com"  – label-aligned match anywhere in the hostname: matches
   *                     example.com, fr.example.com, example.com.au,
   *                     fr.example.com.au, etc. Use this for domains with
   *                     two-part TLDs (co.uk, com.au).
   *   Plain entries also match via base-domain fallback (last two segments),
   *   so "linkedin.com" covers fr.linkedin.com, de.linkedin.com, etc.
   *
   * @param {string}  rawUrl – absolute URL; `new URL(rawUrl)` throws if it
   *   cannot be parsed
   * @param {Object}  [opts]
   * @param {boolean} [opts.recognitionMode=false] – also strip functional
   *   params, language params, the fragment and a trailing slash
   * @returns {{url: string, keepUrl: boolean}} `url` is `rawUrl` itself when
   *   nothing changed; `keepUrl` is true only when a recognition-only
   *   parameter was removed.
   */
  function cleanUrl(rawUrl, { recognitionMode = false } = {}) {
    const url = new URL(rawUrl);
    let changed = false;
    let keepUrl = false;

    // Always-remove tracking params per hostname.
    const ALWAYS_STRIP = Object.assign(
      {
        "imdb.com": ["ref_"],
        "m.imdb.com": ["ref_"],
        "open.spotify.com": ["si"],
        "researchgate.net": ["ev"],
        "linkedin.com": [
          "originalSubdomain",
          "trk",
          "success",
          "original_referer",
        ],
        ".scholar.google": ["oi", "view_op", "sortby", "authuser"],
      },
      urlStripCache.always,
    );

    // Remove only in recognition mode (functional / UI params)
    const RECOGNITION_STRIP = Object.assign(
      {
        "youtube.com": ["t", "ab_channel", "mode"],
        "open.spotify.com": ["dl_branch", "nd"],
        "itunes.apple.com": ["mt"],
      },
      urlStripCache.recognition,
    );

    const hostname = url.hostname.replace(/^www\./, "");

    // True when the labels of `key` (minus its leading ".") occur as a
    // contiguous, label-aligned run inside `host`. Padding both sides with
    // "." covers the exact, prefix (TLD varies), suffix (subdomain varies)
    // and middle cases with one test.
    function matchesSuffixKey(host, key) {
      return `.${host}.`.includes(`${key}.`);
    }

    // Resolve strip params for a hostname:
    //   1. Exact match (e.g. "imdb.com")
    //   2. First "."-prefixed key that matches (see matchesSuffixKey)
    //   3. Base-domain fallback — last two segments (e.g. "linkedin.com"
    //      matches fr.linkedin.com, de.linkedin.com, etc.)
    function paramsFor(map, host) {
      // Global wildcard — applies to every hostname
      const wildcard = map["*"] || [];

      let specific = [];
      if (map[host]) {
        specific = map[host];
      } else {
        for (const key of Object.keys(map)) {
          if (key.startsWith(".") && matchesSuffixKey(host, key)) {
            specific = map[key];
            break;
          }
        }
        if (!specific.length) {
          const base = host.split(".").slice(-2).join(".");
          specific = map[base] || [];
        }
      }

      // Merge, deduplicating so a param listed globally isn't applied twice
      // if a hostname entry also lists it.
      return [...new Set([...wildcard, ...specific])];
    }

    // Remove an empty fragment ("…#"). The `hash` getter reports "" both for
    // "no fragment" and for "empty fragment", so the serialized href is what
    // distinguishes them; assigning "" drops the dangling "#".
    if (url.hash === "" && url.href.endsWith("#")) {
      url.hash = "";
      changed = true;
    }

    for (const param of paramsFor(ALWAYS_STRIP, hostname)) {
      if (url.searchParams.has(param)) {
        url.searchParams.delete(param);
        changed = true;
      }
    }

    if (recognitionMode) {
      for (const param of paramsFor(RECOGNITION_STRIP, hostname)) {
        if (url.searchParams.has(param)) {
          url.searchParams.delete(param);
          changed = true;
          keepUrl = true;
        }
      }

      if (url.hash && url.hash !== "#") {
        url.hash = "";
        changed = true;
      }

      for (const param of ["hl", "lang", "locale", "lr"]) {
        if (url.searchParams.has(param)) {
          url.searchParams.delete(param);
          changed = true;
        }
      }

      const trimmed = removeTrailingSlash(url.href);
      if (trimmed !== url.href) {
        url.href = trimmed;
        changed = true;
      }
    }

    let result = rawUrl;
    if (changed) {
      result = url.href;
      // The URL parser percent-encodes non-ASCII input; decode it back so a
      // readable raw URL stays readable outside recognition mode.
      if (!recognitionMode && /[^\x00-\x7F]/.test(rawUrl)) {
        try {
          result = decodeURI(result);
        } catch {
          // href contains escapes that are not valid UTF-8 (e.g. legacy
          // Latin-1 "%E9"); keep the encoded form rather than throwing.
        }
      }
    }

    return { url: result, keepUrl };
  }

  /**
   * Make invisible/special characters visible for preview tables.
   *
   * U+2010 -> "[HYPHEN]", U+00A0 -> "[NO-BREAK]", runs of two or more spaces
   * -> "[SPACE×n]", tab -> "[TAB]". Falsy input is returned unchanged.
   */
  function visualizeInvisibleChars(str) {
    if (!str) return str;
    let shown = str.replace(/\u2010/g, "[HYPHEN]");
    shown = shown.replace(/\u00A0/g, "[NO-BREAK]");
    shown = shown.replace(/ {2,}/g, (run) => `[SPACE×${run.length}]`);
    shown = shown.replace(/\t/g, "[TAB]");
    return shown;
  }

  /**
   * Insert zero-width spaces (U+200B) so long URLs can wrap inside tables:
   * one after every "/" and one before every "&". Empty or non-string input
   * is returned unchanged.
   */
  function formatUrlForDisplay(url) {
    const isUsable = Boolean(url) && typeof url === "string";
    if (!isUsable) {
      return url;
    }
    const afterSlashes = url.replace(/\//g, "/\u200b");
    return afterSlashes.replace(/&/g, "\u200b&");
  }

  /**
   * Parse a Wikibase time string into a JS Date (or null on failure).
   *
   * Input looks like "+1950-00-00T00:00:00Z": a signed year, then month and
   * day that may be "00" when the value is less precise than a day. "00"
   * month/day are mapped to "01". The year is rewritten into the form the
   * ECMAScript date-time format accepts: four digits for years 0–9999,
   * otherwise a signed six-digit expanded year, so BCE years parse instead
   * of being rejected or altered.
   *
   * @param {string} timeStr
   * @returns {Date|null} null for empty/non-string input or an unparsable value
   */
  function parseWikibaseTime(timeStr) {
    if (!timeStr || typeof timeStr !== "string") return null;

    let iso;
    const parts = /^([+-]?)(\d+)-(\d{2})-(\d{2})(.*)$/.exec(timeStr);
    if (parts) {
      const [, sign, rawYear, month, day, rest] = parts;
      // Tolerate zero-padded years longer than four digits.
      const year = rawYear.replace(/^0+(?=\d)/, "");
      // "-000000" is not a valid expanded year, so year 0 is always positive.
      const isNegative = sign === "-" && Number(year) !== 0;
      const yearPart =
        isNegative || year.length > 4
          ? `${isNegative ? "-" : "+"}${year.padStart(6, "0")}`
          : year.padStart(4, "0");
      const safeMonth = month === "00" ? "01" : month;
      const safeDay = day === "00" ? "01" : day;
      iso = `${yearPart}-${safeMonth}-${safeDay}${rest}`;
    } else {
      // Not in Wikibase shape: keep the previous lenient normalisation.
      iso = timeStr.replace(/^\+/, "").replace(/-00/g, "-01");
    }

    const d = new Date(iso);
    return Number.isNaN(d.getTime()) ? null : d;
  }

  /**
   * Check that `pattern` compiles via compileAnchoredRegex() and can be run
   * against a sample URL.
   * @returns {{valid: boolean, error: (string|null)}} `error` carries the
   *   exception message when compilation or matching throws.
   */
  function validateUrlPattern(pattern) {
    try {
      const compiled = compileAnchoredRegex(pattern);
      compiled.test("https://example.com/test");
    } catch (err) {
      return { valid: false, error: err.message };
    }
    return { valid: true, error: null };
  }

  /**
   * Check that `pattern` compiles as a Unicode-mode ("u" flag) RegExp and can
   * be run against a sample string.
   * @returns {{valid: boolean, error: (string|null)}} `error` carries the
   *   exception message when compilation throws.
   */
  function validatePropertyRegex(pattern) {
    try {
      const compiled = new RegExp(pattern, "u");
      compiled.test("1234567890");
    } catch (err) {
      return { valid: false, error: err.message };
    }
    return { valid: true, error: null };
  }

  /**
   * Remove unnecessary `\-` and `\/` escapes that sit OUTSIDE character
   * classes (`\-` outside a class is a syntax error under the "u" flag).
   *
   * \- inside a character class [...] is valid JS and means "literal hyphen"
   * unambiguously, so it is left alone there.
   *
   * Every backslash is consumed together with the character it escapes, so
   * an escaped backslash (`\\`), `\[` and `\]` never confuse the class
   * tracking and `\\-` keeps both backslashes.
   *
   * @param {string} rawPattern
   * @returns {string}
   */
  function sanitizePattern(rawPattern) {
    let p = "";
    let inClass = false;
    for (let i = 0; i < rawPattern.length; i++) {
      const ch = rawPattern[i];
      if (ch === "\\" && i + 1 < rawPattern.length) {
        const next = rawPattern[i + 1];
        if (!inClass && (next === "-" || next === "/")) {
          // Drop the backslash — the character is a literal outside a class
          p += next;
        } else {
          p += ch + next;
        }
        i++; // the escaped character has been handled
        continue;
      }
      // Any "[" or "]" reaching this point is unescaped.
      if (ch === "[") inClass = true;
      else if (ch === "]" && inClass) inClass = false;
      p += ch;
    }
    return p;
  }

  /**
   * Convert Wikidata-dialect regex syntax to JavaScript.
   *
   * After sanitizePattern(), rewrites in order: `(?P<n>` and `(?'n'` named
   * groups to `(?<n>`, `\g<n>` and `\g'n'` back-references to `\k<n>`, and
   * atomic groups `(?>` to plain groups `(`.
   */
  function convertWikidataRegexToJS(rawPattern) {
    const rewrites = [
      [/\(\?P<([a-zA-Z0-9_]+)>/g, "(?<$1>"],
      [/\(\?'([a-zA-Z0-9_]+)'/g, "(?<$1>"],
      [/\\g<([a-zA-Z0-9_]+)>/g, "\\k<$1>"],
      [/\\g'([a-zA-Z0-9_]+)'/g, "\\k<$1>"],
      [/\(\?>/g, "("], // atomic groups -> plain groups
    ];
    return rewrites.reduce(
      (pattern, [search, replacement]) => pattern.replace(search, replacement),
      sanitizePattern(rawPattern),
    );
  }

  // Hostnames treated as web archives by isArchiveUrl(); subdomains of these
  // entries count as well.
  const ARCHIVE_DOMAINS = [
    "web.archive.org",
    "archive.is",
    "wayback.archive-it.org",
  ];

  /**
   * True when `url` parses and its lower-cased hostname equals, or is a
   * subdomain of, an entry in ARCHIVE_DOMAINS. Falsy or unparsable input
   * yields false.
   */
  function isArchiveUrl(url) {
    if (!url) {
      return false;
    }
    let host;
    try {
      host = new URL(url).hostname.toLowerCase();
    } catch {
      return false;
    }
    for (const domain of ARCHIVE_DOMAINS) {
      if (host === domain || host.endsWith("." + domain)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Classify a URL by its hostname.
   *
   * A host counts as Wikimedia when it is, or is a subdomain of, one of the
   * project domains listed in the pattern below. The match is anchored on a
   * label boundary, so look-alikes such as "fakewikipedia.org" are rejected
   * and bare domains such as "wikidata.org" are accepted.
   *
   * @param {string} url
   * @returns {{isWikimediaHost: boolean, isWikidataItem: boolean,
   *   isWikisourceItem: boolean}} all false when `url` cannot be parsed.
   *   The two "Item" flags reflect the host only (wikidata.org /
   *   wikisource.org); the path is not inspected.
   */
  function analyzeWikimediaUrl(url) {
    let hostname;
    try {
      ({ hostname } = new URL(url));
    } catch {
      return {
        isWikimediaHost: false,
        isWikidataItem: false,
        isWikisourceItem: false,
      };
    }

    const match =
      /(?:^|\.)(mediawiki|wikibooks|wikidata|wikimedia|wikipedia|wikifunctions|wikinews|wikiquote|wikisource|wikispecies|wikiversity|wikivoyage|wiktionary|wmflabs)\.org$/.exec(
        hostname,
      );
    const project = match ? match[1] : null;

    return {
      isWikimediaHost: project !== null,
      isWikidataItem: project === "wikidata",
      isWikisourceItem: project === "wikisource",
    };
  }

  /**
   * True when `url` is on a Wikimedia host other than Wikidata or
   * Wikisource, as classified by analyzeWikimediaUrl().
   */
  function isWikimediaImportUrl(url) {
    const info = analyzeWikimediaUrl(url);
    const isExcludedProject = info.isWikidataItem || info.isWikisourceItem;
    return info.isWikimediaHost && !isExcludedProject;
  }

  // ==== 04 In-memory caches =================================================

  // Module-level in-memory stores. The persistence helpers below fill and
  // empty Map/Set targets in place (clear/set/add), so the bindings
  // themselves are never reassigned.
  const occupationParentsCache = new Map(); // QID -> Set<parentQID>
  // urlStripCache holds { always: { hostname: [param,...] }, recognition: { hostname: [param,...] } }
  // loaded from the wiki page and merged with the hardcoded defaults at runtime.
  const urlStripCache = { always: {}, recognition: {} };
  const wikipediaEditionsCache = new Map(); // QID -> language code
  const wikipediaLangNamesCache = new Map(); // language code -> language name

  // urlBlocklistCache holds an array of rule objects parsed from the blocklist page.
  // Each rule: { pattern: string, matchType: "prefix"|"regex", action: "remove"|"deprecate",
  //             sectionLabel: string, compiledRegex?: RegExp }
  const urlBlocklistCache = { rules: [], timestamp: 0 };
  const obsoleteIdProps = new Set(); // presumably PIDs of obsolete identifier properties — confirm at the loader
  const propertyRegexCache = new Map(); // PID -> regex string
  const propertyUrlPatternsCache = new Map(); // PID -> [{pattern, replacement}]
  const propertyStatedInCache = new Map(); // PID -> {preferred, invalid}
  // sourceCategoryCache holds the rules parsed from the wiki page.
  // { aggregator: Set<PID>, community: Set<PID>,
  //   redundant: Array<{ weakPid: string, strongPid: string, strongQid: string|null }> }
  const sourceCategoryCache = {
    aggregator: new Set(),
    community: new Set(),
    redundant: [],
  };

  // Set by initIndexedDB(): the flag becomes true and the instance holds the
  // opened database once the open request succeeds. Every IndexedDB helper
  // checks both before touching the database.
  let indexedDBReady = false;
  let indexedDB_instance = null;

  // ==== 05 IndexedDB helpers ================================================

  /**
   * Open the tool's IndexedDB database.
   *
   * On success, stores the connection in `indexedDB_instance`, sets
   * `indexedDBReady` and resolves with the connection. On failure, logs,
   * clears `indexedDBReady` and rejects with the request error. During a
   * version upgrade, creates an object store (keyPath "key") for every entry
   * of `caches` that names a store which does not exist yet.
   */
  async function initIndexedDB() {
    return new Promise((resolve, reject) => {
      const openRequest = indexedDB.open(INDEXEDDB_NAME, INDEXEDDB_VERSION);

      openRequest.onupgradeneeded = (upgradeEvent) => {
        const database = upgradeEvent.target.result;
        for (const cacheEntry of caches) {
          const storeName = cacheEntry.indexeddb_name;
          if (!storeName) continue;
          if (database.objectStoreNames.contains(storeName)) continue;
          database.createObjectStore(storeName, { keyPath: "key" });
        }
      };

      openRequest.onsuccess = () => {
        indexedDB_instance = openRequest.result;
        indexedDBReady = true;
        resolve(indexedDB_instance);
      };

      openRequest.onerror = () => {
        console.error(
          `${TOOL_NAME}: failed to open IndexedDB`,
          openRequest.error,
        );
        indexedDBReady = false;
        reject(openRequest.error);
      };
    });
  }

  /**
   * Open a single-store transaction in the given mode and return its object
   * store. Returns null when the database is not ready, or when opening the
   * transaction/store throws (the error is logged).
   */
  function getIndexedDBStore(storeName, mode) {
    const isUsable = indexedDBReady && indexedDB_instance;
    if (!isUsable) {
      return null;
    }
    try {
      const transaction = indexedDB_instance.transaction([storeName], mode);
      return transaction.objectStore(storeName);
    } catch (err) {
      console.error(`${TOOL_NAME}: failed to open store "${storeName}"`, err);
      return null;
    }
  }

  // Convenience wrappers around getIndexedDBStore() for the two transaction
  // modes. Despite the "Tx" suffix, each returns an object store (or null).
  const getIndexedDBReadTx = function (storeName) {
    return getIndexedDBStore(storeName, "readonly");
  };
  const getIndexedDBWriteTx = function (storeName) {
    return getIndexedDBStore(storeName, "readwrite");
  };

  /**
   * Persist a cache entry's in-memory target (Map or Set) to its IndexedDB
   * object store as one record keyed by `entry.key`.
   *
   * Map targets are stored as [key, value] pairs; with `valueKind: "set"`
   * each Set value is converted to an array. The record carries a
   * `timestamp` of the save time.
   *
   * @returns {Promise<boolean>} true once the put succeeds; false when the
   *   database is not ready, the entry names no store, the store cannot be
   *   opened, or the target type is unsupported. Rejects with the request
   *   error when the put fails.
   */
  async function cache_saveIndexedDB(entry) {
    const isUsable = indexedDBReady && indexedDB_instance;
    if (!isUsable) {
      console.warn(
        `${TOOL_NAME}: IndexedDB not ready, cannot save ${entry.key}`,
      );
      return false;
    }

    const storeName = entry.indexeddb_name;
    if (!storeName) return false;
    const { key, target, type, valueKind = "plain" } = entry;

    const store = getIndexedDBWriteTx(storeName);
    if (!store) return false;

    let record = null;
    if (type === "map" && target instanceof Map) {
      const entries = [];
      for (const [k, v] of target.entries()) {
        entries.push([k, valueKind === "set" ? Array.from(v) : v]);
      }
      record = { key, timestamp: Date.now(), kind: "map", valueKind, entries };
    } else if (type === "set" && target instanceof Set) {
      record = {
        key,
        timestamp: Date.now(),
        kind: "set",
        items: [...target],
      };
    }

    if (record === null) {
      console.warn(`${TOOL_NAME}: unsupported cache type for ${key}`);
      return false;
    }

    return new Promise((resolve, reject) => {
      const putRequest = store.put(record);
      putRequest.onsuccess = () => resolve(true);
      putRequest.onerror = () => {
        console.warn(`${TOOL_NAME}: save failed for ${key}`, putRequest.error);
        reject(putRequest.error);
      };
    });
  }

  /**
   * Load a cache entry from IndexedDB into its in-memory target (Map or Set).
   *
   * The target is replaced only after the stored record has been fully
   * converted, so a missing, expired, mismatched or corrupt record leaves
   * the target's current contents untouched (an expired record additionally
   * triggers cache_resetIndexedDB, which clears the target).
   *
   * Any exception raised while processing the record resolves the promise
   * with false instead of leaving it pending forever.
   *
   * @param {Object} entry – cache descriptor ({ key, target, ttl, indexeddb_name })
   * @returns {Promise<boolean>} true when the target was populated from the
   *   store; false otherwise. Rejects only when the read request itself errors.
   */
  async function cache_loadIndexedDB(entry) {
    if (!indexedDBReady || !indexedDB_instance) return false;
    const { key, target, ttl, indexeddb_name: storeName } = entry;
    if (!storeName) return false;

    const tx = getIndexedDBReadTx(storeName);
    if (!tx) return false;

    return new Promise((resolve, reject) => {
      const req = tx.get(key);
      req.onerror = () => reject(req.error);
      req.onsuccess = () => {
        try {
          const parsed = req.result;
          if (!parsed) {
            resolve(false);
            return;
          }

          if (parsed.timestamp && Date.now() - parsed.timestamp > ttl) {
            // Best-effort purge of the stale record; a failure here only
            // means the record is purged on a later attempt.
            cache_resetIndexedDB(entry).catch(() => {});
            resolve(false);
            return;
          }

          if (parsed.kind === "map" && target instanceof Map) {
            // Convert first, swap in afterwards: a malformed `entries`
            // throws before the live target is modified.
            const restored = parsed.entries.map(([k, v]) => [
              k,
              parsed.valueKind === "set" ? new Set(v) : v,
            ]);
            target.clear();
            restored.forEach(([k, v]) => target.set(k, v));
          } else if (parsed.kind === "set" && target instanceof Set) {
            const restored = Array.from(parsed.items);
            target.clear();
            restored.forEach((item) => target.add(item));
          } else {
            console.warn(`${TOOL_NAME}: type mismatch for ${key}`);
            resolve(false);
            return;
          }
          resolve(true);
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to load cache ${key}`, e);
          resolve(false);
        }
      };
    });
  }

  /**
   * Clear a cache entry: empty its in-memory target and delete its record
   * from the IndexedDB store.
   *
   * @returns {Promise<boolean>} true once the delete succeeds; false (with
   *   the target left untouched) when the database is not ready, the entry
   *   names no store, or the store cannot be opened. Rejects with the
   *   request error when the delete fails.
   */
  async function cache_resetIndexedDB(entry) {
    const isUsable = indexedDBReady && indexedDB_instance;
    if (!isUsable) return false;

    const storeName = entry.indexeddb_name;
    if (!storeName) return false;

    const store = getIndexedDBWriteTx(storeName);
    if (!store) return false;

    const { key, target } = entry;
    target.clear();

    return new Promise((resolve, reject) => {
      const deleteRequest = store.delete(key);
      deleteRequest.onsuccess = () => resolve(true);
      deleteRequest.onerror = () => {
        console.warn(
          `${TOOL_NAME}: reset failed for ${key}`,
          deleteRequest.error,
        );
        reject(deleteRequest.error);
      };
    });
  }

  /**
   * Build a human-readable status line for a cache entry backed by IndexedDB.
   *
   * The item count always comes from the in-memory target; the expiry date
   * is the stored record's timestamp plus the entry's TTL. Never rejects:
   * every failure path resolves with an "unavailable" message.
   *
   * @returns {Promise<string>}
   */
  async function cache_getStatusIndexedDB(entry) {
    if (!indexedDBReady || !indexedDB_instance) {
      return `${entry.label} IndexedDB cache is unavailable`;
    }

    const { key, target, ttl: ttlMs, label, indexeddb_name: storeName } = entry;
    if (!storeName) return `${label} IndexedDB cache status unavailable`;

    const store = getIndexedDBReadTx(storeName);
    if (!store) return `${label} IndexedDB cache status unavailable`;

    return new Promise((resolve) => {
      const lookup = store.get(key);

      lookup.onerror = () => {
        resolve(`${label} IndexedDB cache status unavailable`);
      };

      lookup.onsuccess = () => {
        const record = lookup.result;
        const count = target.size;

        if (record) {
          let expiry = "unknown";
          if (record.timestamp) {
            expiry = new Date(record.timestamp + ttlMs).toLocaleDateString();
          }
          resolve(
            `${label} IndexedDB cache contains ${count} items, will be reset on ${expiry}`,
          );
          return;
        }

        if (count > 0) {
          resolve(
            `${label} cache contains ${count} items (in memory, not yet synced to IndexedDB)`,
          );
        } else {
          resolve(`${label} IndexedDB cache is empty`);
        }
      };
    });
  }

  // ==== 06 localStorage cache helpers =======================================

  /**
   * Serialise a cache entry's in-memory target (Map or Set) to localStorage
   * under `entry.key` as JSON.
   *
   * Map targets are stored as [key, value] pairs; with `valueKind: "set"`
   * each Set value is converted to an array. Unsupported targets and storage
   * failures are logged and otherwise ignored. Returns nothing.
   */
  function cache_saveLocalSt(entry) {
    const { key, target, type, valueKind = "plain" } = entry;
    try {
      let record = null;
      if (type === "map" && target instanceof Map) {
        const entries = [];
        for (const [k, v] of target.entries()) {
          entries.push([k, valueKind === "set" ? Array.from(v) : v]);
        }
        record = { timestamp: Date.now(), kind: "map", valueKind, entries };
      } else if (type === "set" && target instanceof Set) {
        record = {
          timestamp: Date.now(),
          kind: "set",
          items: [...target],
        };
      }

      if (record === null) {
        console.warn(`${TOOL_NAME}: unsupported cache type for ${key}`);
        return;
      }
      localStorage.setItem(key, JSON.stringify(record));
    } catch (err) {
      console.warn(`${TOOL_NAME}: failed to save ${key}`, err);
    }
  }

  /**
   * Restore a Map- or Set-backed cache entry from its localStorage JSON payload.
   *
   * The payload is decoded and validated completely before the in-memory
   * target is touched, so a kind mismatch or a malformed payload leaves the
   * existing contents of `target` intact instead of wiping or half-filling it.
   *
   * @param {object} entry - Cache registry entry; reads `key`, `target`
   *   (a Map or Set that is refilled in place) and `ttl` (milliseconds).
   * @returns {boolean} true when `target` was replaced with the stored data;
   *   false when nothing is stored, the payload has expired (it is then also
   *   removed from storage), or the payload is unusable.
   */
  function cache_loadLocalSt(entry) {
    const { key, target, ttl } = entry;
    try {
      const raw = localStorage.getItem(key);
      if (!raw) return false;
      const parsed = JSON.parse(raw);
      if (!parsed) return false;
      if (parsed.timestamp && Date.now() - parsed.timestamp > ttl) {
        localStorage.removeItem(key);
        return false;
      }

      if (
        parsed.kind === "map" &&
        target instanceof Map &&
        Array.isArray(parsed.entries)
      ) {
        const valuesAreSets = parsed.valueKind === "set";
        // Decode everything first: a bad row throws here, before clear().
        const staged = parsed.entries.map(([k, v]) => [
          k,
          valuesAreSets ? new Set(v) : v,
        ]);
        target.clear();
        for (const [k, v] of staged) target.set(k, v);
        return true;
      }

      if (
        parsed.kind === "set" &&
        target instanceof Set &&
        Array.isArray(parsed.items)
      ) {
        target.clear();
        for (const item of parsed.items) target.add(item);
        return true;
      }

      console.warn(`${TOOL_NAME}: type mismatch for ${key}`);
      return false;
    } catch (e) {
      console.warn(`${TOOL_NAME}: failed to load cache ${key}`, e);
      return false;
    }
  }

  /**
   * Empty a localStorage-backed cache: clears the in-memory target and removes
   * the persisted payload. Failures are logged with console.warn, not thrown.
   *
   * @param {object} entry - Cache registry entry; reads `target` and `key`.
   */
  function cache_resetLocalSt(entry) {
    try {
      const { target, key } = entry;
      target.clear();
      localStorage.removeItem(key);
    } catch (err) {
      console.warn(`${TOOL_NAME}: failed to reset cache ${entry.key}`, err);
    }
  }

  /**
   * Build a one-line, human-readable status message for a cache entry that is
   * persisted in localStorage.
   *
   * The storage read happens inside the try block so that a throwing storage
   * (as the sibling load/save/reset helpers already guard against) produces
   * the "status unavailable" message rather than an exception.
   *
   * @param {object} entry - Cache registry entry; reads `key`, `target`,
   *   `ttl` (milliseconds) and `label`.
   * @returns {string} Status sentence; the item count is taken from the
   *   in-memory `target`, the reset date from the stored timestamp plus `ttl`.
   */
  function cache_getStatusLocalSt(entry) {
    const { key, target, ttl: ttlMs, label } = entry;
    try {
      const raw = localStorage.getItem(key);
      if (!raw) return `${label} cache is empty`;
      const parsed = JSON.parse(raw);
      const expiry = parsed.timestamp
        ? new Date(parsed.timestamp + ttlMs).toLocaleDateString()
        : "unknown";
      return `${label} cache contains ${target.size} items, will be reset on ${expiry}`;
    } catch {
      return `${label} cache status unavailable`;
    }
  }

  /**
   * Unified status query for any cache entry.
   *
   * Dispatch order: the entry's own `_customStatus()` if present, then the
   * IndexedDB helper when `indexeddb_name` is set, otherwise the localStorage
   * helper. Synchronous results are wrapped so a Promise is always returned.
   *
   * @param {object} entry - Cache registry entry.
   * @returns {Promise<string>} Status sentence.
   */
  function cache_getStatus(entry) {
    if (entry._customStatus) {
      return Promise.resolve(entry._customStatus());
    }
    if (entry.indexeddb_name) {
      return cache_getStatusIndexedDB(entry);
    }
    return Promise.resolve(cache_getStatusLocalSt(entry));
  }

  /**
   * Unified reset for any cache entry.
   *
   * Dispatch order: the entry's own `_customReset()` if present, then the
   * IndexedDB helper when `indexeddb_name` is set, otherwise the localStorage
   * helper. Synchronous results are wrapped so a Promise is always returned.
   *
   * @param {object} entry - Cache registry entry.
   * @returns {Promise<*>} Settles with whatever the selected reset routine yields.
   */
  function cache_reset(entry) {
    if (entry._customReset) {
      return Promise.resolve(entry._customReset());
    }
    if (entry.indexeddb_name) {
      return cache_resetIndexedDB(entry);
    }
    return Promise.resolve(cache_resetLocalSt(entry));
  }

  // ==== 07 Settings =========================================================

  // Baseline settings object. cache_loadSettings() uses it as the base that
  // persisted settings are applied over, and as the fallback when the stored
  // settings cannot be read.
  const defaultSettings = {
    autoStartPreview: true,
    enableLargeBuffers: false,
    enableHeavyComputing: false,
    // NOTE(review): presumably one boolean switch per detector, keyed by
    // detector name — confirm against the code that consumes these flags.
    enabledDetectors: {
      wikimedia: true,
      aggregator: true,
      community: true,
      redundant: true,
      inferred: true,
      normalizeLabels: true,
      removeAliasEqualsLabel: true,
      removeLowPrecisionDates: true,
      removeRedundantPreferred: true,
    },
  };

  /**
   * Load the user's settings from localStorage, layered over `defaultSettings`.
   *
   * `enabledDetectors` is merged one level deep, so a stored object that lacks
   * a detector key (for example one saved before that detector existed) still
   * inherits the default for it instead of leaving it `undefined`.
   *
   * The returned object and its `enabledDetectors` are fresh copies; mutating
   * them never alters `defaultSettings`.
   *
   * @returns {object} Effective settings. Falls back to a copy of the
   *   defaults when nothing is stored, the stored value is not a plain
   *   object, or storage/JSON parsing fails.
   */
  function cache_loadSettings() {
    const defaults = {
      ...defaultSettings,
      enabledDetectors: { ...defaultSettings.enabledDetectors },
    };
    try {
      const stored = JSON.parse(localStorage.getItem(`${TOOL_NAME}_settings`));
      if (!stored || typeof stored !== "object" || Array.isArray(stored)) {
        return defaults;
      }
      const storedDetectors =
        stored.enabledDetectors &&
        typeof stored.enabledDetectors === "object" &&
        !Array.isArray(stored.enabledDetectors)
          ? stored.enabledDetectors
          : {};
      return {
        ...defaults,
        ...stored,
        enabledDetectors: { ...defaults.enabledDetectors, ...storedDetectors },
      };
    } catch {
      return defaults;
    }
  }

  /**
   * Persist the settings object to localStorage as JSON under
   * `${TOOL_NAME}_settings`.
   *
   * Errors from serialization or from the storage write are not caught here
   * and propagate to the caller.
   *
   * @param {object} settings - Settings object to store.
   */
  function cache_saveSettings(settings) {
    const storageKey = `${TOOL_NAME}_settings`;
    const serialized = JSON.stringify(settings);
    localStorage.setItem(storageKey, serialized);
  }

  // ==== 08 API helpers ======================================================

  /**
   * Fetch the Wikipedia edition QID -> language code map from the
   * "Wikidata:Database_reports/Wikipedia_versions" report page.
   *
   * Each table row contributes one entry: the QID comes from the item link in
   * the second cell, the language code from the fourth cell (empty string when
   * the row has fewer than four cells or no code can be read).
   *
   * @returns {Promise<Map<string, string>>} QID -> language code. An empty Map
   *   is returned when the page has no HTML or the request/parsing fails.
   */
  async function api_fetchWikipediaEditions() {
    try {
      const data = await new mw.Api().get({
        action: "parse",
        page: "Wikidata:Database_reports/Wikipedia_versions",
        prop: "text",
        format: "json",
      });
      const html = data?.parse?.text?.["*"];
      if (!html) return new Map();

      const doc = new DOMParser().parseFromString(html, "text/html");
      const qidToLanguage = new Map();

      for (const tr of doc.querySelectorAll("table.wikitable tr")) {
        const cells = tr.querySelectorAll("td");
        if (cells.length < 2) continue;
        const link = cells[1].querySelector("a[href*='/wiki/Q']");
        if (!link) continue;
        const m = link.getAttribute("href").match(/\/wiki\/(Q\d+)/);
        if (!m) continue;

        // The language cell may hold two values separated by <br>
        // (e.g. "da<br>dk"); only the first one is the Wikipedia language code.
        const langCell = cells.length >= 4 ? cells[3] : null;
        let langCode = "";
        if (langCell) {
          const first = langCell.firstChild;
          if (first?.nodeType === Node.TEXT_NODE) {
            langCode = first.textContent.trim();
          }
          // Fall back to the first whitespace-separated token of the whole
          // cell. Trim before splitting: leading whitespace would otherwise
          // make the first token an empty string.
          if (!langCode) {
            langCode = langCell.textContent.trim().split(/\s+/)[0] || "";
          }
        }
        qidToLanguage.set(m[1], langCode);
      }

      console.log(
        `${TOOL_NAME}: fetched ${qidToLanguage.size} Wikipedia edition QIDs`,
      );
      return qidToLanguage;
    } catch (e) {
      console.error(`${TOOL_NAME}: failed to fetch Wikipedia edition QIDs`, e);
      return new Map();
    }
  }

  /**
   * Fetch the language codes and names usable for Wikidata terms
   * (labels, descriptions, aliases) via `meta=wbcontentlanguages`.
   *
   * @returns {Promise<object>} The `query.wbcontentlanguages` object of the
   *   response ({ code: { name } }), or `{}` when it is missing or the
   *   request fails.
   */
  async function api_fetchContentLanguages() {
    const request = {
      action: "query",
      meta: "wbcontentlanguages",
      wbclcontext: "term",
      wbclprop: "name",
      formatversion: 2,
      format: "json",
    };
    try {
      const response = await new mw.Api().get(request);
      const languages = response?.query?.wbcontentlanguages;
      if (!languages) {
        return {};
      }
      const total = Object.keys(languages).length;
      console.log(`${TOOL_NAME}: fetched ${total} content languages`);
      return languages; // { code: { name } }
    } catch (err) {
      console.error(`${TOOL_NAME}: failed to fetch content languages`, err);
      return {};
    }
  }
  /** Fetch multiple entities by QID, chunking at CHUNK_SIZE. */
  async function api_fetchEntities(qids) {
    const unique = uniq(qids);
    if (!unique.length) return {};
    const results = {};
    for (let i = 0; i < unique.length; i += CHUNK_SIZE) {
      const chunk = unique.slice(i, i + CHUNK_SIZE);
      try {
        const data = await new mw.Api().get({
          action: "wbgetentities",
          format: "json",
          ids: chunk.join("|"),
          props: "claims",
        });
        Object.assign(results, data.entities || {});
      } catch (e) {
        console.error(`${TOOL_NAME}: wbgetentities failed for chunk`, chunk, e);
      }
    }
    return results;
  }

  /**
   * Collect the ids of all properties matched by the CirrusSearch query
   * `haswbstatement:P31=Q108951239|P31=Q60457486` in namespace 120, following
   * search continuation until no `sroffset` is returned.
   *
   * A failing request is logged and ends the paging; whatever was collected
   * up to that point is still returned.
   *
   * @returns {Promise<Set<string>>} Property ids with the "Property:" prefix removed.
   */
  async function api_fetchAllObsoleteIdProps() {
    const api = new mw.Api();
    const searchParams = {
      action: "query",
      format: "json",
      list: "search",
      formatversion: 2,
      srsearch: "haswbstatement:P31=Q108951239|P31=Q60457486",
      srnamespace: 120,
      srlimit: 50,
      srprop: "",
    };

    const found = new Set();
    let continuation = {};
    for (;;) {
      try {
        const page = await api.get({ ...searchParams, ...continuation });
        const hits = page.query?.search;
        if (hits) {
          // Convert the whole page first so a bad row adds nothing from it.
          const ids = hits.map((r) => r.title.replace("Property:", ""));
          for (const id of ids) found.add(id);
        }
        continuation = page.continue || {};
      } catch (err) {
        console.error(`${TOOL_NAME}: api_fetchAllObsoleteIdProps failed`, err);
        break;
      }
      if (!continuation.sroffset) break;
    }
    return found;
  }

  /**
   * Query the Wikidata SPARQL endpoint for the regex qualifier (P1793) on the
   * Q21502404 property-constraint statements (P2302) of external-id properties.
   *
   * Every regex is passed through `sanitizePattern`; a warning is logged
   * whenever that changes the pattern.
   *
   * @returns {Promise<Array<[string, string]>>} `[propertyId, sanitizedRegex]`
   *   pairs, or `[]` when the request or response handling fails.
   */
  async function api_fetchPropertyRegexConstraints() {
    const query = `
      SELECT ?property ?regex WHERE {
        ?property wikibase:propertyType wikibase:ExternalId;
          p:P2302 ?constraintStatement.
        ?constraintStatement ps:P2302 wd:Q21502404;
          pq:P1793 ?regex.
      }
    `;
    const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(query)}&format=json`;

    try {
      const response = await fetch(url);
      const { results } = await response.json();

      const pairs = [];
      for (const binding of results.bindings) {
        const propId = binding.property.value.replace(
          WIKIDATA_ENTITY_BASE_URL,
          "",
        );
        const original = binding.regex.value;
        const sanitized = sanitizePattern(original);
        if (original !== sanitized) {
          console.warn(
            `${TOOL_NAME}: sanitized regex for ${propId}: "${original}" -> "${sanitized}"`,
          );
        }
        pairs.push([propId, sanitized]);
      }
      return pairs;
    } catch (err) {
      console.error(
        `${TOOL_NAME}: failed to fetch property regex constraints`,
        err,
      );
      return [];
    }
  }

  /**
   * Decide whether a property / URL-pattern combination should be skipped.
   *
   * Rules:
   *   - P1184: always skipped.
   *   - PID_GND_ID: skipped when the pattern mentions "deutsche-biographie"
   *     or "lagis-hessen".
   *   - PID_DEUTSCHE_BIOGRAPHIE_GND_ID: skipped unless the pattern mentions
   *     "deutsche-biographie".
   *   - any other property: never skipped.
   *
   * @param {string} pid - Property id.
   * @param {string} expr - URL match pattern text.
   * @returns {boolean} true when the combination should be skipped.
   */
  function shouldIgnorePattern(pid, expr) {
    switch (pid) {
      case "P1184":
        return true;
      case PID_GND_ID:
        return ["deutsche-biographie", "lagis-hessen"].some((marker) =>
          expr.includes(marker),
        );
      case PID_DEUTSCHE_BIOGRAPHIE_GND_ID:
        return !expr.includes("deutsche-biographie");
      default:
        return false;
    }
  }

  /**
   * Query the Wikidata SPARQL endpoint for the URL match patterns (P8966) of
   * external-id properties, with the P8967 qualifier value when one exists.
   *
   * Each pattern is converted with `convertWikidataRegexToJS`; a warning is
   * logged whenever the conversion changes the pattern text.
   *
   * @returns {Promise<Map<string, Array<{pattern: string, replacement: (string|undefined)}>>>}
   *   Property id -> pattern objects in response order; an empty Map when the
   *   request or response handling fails.
   */
  async function api_fetchPropertyUrlMatchPatterns() {
    const query = `
      SELECT DISTINCT ?property ?expr ?repl WHERE {
        ?property wikibase:propertyType wikibase:ExternalId;
          wdt:P8966 ?expr.
        OPTIONAL {
          ?property p:P8966 ?statement.
          ?statement ps:P8966 ?expr;
            pq:P8967 ?repl.
        }
        FILTER((STR(?expr)) != "")
      }
    `;
    const url = `https://query.wikidata.org/sparql?query=${encodeURIComponent(query)}&format=json`;

    try {
      const response = await fetch(url);
      const { results } = await response.json();

      const patternsByProp = new Map();
      for (const binding of results.bindings) {
        const propId = binding.property.value.replace(
          WIKIDATA_ENTITY_BASE_URL,
          "",
        );
        const original = binding.expr.value;
        const converted = convertWikidataRegexToJS(original);
        if (original !== converted) {
          console.warn(
            `${TOOL_NAME}: sanitized pattern for ${propId}: "${original}" -> "${converted}"`,
          );
        }

        let bucket = patternsByProp.get(propId);
        if (!patternsByProp.has(propId)) {
          bucket = [];
          patternsByProp.set(propId, bucket);
        }
        bucket.push({ pattern: converted, replacement: binding.repl?.value });
      }
      return patternsByProp;
    } catch (err) {
      console.error(
        `${TOOL_NAME}: failed to fetch property URL match patterns`,
        err,
      );
      return new Map();
    }
  }

  /**
   * Remove the first occurrence of the Wikidata entity base URL from a string,
   * turning an entity URI into its bare id.
   *
   * @param {string} entityUrl - Entity URI such as one returned by SPARQL.
   * @returns {string} The input without the base URL; unchanged when the base
   *   URL does not occur in it.
   */
  function extractQID(entityUrl) {
    const at = entityUrl.indexOf(WIKIDATA_ENTITY_BASE_URL);
    if (at === -1) return entityUrl;
    const before = entityUrl.slice(0, at);
    const after = entityUrl.slice(at + WIKIDATA_ENTITY_BASE_URL.length);
    return before + after;
  }

  /**
   * Fetch the URL strip rules from the wiki page named by URL_STRIP_PAGE
   * ([[User:Difool/unrecognized_urls]]).
   *
   * Expected page content: a wikitable with three columns
   *   Hostname | Mode | Parameters
   *
   * Hostname conventions (same as the hardcoded maps):
   *   "example.com"   – exact/base-domain match
   *   ".example.com"  – suffix match (covers country-code subdomains)
   *
   * Mode is "always" or "recognition" (compared case-insensitively).
   * Parameters is a comma-separated list of URL query parameter names.
   * Rows with fewer than three cells, an empty hostname, no parameters, or
   * an unknown mode are ignored. Repeated hostnames accumulate parameters.
   *
   * Example table row:
   *   | twitter.com || always || fbclid, utm_source
   *
   * @returns {Promise<{always: Object<string, string[]>, recognition: Object<string, string[]>}>}
   *   Hostname -> parameter names per mode; both objects are empty when the
   *   page has no HTML or the request/parsing fails.
   */
  async function api_fetchUrlStripRules() {
    try {
      const response = await new mw.Api().get({
        action: "parse",
        page: URL_STRIP_PAGE,
        prop: "text",
        format: "json",
      });
      const html = response?.parse?.text?.["*"];
      if (!html) return { always: {}, recognition: {} };

      const rules = { always: {}, recognition: {} };
      const page = new DOMParser().parseFromString(html, "text/html");

      for (const row of page.querySelectorAll("table.wikitable tr")) {
        const cells = row.querySelectorAll("td");
        if (cells.length < 3) continue; // skip header rows

        const [hostCell, modeCell, paramCell] = [cells[0], cells[1], cells[2]];
        const hostname = hostCell.textContent.trim();
        const mode = modeCell.textContent.trim().toLowerCase();
        const params = [];
        for (const piece of paramCell.textContent.trim().split(",")) {
          const name = piece.trim();
          if (name) params.push(name);
        }

        const modeKnown = mode === "always" || mode === "recognition";
        if (!hostname || !params.length || !modeKnown) continue;

        const bucket = rules[mode];
        if (!bucket[hostname]) bucket[hostname] = [];
        bucket[hostname].push(...params);
      }

      console.log(
        `${TOOL_NAME}: fetched URL strip rules —`,
        Object.keys(rules.always).length,
        "always,",
        Object.keys(rules.recognition).length,
        "recognition",
      );
      return rules;
    } catch (err) {
      console.error(`${TOOL_NAME}: failed to fetch URL strip rules`, err);
      return { always: {}, recognition: {} };
    }
  }

  /**
   * Fetch the URL deprecation/removal blocklist from the wiki page named by
   * URL_BLOCKLIST_PAGE ([[User:Difool/URL-deprecation-blocklist]]).
   *
   * Page layout: a series of === level-3 === sections, each with a wikitable
   * whose first column ("Pattern") is the URL pattern and whose second column
   * ("Match type") is "prefix" or "regex". Tables that appear before the
   * first heading are ignored.
   *
   * The section title determines the action:
   *   contains "URLs to remove" (any case)  ->  action: "remove"
   *   anything else                         ->  action: "deprecate"
   *
   * The first paragraph in a section that contains an item link
   * (e.g. the rendered form of {{Q|Q139894521}}) supplies the section's
   * deprecation reason QID.
   *
   * Rows with an empty pattern, an unknown match type, or a regex that does
   * not compile are skipped (the latter with a warning).
   *
   * @returns {Promise<object[]>} Rule objects
   *   { pattern, matchType, action, sectionLabel, deprecationReason, compiledRegex? }
   *   where `deprecationReason` is a QID or null and `compiledRegex` (flags
   *   "iu") is present for regex rules only. `[]` when the page has no HTML
   *   or the request/parsing fails.
   */
  async function api_fetchUrlBlocklist() {
    // First item id linked from the given element, or null.
    const firstLinkedQid = (el) => {
      const anchor = el.querySelector('a[href*="/wiki/Q"]');
      if (!anchor) return null;
      const hit = anchor.getAttribute("href").match(/\/wiki\/(Q\d+)/);
      return hit ? hit[1] : null;
    };

    // Convert one table row into a rule for the given section, or null to skip it.
    const rowToRule = (row, section) => {
      const cells = row.querySelectorAll("td");
      if (cells.length < 2) return null; // header row

      const pattern = cells[0].textContent.trim();
      const matchType = cells[1].textContent.trim().toLowerCase();
      if (!pattern) return null;
      if (matchType !== "prefix" && matchType !== "regex") return null;

      const rule = {
        pattern,
        matchType,
        action: section.action,
        sectionLabel: section.title,
        deprecationReason: section.reason || null,
      };
      if (matchType === "regex") {
        try {
          rule.compiledRegex = new RegExp(pattern, "iu");
        } catch (err) {
          console.warn(
            `${TOOL_NAME}: invalid blocklist regex "${pattern}": ${err.message}`,
          );
          return null; // skip broken regex rules
        }
      }
      return rule;
    };

    try {
      const response = await new mw.Api().get({
        action: "parse",
        page: URL_BLOCKLIST_PAGE,
        prop: "text",
        format: "json",
      });
      const html = response?.parse?.text?.["*"];
      if (!html) return [];

      const page = new DOMParser().parseFromString(html, "text/html");
      const rules = [];

      // Headings, paragraphs and tables are visited in document order; the
      // section state below always describes the most recent heading.
      const section = { title: null, action: "deprecate", reason: null };

      for (const node of page.querySelectorAll("h3, p, table.wikitable")) {
        switch (node.tagName) {
          case "H3": {
            const title = node.textContent.replace(/\[edit\]/i, "").trim();
            section.title = title;
            section.action = title.toLowerCase().includes("urls to remove")
              ? "remove"
              : "deprecate";
            section.reason = null; // reset for each new section
            break;
          }
          case "P":
            // Only the first paragraph with an item link sets the reason.
            if (section.title && !section.reason) {
              section.reason = firstLinkedQid(node);
            }
            break;
          default:
            // It's a table
            if (!section.title) break;
            node.querySelectorAll("tr").forEach((row) => {
              const rule = rowToRule(row, section);
              if (rule) rules.push(rule);
            });
        }
      }

      console.log(`${TOOL_NAME}: fetched ${rules.length} URL blocklist rules`);
      return rules;
    } catch (err) {
      console.error(`${TOOL_NAME}: failed to fetch URL blocklist`, err);
      return [];
    }
  }

  /**
   * Fetch, for external-id properties, the P9073 (applicable "stated in")
   * values with their rank, plus related-entity QIDs (issuer, maintainer,
   * editor, etc.) that users sometimes wrongly use as "stated in" values in
   * references.
   *
   * Two SPARQL requests are started in parallel:
   *   1. Non-deprecated P9073 values with rank -> builds allowed / preferred
   *   2. Related entities: P2378 issued by, P126 maintained by, P10726 class
   *      of, P1629 subject item on the property, and P98 editor on each
   *      stated-in item -> any QID not already allowed goes into notAllowed
   *
   * Result per property:
   *   - with preferred-rank values: preferred = first preferred-rank value,
   *     allowed = all preferred-rank + all normal-rank values
   *   - otherwise: preferred = first normal-rank value (null if none),
   *     allowed = all normal-rank values
   *   - notAllowed = related QIDs that are not in allowed
   * Properties with neither a preferred value nor a notAllowed entry are omitted.
   *
   * @returns {Promise<Map<string, {preferred: (string|null), allowed: Set<string>, notAllowed: Set<string>}>>}
   *   Property id -> preferences; an empty Map when a request or the
   *   response handling fails.
   */
  async function api_fetchPropertyStatedInPreferences() {
    const statedInQuery = `
      SELECT ?prop ?stated_in ?rank WHERE {
        ?prop wikibase:propertyType wikibase:ExternalId;
              p:P9073 ?stmt.
        ?stmt ps:P9073 ?stated_in;
              wikibase:rank ?rank.
        FILTER(?rank != wikibase:DeprecatedRank)
      }
    `;
    const relatedQuery = `
      SELECT DISTINCT ?prop ?related WHERE {
        ?prop wikibase:propertyType wikibase:ExternalId.
        {
          ?prop wdt:P9073 ?stated_in.
          { ?stated_in wdt:P98 ?related. }
        } UNION {
          { ?prop wdt:P2378 ?related. }
          UNION { ?prop wdt:P126 ?related. }
          UNION { ?prop wdt:P10726 ?related. }
          UNION { ?prop wdt:P1629 ?related. }
        }
      }
    `;
    const sparqlUrl = (query) =>
      `https://query.wikidata.org/sparql?query=${encodeURIComponent(query)}&format=json`;

    try {
      const [statedInResp, relatedResp] = await Promise.all(
        [statedInQuery, relatedQuery].map((query) => fetch(sparqlUrl(query))),
      );
      const statedInRows = (await statedInResp.json()).results.bindings;
      const relatedRows = (await relatedResp.json()).results.bindings;

      // Step 1: P9073 values per property, split by rank.
      const ranked = new Map(); // propId -> { preferred: QID[], normal: QID[] }
      for (const { prop, stated_in: statedIn, rank } of statedInRows) {
        const propId = extractQID(prop.value);
        const statedInId = extractQID(statedIn.value);
        const targetList = rank.value.endsWith("PreferredRank")
          ? "preferred"
          : "normal";

        let ranks = ranked.get(propId);
        if (!ranks) {
          ranks = { preferred: [], normal: [] };
          ranked.set(propId, ranks);
        }
        ranks[targetList].push(statedInId);
      }

      // Step 2: related QIDs per property.
      const relatedByProp = new Map(); // propId -> Set<QID>
      for (const { prop, related } of relatedRows) {
        const propId = extractQID(prop.value);
        const relatedId = extractQID(related.value);

        let relatedSet = relatedByProp.get(propId);
        if (!relatedSet) {
          relatedSet = new Set();
          relatedByProp.set(propId, relatedSet);
        }
        relatedSet.add(relatedId);
      }

      // Step 3: combine. Ranked properties come first, then related-only ones.
      const preferences = new Map();
      const propIds = new Set([...ranked.keys(), ...relatedByProp.keys()]);
      propIds.forEach((propId) => {
        const ranks = ranked.get(propId);
        const prefList = ranks ? ranks.preferred : [];
        const normalList = ranks ? ranks.normal : [];

        let preferred;
        let allowed;
        if (prefList.length > 0) {
          preferred = prefList[0];
          allowed = new Set([...prefList, ...normalList]);
        } else {
          preferred = normalList[0] ?? null;
          allowed = new Set(normalList);
        }

        const relatedIds = [...(relatedByProp.get(propId) || [])];
        const notAllowed = new Set(
          relatedIds.filter((qid) => !allowed.has(qid)),
        );

        if (preferred || notAllowed.size) {
          preferences.set(propId, {
            preferred: preferred ?? null,
            allowed,
            notAllowed,
          });
        }
      });

      return preferences;
    } catch (err) {
      console.error(
        `${TOOL_NAME}: failed to fetch property stated-in preferences`,
        err,
      );
      return new Map();
    }
  }

  /**
   * Fetch source-category rules from the wiki page named by
   * SOURCE_CATEGORIES_PAGE ([[User:Difool/reference-source-categories]]).
   *
   * Parses the wikitables that follow these === headings === (matched by a
   * case-insensitive substring of the heading text):
   *   "Aggregator sources"  -> category key "aggregator"
   *   "Community sources"   -> category key "community"
   *   "Redundant sources"   -> category key "redundant"
   * Tables under any other heading, or before the first heading, are ignored.
   *
   * Aggregator / community tables:
   *   Column 0: {{P|Pxx}} or bare Pxx  -> PID
   *   Column 1: Notes (ignored)
   *
   * Redundant table:
   *   Column 0: Weak property   {{P|Pxx}}
   *   Column 1: Strong property {{P|Pxx}}
   *   Column 2: Strong stated-in QID  {{Q|Qxx}} or bare Qxx  (optional)
   *   Column 3: Notes (ignored)
   *
   * The stated-in QID is read only from rows that have all four columns, so
   * that in a shorter row a trailing notes cell is never mistaken for it.
   *
   * @returns {Promise<{aggregator: Set<string>, community: Set<string>,
   *   redundant: Array<{weakPid: string, strongPid: string, strongQid: (string|null)}>}>}
   *   Parsed rules; `strongQid` is null when the row gives none. All
   *   collections are empty when the page has no HTML or the request/parsing fails.
   */
  async function api_fetchSourceCategoryRules() {
    const empty = {
      aggregator: new Set(),
      community: new Set(),
      redundant: [],
    };
    try {
      const data = await new mw.Api().get({
        action: "parse",
        page: SOURCE_CATEGORIES_PAGE,
        prop: "text",
        format: "json",
      });
      const html = data?.parse?.text?.["*"];
      if (!html) return empty;

      const doc = new DOMParser().parseFromString(html, "text/html");
      const result = {
        aggregator: new Set(),
        community: new Set(),
        redundant: [],
      };

      // Extract a PID string from a table cell.
      // Handles both {{P|Pxx}} rendered as a link to "Property:Pxx"
      // and plain text like "P214".
      const extractPid = (cell) => {
        const link = cell.querySelector('a[href*="Property:P"]');
        if (link) {
          const m = link.getAttribute("href").match(/Property:(P\d+)/);
          if (m) return m[1];
          const title = link.getAttribute("title");
          if (title?.startsWith("Property:"))
            return title.replace("Property:", "");
        }
        const text = cell.textContent.trim();
        return /^P\d+$/.test(text) ? text : null;
      };

      // Extract a QID string from a table cell.
      // Handles both {{Q|Qxx}} rendered as a link to "/wiki/Qxx"
      // and plain text like "Q36578". Returns null for an empty/other cell.
      const extractQid = (cell) => {
        const link = cell.querySelector('a[href*="/wiki/Q"]');
        if (link) {
          const m = link.getAttribute("href").match(/\/wiki\/(Q\d+)/);
          if (m) return m[1];
        }
        const text = cell.textContent.trim();
        return /^Q\d+$/.test(text) ? text : null;
      };

      let currentSection = null;

      for (const el of doc.querySelectorAll("h3, table.wikitable")) {
        if (el.tagName === "H3") {
          const heading = el.textContent
            .replace(/\[edit\]/i, "")
            .trim()
            .toLowerCase();
          if (heading.includes("aggregator")) currentSection = "aggregator";
          else if (heading.includes("community")) currentSection = "community";
          else if (heading.includes("redundant")) currentSection = "redundant";
          else currentSection = null;
          continue;
        }

        // It's a table
        if (!currentSection) continue;

        el.querySelectorAll("tr").forEach((tr) => {
          const cells = tr.querySelectorAll("td");
          if (!cells.length) return; // header row

          if (
            currentSection === "aggregator" ||
            currentSection === "community"
          ) {
            const pid = extractPid(cells[0]);
            if (pid) result[currentSection].add(pid);
          } else if (currentSection === "redundant") {
            if (cells.length < 2) return;
            const weakPid = extractPid(cells[0]);
            const strongPid = extractPid(cells[1]);
            if (!weakPid || !strongPid) return;
            // Column 2 is the optional strong stated-in item; see the
            // four-column note in the function header.
            const strongQid = cells.length >= 4 ? extractQid(cells[2]) : null;
            result.redundant.push({ weakPid, strongPid, strongQid });
          }
        });
      }

      console.log(
        `${TOOL_NAME}: fetched source category rules —`,
        `${result.aggregator.size} aggregator,`,
        `${result.community.size} community,`,
        `${result.redundant.length} redundant`,
      );
      return result;
    } catch (e) {
      console.error(`${TOOL_NAME}: failed to fetch source category rules`, e);
      return empty;
    }
  }

  // ==== 09 Cache registry ===================================================

  const caches = [
    {
      key: OCC_CACHE_KEY,
      label: "Occupations",
      ttl: OCC_CACHE_TTL_MS,
      target: occupationParentsCache,
      type: "map",
      valueKind: "set",
      fetchFn: null,
      assignFn(val) {
        occupationParentsCache.clear();
        for (const [qid, parents] of val)
          occupationParentsCache.set(qid, parents);
      },
    },
    {
      key: WIKIPEDIA_EDITIONS_CACHE_KEY,
      label: "Wikipedia editions",
      ttl: WIKIPEDIA_EDITIONS_TTL_MS,
      target: wikipediaEditionsCache,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchWikipediaEditions,
      assignFn(val) {
        wikipediaEditionsCache.clear();
        for (const [qid, code] of val) wikipediaEditionsCache.set(qid, code);
      },
    },

    {
      key: LANG_NAMES_CACHE_KEY,
      label: "Language names",
      ttl: LANG_NAMES_TTL_MS,
      target: null,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchContentLanguages,
      assignFn(val) {
        wikipediaLangNamesCache.clear();
        // val is a plain object { code: { name } } (fresh fetch or JSON round-trip)
        for (const [code, entry] of Object.entries(val || {})) {
          if (entry?.name) wikipediaLangNamesCache.set(code, entry.name);
        }
      },
      _customSave() {
        try {
          localStorage.setItem(
            LANG_NAMES_CACHE_KEY,
            JSON.stringify({
              timestamp: Date.now(),
              // Store as flat code->name object; wrap back to { name } on load
              // so assignFn can handle both fresh-fetch and round-trip shapes.
              langs: Object.fromEntries(
                [...wikipediaLangNamesCache.entries()].map(([code, name]) => [
                  code,
                  { name },
                ]),
              ),
            }),
          );
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to save language names`, e);
        }
      },
      _customLoad() {
        try {
          const raw = localStorage.getItem(LANG_NAMES_CACHE_KEY);
          if (!raw) return false;
          const parsed = JSON.parse(raw);
          if (!parsed?.langs) return false;
          if (
            parsed.timestamp &&
            Date.now() - parsed.timestamp > LANG_NAMES_TTL_MS
          ) {
            localStorage.removeItem(LANG_NAMES_CACHE_KEY);
            return false;
          }
          wikipediaLangNamesCache.clear();
          for (const [code, entry] of Object.entries(parsed.langs)) {
            if (entry?.name) wikipediaLangNamesCache.set(code, entry.name);
          }
          return true;
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to load language names`, e);
          return false;
        }
      },
      _customReset() {
        wikipediaLangNamesCache.clear();
        localStorage.removeItem(LANG_NAMES_CACHE_KEY);
      },
      _customStatus() {
        const raw = localStorage.getItem(LANG_NAMES_CACHE_KEY);
        if (!raw) return "Language names cache is empty";
        try {
          const parsed = JSON.parse(raw);
          const expiry = parsed.timestamp
            ? new Date(
                parsed.timestamp + LANG_NAMES_TTL_MS,
              ).toLocaleDateString()
            : "unknown";
          return `Language names cache contains ${wikipediaLangNamesCache.size} entries, will be reset on ${expiry}`;
        } catch {
          return "Language names cache status unavailable";
        }
      },
    },

    {
      key: OBSOLETE_IDS_CACHE_KEY,
      label: "Obsolete ID properties",
      ttl: OBSOLETE_IDS_TTL_MS,
      target: obsoleteIdProps,
      type: "set",
      fetchFn: api_fetchAllObsoleteIdProps,
      assignFn(val) {
        obsoleteIdProps.clear();
        for (const item of val) obsoleteIdProps.add(item);
      },
    },
    {
      key: REGEX_CACHE_KEY,
      label: "Property regex constraints",
      ttl: REGEX_CACHE_TTL_MS,
      target: propertyRegexCache,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchPropertyRegexConstraints,
      indexeddb_name: "propertyRegex",
      assignFn(val) {
        propertyRegexCache.clear();
        let skipped = 0;
        for (const [prop, regex] of val) {
          const { valid, error } = validatePropertyRegex(regex);
          if (valid) {
            propertyRegexCache.set(prop, regex);
          } else {
            console.warn(
              `${TOOL_NAME}: invalid regex for ${prop}: "${regex}" — ${error}`,
            );
            skipped++;
          }
        }
        if (skipped)
          console.log(
            `${TOOL_NAME}: skipped ${skipped} invalid property regex constraints`,
          );
      },
    },
    {
      key: URL_PATTERNS_CACHE_KEY,
      label: "Property URL match patterns",
      ttl: URL_PATTERNS_TTL_MS,
      target: propertyUrlPatternsCache,
      type: "map",
      valueKind: "array",
      fetchFn: api_fetchPropertyUrlMatchPatterns,
      indexeddb_name: "propertyUrlPatterns",
      assignFn(val) {
        propertyUrlPatternsCache.clear();
        let skipped = 0;
        for (const [prop, patternObjs] of val) {
          const valid = [];
          for (const pObj of patternObjs) {
            const expr = pObj.pattern || pObj;
            if (shouldIgnorePattern(prop, expr)) {
              console.warn(
                `${TOOL_NAME}: ignored pattern for ${prop}: "${expr}"`,
              );
              skipped++;
              continue;
            }
            const result = validateUrlPattern(expr);
            if (result.valid) {
              valid.push(pObj);
            } else {
              console.warn(
                `${TOOL_NAME}: invalid URL pattern for ${prop}: "${expr}" — ${result.error}`,
              );
              skipped++;
            }
          }
          if (valid.length) propertyUrlPatternsCache.set(prop, valid);
        }
        if (skipped)
          console.log(
            `${TOOL_NAME}: skipped ${skipped} invalid/ignored URL patterns`,
          );
      },
    },
    {
      key: URL_STRIP_CACHE_KEY,
      label: "URL strip rules",
      ttl: URL_STRIP_CACHE_TTL_MS,
      target: null,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchUrlStripRules,
      assignFn(val) {
        // After a fresh fetch val is the raw { always, recognition } object.
        // After a localStorage round-trip it arrives as-is from JSON.parse.
        const rules = val instanceof Map ? val.get("rules") : val;
        if (!rules || typeof rules !== "object") return;
        urlStripCache.always = rules.always || {};
        urlStripCache.recognition = rules.recognition || {};
      },
      // Custom persistence: store as a single JSON blob (not a Map/Set).
      _customSave() {
        try {
          localStorage.setItem(
            URL_STRIP_CACHE_KEY,
            JSON.stringify({
              timestamp: Date.now(),
              rules: {
                always: urlStripCache.always,
                recognition: urlStripCache.recognition,
              },
            }),
          );
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to save URL strip rules`, e);
        }
      },
      _customLoad() {
        try {
          const raw = localStorage.getItem(URL_STRIP_CACHE_KEY);
          if (!raw) return false;
          const parsed = JSON.parse(raw);
          if (!parsed?.rules) return false;
          if (
            parsed.timestamp &&
            Date.now() - parsed.timestamp > URL_STRIP_CACHE_TTL_MS
          ) {
            localStorage.removeItem(URL_STRIP_CACHE_KEY);
            return false;
          }
          urlStripCache.always = parsed.rules.always || {};
          urlStripCache.recognition = parsed.rules.recognition || {};
          return true;
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to load URL strip rules`, e);
          return false;
        }
      },
      _customReset() {
        urlStripCache.always = {};
        urlStripCache.recognition = {};
        localStorage.removeItem(URL_STRIP_CACHE_KEY);
      },
      _customStatus() {
        const raw = localStorage.getItem(URL_STRIP_CACHE_KEY);
        if (!raw) return "URL strip rules cache is empty";
        try {
          const parsed = JSON.parse(raw);
          const expiry = parsed.timestamp
            ? new Date(
                parsed.timestamp + URL_STRIP_CACHE_TTL_MS,
              ).toLocaleDateString()
            : "unknown";
          const n =
            Object.keys(urlStripCache.always).length +
            Object.keys(urlStripCache.recognition).length;
          const wildcardNote =
            urlStripCache.always["*"] || urlStripCache.recognition["*"]
              ? " (incl. global wildcard)"
              : "";
          return `URL strip rules cache contains ${n} entries${wildcardNote}, will be reset on ${expiry}`;
        } catch {
          return "URL strip rules cache status unavailable";
        }
      },
    },
    {
      key: STATED_IN_CACHE_KEY,
      label: "Property 'stated in' preferences",
      ttl: STATED_IN_TTL_MS,
      target: propertyStatedInCache,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchPropertyStatedInPreferences,
      indexeddb_name: "propertyStatedIn",
      assignFn(val) {
        propertyStatedInCache.clear();
        for (const [prop, data] of val) {
          // After JSON round-trip Sets are serialised as arrays; restore them.
          const allowed =
            data.allowed instanceof Set
              ? data.allowed
              : new Set(data.allowed || []);
          const notAllowed =
            data.notAllowed instanceof Set
              ? data.notAllowed
              : new Set(data.notAllowed || []);
          propertyStatedInCache.set(prop, {
            preferred: data.preferred,
            allowed,
            notAllowed,
          });
        }
      },
    },
    {
      key: URL_BLOCKLIST_CACHE_KEY,
      label: "URL deprecation blocklist",
      ttl: URL_BLOCKLIST_CACHE_TTL_MS,
      target: null,
      type: "map", // placeholder — custom persistence used
      valueKind: "plain",
      fetchFn: api_fetchUrlBlocklist,
      assignFn(val) {
        // val may be a raw array (fresh fetch) or the parsed JSON (localStorage round-trip)
        const rules = Array.isArray(val)
          ? val
          : val instanceof Map
            ? val.get("rules")
            : null;
        if (!Array.isArray(rules)) return;
        // Re-compile regex rules after JSON round-trip (RegExp is not JSON-serialisable)
        urlBlocklistCache.rules = rules.map((r) => {
          if (r.matchType === "regex" && !r.compiledRegex) {
            try {
              r.compiledRegex = new RegExp(r.pattern, "iu");
            } catch {}
          }
          return r;
        });
      },
      _customSave() {
        try {
          // Serialise without compiledRegex (not JSON-safe)
          const serialisable = urlBlocklistCache.rules.map(
            ({ compiledRegex: _cr, ...rest }) => rest,
          );
          localStorage.setItem(
            URL_BLOCKLIST_CACHE_KEY,
            JSON.stringify({ timestamp: Date.now(), rules: serialisable }),
          );
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to save URL blocklist`, e);
        }
      },
      _customLoad() {
        try {
          const raw = localStorage.getItem(URL_BLOCKLIST_CACHE_KEY);
          if (!raw) return false;
          const parsed = JSON.parse(raw);
          if (!Array.isArray(parsed?.rules)) return false;
          if (
            parsed.timestamp &&
            Date.now() - parsed.timestamp > URL_BLOCKLIST_CACHE_TTL_MS
          ) {
            localStorage.removeItem(URL_BLOCKLIST_CACHE_KEY);
            return false;
          }
          urlBlocklistCache.rules = parsed.rules.map((r) => {
            if (r.matchType === "regex") {
              try {
                r.compiledRegex = new RegExp(r.pattern, "iu");
              } catch {}
            }
            return r;
          });
          return true;
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to load URL blocklist`, e);
          return false;
        }
      },
      _customReset() {
        urlBlocklistCache.rules = [];
        localStorage.removeItem(URL_BLOCKLIST_CACHE_KEY);
      },
      _customStatus() {
        const raw = localStorage.getItem(URL_BLOCKLIST_CACHE_KEY);
        if (!raw) return "URL blocklist cache is empty";
        try {
          const parsed = JSON.parse(raw);
          const expiry = parsed.timestamp
            ? new Date(
                parsed.timestamp + URL_BLOCKLIST_CACHE_TTL_MS,
              ).toLocaleDateString()
            : "unknown";
          return `URL blocklist cache contains ${urlBlocklistCache.rules.length} rules, will be reset on ${expiry}`;
        } catch {
          return "URL blocklist cache status unavailable";
        }
      },
    },

    {
      key: SOURCE_CATEGORIES_CACHE_KEY,
      label: "Source category rules",
      ttl: SOURCE_CATEGORIES_TTL_MS,
      target: null,
      type: "map",
      valueKind: "plain",
      fetchFn: api_fetchSourceCategoryRules,
      assignFn(val) {
        sourceCategoryCache.aggregator.clear();
        sourceCategoryCache.community.clear();
        sourceCategoryCache.redundant = [];

        // val arrives as a plain object after JSON round-trip
        const src = val instanceof Map ? Object.fromEntries(val) : val;
        for (const pid of src?.aggregator || [])
          sourceCategoryCache.aggregator.add(pid);
        for (const pid of src?.community || [])
          sourceCategoryCache.community.add(pid);
        sourceCategoryCache.redundant = Array.isArray(src?.redundant)
          ? src.redundant.slice()
          : [];
      },
      _customSave() {
        try {
          localStorage.setItem(
            SOURCE_CATEGORIES_CACHE_KEY,
            JSON.stringify({
              timestamp: Date.now(),
              aggregator: Array.from(sourceCategoryCache.aggregator),
              community: Array.from(sourceCategoryCache.community),
              redundant: sourceCategoryCache.redundant,
            }),
          );
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to save source category rules`, e);
        }
      },
      _customLoad() {
        try {
          const raw = localStorage.getItem(SOURCE_CATEGORIES_CACHE_KEY);
          if (!raw) return false;
          const parsed = JSON.parse(raw);
          if (!parsed?.aggregator) return false;
          if (
            parsed.timestamp &&
            Date.now() - parsed.timestamp > SOURCE_CATEGORIES_TTL_MS
          ) {
            localStorage.removeItem(SOURCE_CATEGORIES_CACHE_KEY);
            return false;
          }
          sourceCategoryCache.aggregator = new Set(parsed.aggregator || []);
          sourceCategoryCache.community = new Set(parsed.community || []);
          sourceCategoryCache.redundant = parsed.redundant || [];
          return true;
        } catch (e) {
          console.warn(`${TOOL_NAME}: failed to load source category rules`, e);
          return false;
        }
      },
      _customReset() {
        sourceCategoryCache.aggregator.clear();
        sourceCategoryCache.community.clear();
        sourceCategoryCache.redundant = [];
        localStorage.removeItem(SOURCE_CATEGORIES_CACHE_KEY);
      },
      _customStatus() {
        const raw = localStorage.getItem(SOURCE_CATEGORIES_CACHE_KEY);
        if (!raw) return "Source category rules cache is empty";
        try {
          const parsed = JSON.parse(raw);
          const expiry = parsed.timestamp
            ? new Date(
                parsed.timestamp + SOURCE_CATEGORIES_TTL_MS,
              ).toLocaleDateString()
            : "unknown";
          return (
            `Source category rules cache: ` +
            `${sourceCategoryCache.aggregator.size} aggregator, ` +
            `${sourceCategoryCache.community.size} community, ` +
            `${sourceCategoryCache.redundant.length} redundant — ` +
            `will be reset on ${expiry}`
          );
        } catch {
          return "Source category rules cache status unavailable";
        }
      },
    },
  ];

  // ==== 10 Fetch-failure backoff =============================================

  /**
   * Reads the persisted fetch-failure timestamps from localStorage and keeps
   * only the entries that are still inside the FETCH_BACKOFF_MS window.
   *
   * @returns {Object} map of key -> failure timestamp (ms); an empty object
   *   when nothing is stored or the stored value cannot be read or parsed.
   */
  function loadFetchFailureTimes() {
    try {
      const raw = localStorage.getItem(FETCH_FAILURE_TIMES_KEY);
      if (!raw) return {};
      const allEntries = Object.entries(JSON.parse(raw));
      const now = Date.now();
      // Entries whose back-off window has elapsed are dropped.
      const stillBackingOff = allEntries.filter(
        ([, failedAt]) => now - failedAt < FETCH_BACKOFF_MS,
      );
      return Object.fromEntries(stillBackingOff);
    } catch {
      return {};
    }
  }

  /**
   * Persists the fetch-failure timestamps to localStorage as JSON.
   * A failure to serialise or store is logged as a warning, never thrown.
   *
   * @param {Object} times map of key -> failure timestamp (ms)
   */
  function saveFetchFailureTimes(times) {
    try {
      const serialized = JSON.stringify(times);
      localStorage.setItem(FETCH_FAILURE_TIMES_KEY, serialized);
    } catch (e) {
      console.warn(`${TOOL_NAME}: failed to save fetch failure times`, e);
    }
  }

  const fetchFailureTimes = loadFetchFailureTimes();

  /**
   * Counts the items carried by a fetched cache payload.
   * Arrays, Maps and Sets count their own elements. A plain object counts the
   * elements of each of its collection-valued properties, e.g.
   * { always: {...}, recognition: {...} } yields the number of keys in both.
   */
  function cache_countFetchedEntries(payload) {
    const sizeOf = (value) => {
      if (Array.isArray(value)) return value.length;
      if (value instanceof Map || value instanceof Set) return value.size;
      if (value && typeof value === "object") return Object.keys(value).length;
      return 0;
    };
    const isCollection =
      Array.isArray(payload) || payload instanceof Map || payload instanceof Set;
    if (isCollection) return sizeOf(payload);
    if (!payload || typeof payload !== "object") return 0;
    return Object.values(payload).reduce((sum, part) => sum + sizeOf(part), 0);
  }

  /**
   * Fetches fresh data for one cache entry, assigns and persists it, and
   * reports progress through mw.notify.
   *
   * Nothing is fetched when the entry needs IndexedDB but large buffers are
   * disabled, or when the last failed attempt for this entry is still inside
   * the FETCH_BACKOFF_MS window. A failed fetch records the failure time so
   * later calls back off; a successful one clears it.
   *
   * @param {Object} entry cache descriptor (key, label, type, fetchFn,
   *   assignFn and optionally indexeddb_name / _customSave)
   * @param {Object} [settings] only `enableLargeBuffers` is read here
   * @returns {Promise<void>}
   */
  async function refreshCacheWithNotify(entry, settings) {
    const { key, label, type, fetchFn, assignFn } = entry;
    const tag = key;

    if (entry.indexeddb_name && !settings?.enableLargeBuffers) {
      console.warn(`${TOOL_NAME}: skipping ${label} — requires large buffers`);
      return;
    }

    const lastFail = fetchFailureTimes[key];
    if (lastFail) {
      const elapsed = Date.now() - lastFail;
      const remaining = FETCH_BACKOFF_MS - elapsed;
      if (remaining > 0) {
        const retryIn = Math.ceil(remaining / 60000);
        console.warn(
          `${TOOL_NAME}: skipping ${label} — retrying in ~${retryIn} min`,
        );
        mw.notify(
          `Skipping ${label}: will retry later (last attempt failed recently)`,
          {
            type: "warn",
            autoHide: true,
            tag,
          },
        );
        return;
      }
    }

    mw.notify(`Loading ${label}...`, { type: "info", autoHide: false, tag });

    try {
      const data = await fetchFn();
      // Custom entries return a plain object or array rather than Map/Set
      // input, so skip the wrapper and pass the data directly.
      const container = entry._customSave
        ? data
        : type === "set"
          ? new Set(data)
          : new Map(data);
      assignFn(container);

      if (entry._customSave) {
        entry._customSave();
      } else if (entry.indexeddb_name) {
        await cache_saveIndexedDB(entry);
      } else {
        cache_saveLocalSt(entry);
      }

      delete fetchFailureTimes[key];
      saveFetchFailureTimes(fetchFailureTimes);

      // Count what THIS entry fetched. Reading urlStripCache here would
      // misreport every other custom-persisted entry.
      const count = cache_countFetchedEntries(container);
      mw.notify(
        count > 0
          ? `Reloaded ${label} (${count} entries)`
          : `Failed to reload ${label}: empty result`,
        { type: count > 0 ? "success" : "error", autoHide: true, tag },
      );
    } catch (err) {
      console.error(`${TOOL_NAME}: failed to reload ${label}`, err);
      fetchFailureTimes[key] = Date.now();
      saveFetchFailureTimes(fetchFailureTimes);
      // The rejection value is not guaranteed to be an Error instance.
      const reason = err?.message ?? String(err);
      mw.notify(`Failed to reload ${label}: ${reason}`, {
        type: "error",
        autoHide: true,
        tag,
      });
    }
  }

  // ==== 11 Cache init =======================================================

  /**
   * Loads every entry of `caches` from its persistent store and fetches the
   * ones that could not be loaded.
   *
   * With `settings.enableLargeBuffers` off, IndexedDB-backed entries are first
   * reset and then skipped. If initIndexedDB() fails, `indexedDBReady` is set
   * to false and a warning is logged.
   *
   * @param {Object} settings only `enableLargeBuffers` is read here; the
   *   object is also passed on to refreshCacheWithNotify
   * @returns {Promise<void>}
   */
  async function initCaches(settings) {
    const largeBuffers = settings.enableLargeBuffers;

    if (!largeBuffers) {
      const idbEntries = caches.filter((entry) => entry.indexeddb_name);
      for (const entry of idbEntries) {
        // Best effort: a failed reset must not abort initialisation.
        await cache_resetIndexedDB(entry).catch(() => {});
      }
    }

    try {
      await initIndexedDB();
    } catch {
      console.warn(
        `${TOOL_NAME}: IndexedDB unavailable, falling back to localStorage`,
      );
      indexedDBReady = false;
    }

    for (const entry of caches) {
      if (entry.indexeddb_name && !largeBuffers) continue;

      let loaded = false;
      try {
        if (entry._customLoad) {
          loaded = entry._customLoad();
        } else if (entry.indexeddb_name) {
          loaded = await cache_loadIndexedDB(entry);
        } else {
          loaded = cache_loadLocalSt(entry);
        }
      } catch (e) {
        console.warn(`${TOOL_NAME}: failed to load ${entry.key}`, e);
      }

      if (loaded || !entry.fetchFn) continue;
      await refreshCacheWithNotify(entry, settings);
    }
  }

  // ==== 12 Claim helpers ====================================================

  /**
   * Classifies one reference of a statement.
   *
   * Checks run in a fixed order and the first match wins:
   *   "ignore"                 – only P813 / P1476 / P1810 snaks
   *   "external-id:error" / "invalid" – an external-id snak fails validation
   *                              (only checked when there is no P813)
   *   "self_stated_in"         – tautological "stated in" on an external-id claim
   *   "wikimedia_no_sitelinks" / "wikimedia" / "wikimedia+" – Wikimedia imports
   *   "inferred+" / "inferred" – matched-by-identifier / lone inference snak
   *   "aggregator" / "community" / "redundant" – per sourceCategoryCache rules
   *   "obsolete"               – only obsolete external-id props (no P813)
   *   null                     – none of the above
   *
   * @param {Object} entity  item the statement belongs to
   * @param {Object} ref     reference under inspection
   * @param {Array}  allRefs all references of the same statement (used for
   *   the "redundant" check); a missing list is treated as empty
   * @param {Object} [claim] owning statement, needed for "self_stated_in"
   * @returns {string|null}
   */
  function determineSourceCategory(entity, ref, allRefs, claim = null) {
    const snaks = ref.snaks || {};
    const pids = Object.keys(snaks);

    // References that only carry metadata are not real sources
    if (
      pids.length > 0 &&
      pids.every((pid) =>
        [PID_RETRIEVED, PID_TITLE, PID_SUBJECT_NAMED_AS].includes(pid),
      )
    ) {
      return "ignore";
    }

    const hasRetrieved = pids.includes(PID_RETRIEVED);

    // Validate external IDs in non-retrieved references
    if (!hasRetrieved) {
      for (const pid of pids) {
        // Named `snak`, not `claim`, so the `claim` parameter is not shadowed.
        for (const snak of snaks[pid] || []) {
          if (snak.datatype !== "external-id") continue;
          const validation = validateExternalIdValue(
            pid,
            snak.datavalue?.value,
          );
          if (!validation.valid) {
            return validation.reason === "regex_error"
              ? "external-id:error"
              : "invalid";
          }
        }
      }
    }

    const statedInQIDs = (snaks[PID_STATED_IN] || [])
      .map((c) => c.datavalue?.value?.["numeric-id"])
      .filter((n) => typeof n === "number")
      .map((n) => "Q" + n);

    // Tautological "stated in": the reference consists solely of P248 whose
    // value is an allowed stated-in for the claim's own external-id property,
    // plus at most P1476 (title) / P1810 (subject named as) metadata — and
    // crucially no P813 (retrieved).  Such a reference adds no information
    // beyond what the external-id property itself already implies.
    // Example: ISNI claim (P213) with a reference that only has
    //   stated in (P248) = International Standard Name Identifier (Q423048).
    if (
      !hasRetrieved &&
      claim?.mainsnak?.datatype === "external-id" &&
      pids.includes(PID_STATED_IN) &&
      pids.every(
        (p) =>
          p === PID_STATED_IN || p === PID_TITLE || p === PID_SUBJECT_NAMED_AS,
      )
    ) {
      const claimPid = claim.mainsnak.property;
      const prefs = propertyStatedInCache.get(claimPid);
      if (
        prefs?.allowed?.size &&
        statedInQIDs.some((q) => prefs.allowed.has(q))
      ) {
        return "self_stated_in";
      }
    }

    // Wikimedia imports
    if (
      pids.includes(PID_IMPORTED_FROM) ||
      pids.includes(PID_WIKIMEDIA_IMPORT_URL)
    ) {
      if (
        DEV_WIKIMEDIA_NO_SITELINKS &&
        analyzeWikimediaReference(entity, ref).hasMissingSitelink
      )
        return "wikimedia_no_sitelinks";

      const wikimediaAllowed = new Set([
        PID_INFERRED,
        PID_WIKIMEDIA_IMPORT_URL,
        PID_IMPORTED_FROM,
        PID_RETRIEVED,
      ]);
      if (
        pids.includes(PID_INFERRED) &&
        pids.every((p) => wikimediaAllowed.has(p))
      )
        return "wikimedia";
      if (
        pids.includes(PID_DETERMINATION_METHOD) ||
        pids.includes(PID_INFERRED)
      )
        return "wikimedia+";
      return "wikimedia";
    }

    if (pids.includes(PID_MATCHED_BY_IDENTIFIER_FROM)) return "inferred+";
    if (
      (pids.includes(PID_INFERRED) || pids.includes(PID_BASED_ON_HEURISTIC)) &&
      pids.length === 1
    ) {
      return "inferred";
    }

    // Aggregator check: direct snak PID match, or P248 stated-in QID match
    // (QIDs derived at runtime from propertyStatedInCache via the aggregator PID).
    if (sourceCategoryCache.aggregator.size) {
      for (const aggPid of sourceCategoryCache.aggregator) {
        if (pids.includes(aggPid)) return "aggregator";
        const aggQids = propertyStatedInCache.get(aggPid)?.allowed;
        if (aggQids && statedInQIDs.some((q) => aggQids.has(q)))
          return "aggregator";
      }
    }

    // Community check: same pattern.
    if (sourceCategoryCache.community.size) {
      for (const comPid of sourceCategoryCache.community) {
        if (pids.includes(comPid)) return "community";
        const comQids = propertyStatedInCache.get(comPid)?.allowed;
        if (comQids && statedInQIDs.some((q) => comQids.has(q)))
          return "community";
      }
    }

    // Redundant check: weak PID/QID present in this ref, strong PID/QID present
    // in at least one *other* ref on the same statement.
    for (const { weakPid, strongPid } of sourceCategoryCache.redundant) {
      const weakQids = propertyStatedInCache.get(weakPid)?.allowed;
      const isWeak =
        pids.includes(weakPid) ||
        (weakQids && statedInQIDs.some((q) => weakQids.has(q)));
      if (!isWeak) continue;

      // Same for every sibling reference, so look it up once per rule.
      const strongQids = propertyStatedInCache.get(strongPid)?.allowed;

      const strongPresent = (allRefs || []).some((r) => {
        if (r === ref) return false;
        // A sibling without `snaks` is treated as empty rather than
        // dereferenced.
        const rSnaks = r.snaks || {};
        if (Object.keys(rSnaks).includes(strongPid)) return true;
        if (!strongQids) return false;
        const rStatedIn = (rSnaks[PID_STATED_IN] || []).map(
          (c) => "Q" + c.datavalue?.value?.["numeric-id"],
        );
        return rStatedIn.some((q) => strongQids.has(q));
      });

      if (strongPresent) return "redundant";
    }

    // Obsolete external-id check (skip if there is a P813)
    if (!hasRetrieved) {
      const obsoletePids = pids.filter(
        (pid) =>
          obsoleteIdProps.has(pid) &&
          (snaks[pid] || []).some(
            (c) => c.datavalue?.value && c.datatype === "external-id",
          ),
      );
      if (obsoletePids.length) {
        const hasOtherExtId = pids.some(
          (pid) =>
            !obsoletePids.includes(pid) &&
            (snaks[pid] || []).some((c) => c.datatype === "external-id"),
        );
        if (!hasOtherExtId) return "obsolete";
      }
    }

    return null;
  }

  /**
   * Tells whether a statement carries a 'real' value that warrants stricter
   * sourcing checks, i.e. whether its mainsnak datatype is anything other
   * than external-id (a missing mainsnak counts as strict too).
   */
  function isStrictStatement(claim) {
    const datatype = claim.mainsnak?.datatype;
    const isExternalId = datatype === "external-id";
    return !isExternalId;
  }

  /**
   * Maps the source category of a reference to a numeric quality level:
   *   0 – wikimedia import (with or without sitelinks), metadata-only
   *       ("ignore") or tautological stated-in
   *   1 – weak: aggregator, community, inferred, invalid, obsolete, redundant
   *   2 – every other category, including null (treated as a genuine source)
   */
  function getReferenceLevel(entity, ref, allRefs, claim = null) {
    const nonSources = [
      "wikimedia_no_sitelinks",
      "wikimedia",
      "ignore",
      "self_stated_in",
    ];
    const weakSources = [
      "aggregator",
      "community",
      "inferred",
      "invalid",
      "obsolete",
      "redundant",
    ];

    const category = determineSourceCategory(entity, ref, allRefs, claim);
    if (nonSources.includes(category)) return 0;
    if (weakSources.includes(category)) return 1;
    return 2;
  }

  /** True when the reference's quality level is below 2. */
  function isWeakReference(entity, ref, allRefs, claim = null) {
    const level = getReferenceLevel(entity, ref, allRefs, claim);
    return level < 2;
  }

  /**
   * True when at least one reference of the claim reaches quality level 2.
   * A claim without references is never valid.
   */
  function hasValidReference(entity, claim) {
    const refs = claim.references || [];
    for (const ref of refs) {
      if (getReferenceLevel(entity, ref, refs, claim) === 2) return true;
    }
    return false;
  }

  /**
   * Collects the item IDs that the entity's "subclass of" (P279) statements
   * point to. Values that are not QIDs are left out.
   *
   * @returns {Set<string>} possibly empty
   */
  function extractP279Parents(entity) {
    const statements = entity?.claims?.[PID_SUBCLASS_OF] || [];
    const parentIds = statements
      .map((statement) => statement?.mainsnak?.datavalue?.value?.id)
      .filter((id) => isQid(id));
    return new Set(parentIds);
  }

  /**
   * Lists the property IDs of the entity that have at least one statement
   * whose mainsnak datatype is "time".
   */
  function getDateProperties(entity) {
    const isTimeStatement = (statement) =>
      statement?.mainsnak?.datatype === "time";

    const datePids = [];
    for (const pid in entity.claims) {
      const statements = entity.claims[pid];
      if (!statements.some(isTimeStatement)) continue;
      datePids.push(pid);
    }
    return datePids;
  }

  /**
   * Normalizes a WbTime value object to the given precision level.
   *
   * The year is rounded for precisions below 9 (decade, century, …). For
   * month (10) and day (11) precision the month / day are kept, unless they
   * are "00", in which case the result is downgraded to year precision (9).
   *
   * The sign, year, month and day are read with one regular expression, so
   * BCE timestamps ("-0044-03-15T00:00:00Z") are handled the same way as CE
   * ones. Splitting on "-" would shift every field by one for negative years.
   *
   * @param {Object} val        value with `time` and `calendarmodel`
   * @param {number} precision  precision to normalize to
   * @returns {{time: string, precision: number, calendarmodel: *}|null}
   *   null when `val` is missing or its `time` has no parsable year
   */
  function normalizeDateValue(val, precision) {
    if (!val || typeof val.time !== "string") return null;
    const match = val.time.match(/^([+-]\d+)-(?:(\d{2})(?:-(\d{2}))?)?/);
    if (!match) return null;

    const year = parseInt(match[1], 10);
    // A missing month or day behaves like an unknown ("00") one.
    const month = match[2] ?? "00";
    const day = match[3] ?? "00";

    const fmtYear = (y) => (y >= 0 ? `+${y}` : `${y}`);
    let ny = year;

    if (precision <= 5) {
      const pow = Math.pow(10, 9 - precision);
      ny = Math.round(year / pow) * pow;
    } else if (precision === 6) {
      // Millennium: round away from zero.
      const yf = year / 1000;
      ny = (yf < 0 ? Math.floor(yf) : Math.ceil(yf)) * 1000;
    } else if (precision === 7) {
      // Century: round away from zero.
      const yf = year / 100;
      ny = (yf < 0 ? Math.floor(yf) : Math.ceil(yf)) * 100;
    } else if (precision === 8) {
      // Decade: truncate towards zero.
      ny = Math.trunc(year / 10) * 10;
    }
    // precision 9 (year): ny = year unchanged

    // Downgrade precision when month/day are "00"
    let effective = precision;
    if (effective === 11 && (month === "00" || day === "00")) effective = 9;
    if (effective === 10 && month === "00") effective = 9;

    if (effective === 11) {
      return {
        time: `${fmtYear(ny)}-${month}-${day}`,
        precision: effective,
        calendarmodel: val.calendarmodel,
      };
    }
    if (effective === 10) {
      return {
        time: `${fmtYear(ny)}-${month}`,
        precision: effective,
        calendarmodel: val.calendarmodel,
      };
    }
    return {
      time: `${fmtYear(ny)}-01-01T00:00:00Z`,
      precision: effective,
      calendarmodel: val.calendarmodel,
    };
  }

  /**
   * Tells whether two date claims carry the same normalized date.
   *
   * With `at_lowest_precision` both values are normalized to the lower of the
   * two precisions, so "1955" equals "2 Mar 1955" and "1590s" equals "1591".
   * In that mode the calendar model is not compared when the common precision
   * is year or coarser, or when the two precisions differ.
   *
   * Two claims without a value are equal; a claim with a value never equals
   * one without.
   */
  function has_same_normalized_date(
    c1,
    c2,
    at_lowest_precision,
    ignore_calendarmodel,
  ) {
    const v1 = c1.mainsnak?.datavalue?.value;
    const v2 = c2.mainsnak?.datavalue?.value;
    if (!v1 || !v2) return !v1 && !v2;

    let left = normalizeDateValue(v1, v1.precision);
    let right = normalizeDateValue(v2, v2.precision);

    if (at_lowest_precision) {
      const common = Math.min(v1.precision, v2.precision);
      left = normalizeDateValue(v1, common);
      right = normalizeDateValue(v2, common);

      const skipCalendar =
        common <= 9 || v1.precision !== common || v2.precision !== common;
      if (skipCalendar) {
        // Copy the right-hand calendar model so it cannot cause a mismatch.
        left = { ...left, calendarmodel: right.calendarmodel };
      }
    }

    if (left.time !== right.time) return false;
    if (left.precision !== right.precision) return false;
    return ignore_calendarmodel || left.calendarmodel === right.calendarmodel;
  }

  // Qualifier property that is left out when two claims' qualifiers are
  // compared by qualifiersEqualExceptP31.
  const TOLERANT_QUALIFIER = { pid: "P31", value: "Q26961029" };

  /**
   * Compares the qualifiers of two claims while disregarding the tolerated
   * qualifier property. The comparison is on the JSON serialisation, so
   * property order matters. Neither claim is modified.
   */
  function qualifiersEqualExceptP31(claimA, claimB) {
    const fingerprint = (claim) => {
      const remaining = { ...claim.qualifiers };
      delete remaining[TOLERANT_QUALIFIER.pid];
      return JSON.stringify(remaining);
    };
    return fingerprint(claimA) === fingerprint(claimB);
  }

  /** JSON string usable as a lookup key for a datavalue; a missing one maps to "{}". */
  function normalizeDataValueKey(datavalue) {
    const source = datavalue || {};
    return JSON.stringify(source);
  }

  /**
   * True when the claim has no qualifier snaks at all: either no qualifier
   * properties, or only properties whose snak list is empty or missing.
   */
  function hasNoQualifiers(claim) {
    const snakLists = Object.values(claim.qualifiers || {});
    return snakLists.every((snakList) => !snakList?.length);
  }

  /**
   * Numeric ordering of statement ranks: preferred (2) > normal (1) >
   * deprecated (0); anything else sorts below all of them (-1).
   */
  function rankOrder(rank) {
    switch (rank) {
      case "preferred":
        return 2;
      case "normal":
        return 1;
      case "deprecated":
        return 0;
      default:
        return -1;
    }
  }

  /**
   * Picks the claim to keep when merging duplicates.
   *
   * Candidates are the non-deprecated claims when there are any, otherwise
   * all claims. Among the candidates the winner is decided by, in order:
   * higher rank, more references, more qualifier snaks, smaller claim id.
   * On a complete tie the earlier claim stays.
   *
   * @param {Object[]} claims
   * @param {Object}  [opts]
   * @param {boolean} [opts.preferDeprecated=false] – when true and no preferred
   *   claim is present in the group, the candidates are the deprecated claims
   *   (if any) instead of the non-deprecated ones.  Used when a normal
   *   duplicate is being folded into an intentionally-deprecated claim that
   *   carries P2241.
   */
  function chooseMergeTarget(claims, { preferDeprecated = false } = {}) {
    const isDeprecated = (c) => c.rank === "deprecated";
    const useDeprecated =
      preferDeprecated && !claims.some((c) => c.rank === "preferred");

    const favoured = claims.filter((c) =>
      useDeprecated ? isDeprecated(c) : !isDeprecated(c),
    );
    const pool = favoured.length ? favoured : claims;

    const referenceCount = (c) => (c.references || []).length;
    const qualifierSnakCount = (c) =>
      Object.values(c.qualifiers || {}).reduce(
        (total, snakList) => total + snakList.length,
        0,
      );

    const pickBetter = (best, candidate) => {
      const rankDiff = rankOrder(candidate.rank) - rankOrder(best.rank);
      if (rankDiff !== 0) return rankDiff > 0 ? candidate : best;

      const refDiff = referenceCount(candidate) - referenceCount(best);
      if (refDiff !== 0) return refDiff > 0 ? candidate : best;

      const qualDiff = qualifierSnakCount(candidate) - qualifierSnakCount(best);
      if (qualDiff !== 0) return qualDiff > 0 ? candidate : best;

      return (candidate.id || "") < (best.id || "") ? candidate : best;
    };

    return pool.reduce((best, candidate) => pickBetter(best, candidate));
  }

  /**
   * Reads the first P813 (retrieved) snak of a reference and returns its time
   * as a timestamp in ms. Returns null when the snak or its time is missing,
   * when parseWikibaseTime yields nothing, or when anything throws.
   */
  function parseRetrievedTimestamp(ref) {
    try {
      const retrievedSnaks = ref.snaks?.[PID_RETRIEVED] || [];
      const rawTime = retrievedSnaks[0]?.datavalue?.value?.time;
      if (!rawTime) return null;
      const parsed = parseWikibaseTime(rawTime);
      if (!parsed) return null;
      return parsed.getTime();
    } catch {
      return null;
    }
  }

  /** True when `langcode` is one of the values stored in wikipediaEditionsCache. */
  function isValidWikipediaEdition(langcode) {
    for (const code of wikipediaEditionsCache.values()) {
      if (code === langcode) return true;
    }
    return false;
  }

  /**
   * Looks at the P143 (imported from) and P4656 (Wikimedia import URL) snaks
   * of a reference and reports
   *   { language: string|null, hasMissingSitelink: boolean }
   * `language` is the first language found. `hasMissingSitelink` becomes true
   * when the wiki implied by a snak is not among the item's sitelinks; for an
   * import URL this additionally requires that the language is a known
   * Wikipedia edition, otherwise only a warning is logged.
   */
  function analyzeWikimediaReference(entity, ref) {
    const outcome = { language: null, hasMissingSitelink: false };
    if (!ref?.snaks) return outcome;
    const sitelinks = entity?.sitelinks || {};

    // P143 (imported from Wikimedia project)
    for (const snak of ref.snaks[PID_IMPORTED_FROM] || []) {
      const qid = snak.datavalue?.value?.id;
      if (!qid || !wikipediaEditionsCache.has(qid)) continue;
      const lang = wikipediaEditionsCache.get(qid);
      if (!outcome.language) outcome.language = lang;
      if (!sitelinks[lang + "wiki"]) outcome.hasMissingSitelink = true;
    }

    // P4656 (Wikimedia import URL)
    // Host shape: optional "www.", a subdomain label, then a one- or
    // two-label domain.
    const hostPattern = /^(?:www\.)?([a-z]+)\.([a-z]+(?:\.[a-z]+)?)$/;
    for (const snak of ref.snaks[PID_WIKIMEDIA_IMPORT_URL] || []) {
      const url = snak.datavalue?.value;
      if (!url) continue;
      try {
        const host = new URL(url).hostname.toLowerCase();
        const parts = host.match(hostPattern);
        if (!parts) continue;
        const lang = parts[1];
        const domain = parts[2];
        if (lang === "wikidata") continue;
        if (!outcome.language) outcome.language = lang;

        // wikipedia.* and wikimedia.* hosts both use the "wiki" suffix.
        const firstLabel = domain.split(".")[0];
        const project =
          firstLabel === "wikipedia" || firstLabel === "wikimedia"
            ? "wiki"
            : firstLabel;

        if (sitelinks[lang + project]) continue;
        if (isValidWikipediaEdition(lang)) {
          outcome.hasMissingSitelink = true;
        } else {
          console.warn(
            `${TOOL_NAME}: "${lang}" from "${url}" not in Wikipedia editions cache`,
          );
        }
      } catch {
        continue;
      }
    }

    return outcome;
  }

  /**
   * Searches every property of the entity for the statement with the given
   * id. Returns the statement, or null when the entity has no claims, no id
   * is given, or nothing matches.
   */
  function findClaimById(entity, claimId) {
    if (!entity?.claims || !claimId) return null;
    for (const statements of Object.values(entity.claims)) {
      const match = statements.find((statement) => statement.id === claimId);
      if (match) return match;
    }
    return null;
  }

  /**
   * Decides whether a claim may take part in value-based deduplication in
   * detectDuplicateValues.  That is the case when its value alone identifies
   * it, i.e. when the claim has
   *   - no qualifiers at all, or
   *   - an external-id value with P1810 (subject named as) as its only
   *     qualifier, or
   *   - preferred rank with P7452 (reason for preferred rank), optionally
   *     together with P1810, and nothing else, or
   *   - deprecated rank with P2241 (reason for deprecated rank), optionally
   *     together with P1810, and nothing else.
   */
  function canBeGrouped(claim) {
    if (hasNoQualifiers(claim)) return true;

    // Qualifier properties that actually carry at least one snak.
    const qualifiers = claim.qualifiers || {};
    const usedPids = Object.keys(qualifiers).filter(
      (pid) => (qualifiers[pid] || []).length > 0,
    );

    const onlyNamedAs =
      usedPids.length === 1 && usedPids[0] === PID_SUBJECT_NAMED_AS;
    if (claim.mainsnak?.datatype === "external-id" && onlyNamedAs) return true;

    // The rank decides which rank-reason qualifier is acceptable.
    let reasonPid;
    if (claim.rank === "preferred") {
      reasonPid = PID_REASON_FOR_PREFERRED_RANK;
    } else if (claim.rank === "deprecated") {
      reasonPid = PID_REASON_FOR_DEPRECATED_RANK;
    } else {
      return false;
    }

    const hasReason = usedPids.includes(reasonPid);
    const nothingElse = usedPids.every(
      (pid) => pid === reasonPid || pid === PID_SUBJECT_NAMED_AS,
    );
    return hasReason && nothingElse;
  }

  /**
   * Returns the reference URL (P854) of a snak map, but only when there is
   * exactly one such snak and its value is a string; otherwise null.
   */
  function getReferenceUrl(snaks) {
    const candidates = snaks[PID_REFERENCE_URL] || [];
    if (candidates.length !== 1) return null;
    const value = candidates[0].datavalue?.value;
    if (typeof value !== "string") return null;
    return value;
  }

  /** True when any snak in the snak map has the datatype "external-id". */
  function hasExternalId(snaks) {
    const isExternalId = (snak) => snak.datatype === "external-id";
    return Object.values(snaks).some((snakList) =>
      (snakList || []).some(isExternalId),
    );
  }

  /**
   * Compiles a pattern so that it must match the whole input, using Unicode
   * mode.
   *
   * The pattern is wrapped in a non-capturing group before the anchors are
   * added. Prefixing "^" and appending "$" directly would anchor only the
   * first and last branch of a top-level alternation ("a|b" -> "^a|b$"), and
   * a pattern ending in an escaped "\$" would be mistaken for an anchored
   * one. Anchors already present in the pattern remain valid inside the
   * group, and capture group numbering is unaffected.
   *
   * @param {string} pattern regular expression source
   * @returns {RegExp}
   * @throws {TypeError} when `pattern` is not a string
   * @throws {SyntaxError} when the pattern is not a valid Unicode-mode regex
   */
  function compileAnchoredRegex(pattern) {
    if (typeof pattern !== "string") {
      throw new TypeError("compileAnchoredRegex: pattern must be a string");
    }
    return new RegExp(`^(?:${pattern})$`, "u");
  }

  // ==== 13 Occupation helpers ===============================================

  /**
   * Makes sure occupationParentsCache holds the P279 parents of the given
   * occupation items and of their ancestors, fetching missing entities level
   * by level for at most MAX_TRAVERSAL_DEPTH rounds.
   *
   * After every round, successful or not, the cache entry registered under
   * OCC_CACHE_KEY is saved. When a round fails, the ids of that round that
   * are still unknown are recorded with an empty parent set.
   *
   * @param {string[]} occIds item ids to resolve (duplicates are ignored)
   * @returns {Promise<Map>} occupationParentsCache
   */
  async function buildOccupationParents(occIds) {
    const wanted = uniq(occIds);
    if (!wanted.length) return occupationParentsCache;

    let pending = new Set(
      wanted.filter((id) => !occupationParentsCache.has(id)),
    );
    if (!pending.size) return occupationParentsCache;

    const persist = () => {
      const occEntry = caches.find((c) => c.key === OCC_CACHE_KEY);
      if (occEntry) cache_saveLocalSt(occEntry);
    };

    let depth = 0;
    while (depth < MAX_TRAVERSAL_DEPTH && pending.size) {
      const batch = [...pending];
      pending = new Set();
      try {
        const entities = await api_fetchEntities(batch);
        for (const id of batch) {
          const parents = extractP279Parents(entities[id]);
          occupationParentsCache.set(id, parents);
          // Unknown parents form the next round.
          for (const parent of parents) {
            if (!occupationParentsCache.has(parent)) pending.add(parent);
          }
        }
        persist();
      } catch (e) {
        console.error(
          `${TOOL_NAME}: buildOccupationParents failed`,
          batch,
          e,
        );
        for (const id of batch) {
          if (!occupationParentsCache.has(id)) {
            occupationParentsCache.set(id, new Set());
          }
        }
        persist();
      }
      depth += 1;
    }
    return occupationParentsCache;
  }

  /**
   * Checks, using only occupationParentsCache, whether `weakId` is reachable
   * from `strongId` by following cached parent links.
   *
   * The search proceeds level by level and stops after `maxDepth` levels.
   * Ids missing from the cache are treated as having no parents.
   *
   * @param {string} strongId - Starting id.
   * @param {string} weakId - Id searched for among the ancestors.
   * @param {number} [maxDepth=MAX_TRAVERSAL_DEPTH] - Maximum number of levels.
   * @returns {boolean} True when both ids pass isQid and are equal or linked.
   */
  function isSubclassOfLocal(strongId, weakId, maxDepth = MAX_TRAVERSAL_DEPTH) {
    if (!(isQid(strongId) && isQid(weakId))) return false;
    if (strongId === weakId) return true;

    const seen = new Set([strongId]);
    let level = [strongId];
    let depth = 0;
    while (depth < maxDepth && level.length) {
      const upcoming = [];
      for (const node of level) {
        const parents = occupationParentsCache.get(node) || new Set();
        for (const parent of parents) {
          if (parent === weakId) return true;
          if (seen.has(parent)) continue;
          seen.add(parent);
          upcoming.push(parent);
        }
      }
      level = upcoming;
      depth += 1;
    }
    return false;
  }

  // ==== 14 External-ID validation ===========================================

  /**
   * Validates a candidate external-ID value for a property.
   *
   * Generic checks come first (must be a string, no surrounding whitespace, no
   * tab / vertical-tab / form-feed). If propertyRegexCache holds a pattern for
   * the property, the value must match it in full; otherwise the only extra
   * rule is that the value may not end with a slash or backslash.
   *
   * @param {string} pid - Property ID used to look up the format pattern.
   * @param {*} value - Candidate value.
   * @returns {{valid: boolean, reason?: string, error?: string}} `reason` is
   *   set when invalid; `error` carries the message of a regex compile failure.
   */
  function validateExternalIdValue(pid, value) {
    const invalid = (reason, extra = {}) => ({ valid: false, reason, ...extra });

    if (typeof value !== "string") return invalid("not_a_string");
    if (value !== value.trim()) return invalid("leading_or_trailing_whitespace");
    if (/\t|\v|\f/.test(value)) {
      return invalid("contains_tab_or_vertical_whitespace");
    }

    const pattern = propertyRegexCache.get(pid);
    if (!pattern) {
      // No known format: fall back to a minimal sanity check.
      const endsWithSlash = value.endsWith("/") || value.endsWith("\\");
      return endsWithSlash ? invalid("ends_with_slash") : { valid: true };
    }

    try {
      if (!compileAnchoredRegex(pattern).test(value)) {
        return invalid("regex_mismatch");
      }
    } catch (err) {
      console.error(`${TOOL_NAME}: bad regex for ${pid}:`, pattern, err);
      return invalid("regex_error", { error: err.message });
    }
    return { valid: true };
  }

  // ==== 15 URL->property matching ============================================

  /**
   * Try to extract the external-ID value from a regex match.
   *
   * - With a `replacement`, the URL is rewritten through the regex; `\1`-style
   *   group references in the replacement are converted to JavaScript's `$1`.
   * - Without one, the single capture group is the ID. With several groups the
   *   first group is accepted only if every other participating group is a
   *   substring of it (i.e. the groups are nested, not independent).
   *
   * A first group that did not participate in the match (e.g. an alternation
   * such as `(a)|(b)` matching the second branch) yields null with a warning
   * instead of raising a TypeError that the caller would misreport as an
   * invalid pattern.
   *
   * @param {string} searchUrl - URL that was matched.
   * @param {RegExp} regex - The regular expression that produced the match.
   * @param {Array} matchResult - Result of `regex.exec(searchUrl)`.
   * @param {string} [replacement] - Optional replacement template.
   * @param {string} propertyId - Property ID, used in log messages only.
   * @returns {string|null} The extracted ID, or null when none can be chosen.
   */
  function extractId(searchUrl, regex, matchResult, replacement, propertyId) {
    if (replacement) {
      return searchUrl.replace(regex, replacement.replace(/\\(\d+)/g, "$$$1"));
    }

    const groups = matchResult.slice(1);
    if (!groups.length) {
      console.warn(
        `${TOOL_NAME}: match with ${propertyId} but zero groups; url=${searchUrl}`,
      );
      return null;
    }
    if (groups.length === 1) return groups[0] ?? null;

    const [first, ...others] = groups;
    if (first == null) {
      // Nothing to compare the remaining groups against.
      console.warn(
        `${TOOL_NAME}: first capture group empty for ${propertyId}; url=${searchUrl}`,
      );
      return null;
    }

    // Multiple groups: accept if all non-null ones are substrings of the first.
    const hasIndependentGroup = others.some(
      (g) => g != null && !first.includes(g),
    );
    if (hasIndependentGroup) {
      console.warn(
        `${TOOL_NAME}: multiple independent groups for ${propertyId}; url=${searchUrl}`,
      );
      return null;
    }
    console.info(
      `${TOOL_NAME}: accepting ${propertyId} with nested groups; url=${searchUrl}`,
    );
    return first;
  }

  /**
   * Match a URL against all cached property URL-match patterns.
   *
   * Properties and their patterns are tried in cache order; the first pattern
   * that matches, yields an ID, and passes validateExternalIdValue wins.
   * Patterns that fail to compile (or otherwise throw) are logged and skipped,
   * as are extracted IDs that fail validation.
   *
   * @param {string} searchUrl - URL to test (already normalised by the caller).
   * @returns {{matched: boolean, suggestedProperty: ?string,
   *   matchedPatternObj: ?Object, extractedId: ?string}}
   */
  function matchUrlAgainstPatterns(searchUrl) {
    // Evaluates one pattern record; returns a hit or null when it does not apply.
    const tryPattern = (propertyId, patternObj) => {
      try {
        const regex = compileAnchoredRegex(patternObj.pattern);
        const matchResult = regex.exec(searchUrl);
        if (!matchResult) return null;

        const extractedId = extractId(
          searchUrl,
          regex,
          matchResult,
          patternObj.replacement,
          propertyId,
        );
        if (!extractedId) return null;

        const { valid, reason } = validateExternalIdValue(
          propertyId,
          extractedId,
        );
        if (!valid) {
          console.warn(
            `${TOOL_NAME}: extracted ID "${extractedId}" for ${propertyId} failed validation: ${reason}`,
          );
          return null;
        }
        return {
          matched: true,
          suggestedProperty: propertyId,
          matchedPatternObj: patternObj,
          extractedId,
        };
      } catch (e) {
        console.warn(
          `${TOOL_NAME}: invalid pattern for ${propertyId}:`,
          patternObj,
          e,
        );
        return null;
      }
    };

    for (const [propertyId, patternObjs] of propertyUrlPatternsCache) {
      if (!Array.isArray(patternObjs)) continue;
      for (const patternObj of patternObjs) {
        const hit = tryPattern(propertyId, patternObj);
        if (hit) return hit;
      }
    }

    return {
      matched: false,
      suggestedProperty: null,
      matchedPatternObj: null,
      extractedId: null,
    };
  }

  /**
   * Normalise a raw URL and match it against the cached property URL patterns,
   * retrying once with a recognition-mode cleaned URL when the first try fails.
   *
   * The retry only happens when cleaning (followed by normalisation) actually
   * changes the URL. `keepUrl` in the result is false unless the retry ran, in
   * which case it is whatever cleanUrl reported.
   *
   * @param {string} rawUrl - URL as found in the data.
   * @returns {Object} The matchUrlAgainstPatterns result plus `keepUrl`.
   */
  function matchUrlAgainstPatternsWithCleanup(rawUrl) {
    const searchUrl = normalizeUrl(rawUrl);
    const direct = matchUrlAgainstPatterns(searchUrl);
    if (direct.matched) return { ...direct, keepUrl: false };

    const cleaning = cleanUrl(searchUrl, { recognitionMode: true });
    const retryUrl = normalizeUrl(cleaning.url);
    if (retryUrl === searchUrl) return { ...direct, keepUrl: false };

    const retried = matchUrlAgainstPatterns(retryUrl);
    return { ...retried, keepUrl: cleaning.keepUrl };
  }

  // ==== 16 Diff builders ====================================================

  /**
   * Builds an ACTION_REMOVE_CLAIM diff.
   *
   * @param {string} pid - Property the claim belongs to.
   * @param {Object} removeClaim - Claim to remove.
   * @param {Object} [becauseOfClaim] - Claim that is kept instead; when given,
   *   its id and datavalue are recorded as `keepClaimId` / `keepValue`.
   * @returns {Object} The diff record.
   */
  function makeRemoveClaimDiff(pid, removeClaim, becauseOfClaim) {
    const removal = {
      action: ACTION_REMOVE_CLAIM,
      pid,
      claimId: removeClaim.id,
      value: removeClaim.mainsnak?.datavalue,
    };
    if (!becauseOfClaim) return removal;
    return {
      ...removal,
      keepClaimId: becauseOfClaim.id,
      keepValue: becauseOfClaim.mainsnak?.datavalue,
    };
  }

  /**
   * Builds an ACTION_NORMALIZE diff for a term (not tied to any claim, hence
   * `claimId: null`).
   *
   * @param {string} field - Kind of term the diff applies to.
   * @param {string} lang - Language code of the term.
   * @param {string} before - Current text.
   * @param {string} after - Normalised text.
   * @returns {Object} The diff record.
   */
  function makeNormalizeDiff(field, lang, before, after) {
    const diff = { action: ACTION_NORMALIZE, field, claimId: null };
    diff.lang = lang;
    diff.before = before;
    diff.after = after;
    return diff;
  }

  /**
   * Builds an ACTION_ADD_EXTERNAL_ID_TO_REFERENCE diff.
   *
   * @param {string} pid - Property of the claim that owns the reference.
   * @param {Object} claim - Claim that owns the reference (its `id` is stored).
   * @param {Object} ref - Reference to amend (its `hash` is stored).
   * @param {boolean} keepUrl - Stored as-is in the diff.
   * @param {string} urlValue - The reference URL, stored as `referenceUrl`.
   * @param {string} suggestedProperty - External-ID property to add.
   * @param {string} extractedId - ID value extracted from the URL.
   * @returns {Object} The diff record.
   */
  function makeAddExternalIdToReferenceDiff(
    pid,
    claim,
    ref,
    keepUrl,
    urlValue,
    suggestedProperty,
    extractedId,
  ) {
    const { id: claimId } = claim;
    const { hash: refHash } = ref;
    const diff = {
      action: ACTION_ADD_EXTERNAL_ID_TO_REFERENCE,
      pid,
      claimId,
      refHash,
      suggestedProperty,
      keepUrl,
      referenceUrl: urlValue,
      extractedId,
    };
    return diff;
  }

  /**
   * Calls `callback(pid, claim, ref)` for every reference of every
   * non-deprecated claim and gathers the results that are neither null nor
   * undefined, in visiting order.
   *
   * @param {Object} entity - Entity whose `claims` are walked.
   * @param {Function} callback - Invoked once per reference.
   * @returns {Array} Collected callback results.
   */
  function mapReferences(entity, callback) {
    const collected = [];
    const claimsByPid = entity.claims || {};
    for (const pid in claimsByPid) {
      for (const claim of claimsByPid[pid]) {
        if (claim.rank === "deprecated") continue;
        const references = claim.references || [];
        for (const ref of references) {
          const produced = callback(pid, claim, ref);
          if (produced !== null && produced !== undefined) {
            collected.push(produced);
          }
        }
      }
    }
    return collected;
  }

  // ==== 17 Detectors ========================================================

  /**
   * Finds URL-datatype snaks on non-deprecated claims whose string value is
   * changed by cleanUrl, and emits one ACTION_CLEAN_URL diff per finding.
   *
   * Output order: all main-snak findings, then all qualifier findings, then
   * reference findings. For references, at most one diff is produced per
   * reference (the first URL snak in it that needs cleaning).
   *
   * @param {Object} entity - Entity to inspect.
   * @returns {Object[]} ACTION_CLEAN_URL diffs with `context` set to "claim",
   *   "qualifier" or "reference".
   */
  function detectCleanUrls(entity) {
    // Yields every non-deprecated claim together with its property ID.
    function* activeClaims() {
      for (const pid in entity.claims || {}) {
        for (const claim of entity.claims[pid]) {
          if (claim.rank !== "deprecated") yield { pid, claim };
        }
      }
    }

    // { before, after } when cleanUrl changes a string value, otherwise null.
    const urlChange = (value) => {
      if (typeof value !== "string") return null;
      const { url } = cleanUrl(value);
      return url === value ? null : { before: value, after: url };
    };

    const diffs = [];

    // Pass 1: main snaks.
    for (const { pid, claim } of activeClaims()) {
      if (claim.mainsnak?.datatype !== "url") continue;
      const change = urlChange(claim.mainsnak?.datavalue?.value);
      if (!change) continue;
      diffs.push({
        action: ACTION_CLEAN_URL,
        context: "claim",
        pid,
        claimId: claim.id,
        snakPid: pid,
        ...change,
      });
    }

    // Pass 2: qualifier snaks.
    for (const { pid, claim } of activeClaims()) {
      for (const qPid in claim.qualifiers || {}) {
        for (const snak of claim.qualifiers[qPid]) {
          if (snak.datatype !== "url") continue;
          const change = urlChange(snak.datavalue?.value);
          if (!change) continue;
          diffs.push({
            action: ACTION_CLEAN_URL,
            context: "qualifier",
            pid,
            claimId: claim.id,
            snakPid: qPid,
            snakHash: snak.hash,
            ...change,
          });
        }
      }
    }

    // Pass 3: reference snaks of any URL-datatype property, first hit per ref.
    const referenceDiffs = mapReferences(entity, (pid, claim, ref) => {
      for (const rPid in ref.snaks || {}) {
        for (const snak of ref.snaks[rPid]) {
          if (snak.datatype !== "url") continue;
          const change = urlChange(snak.datavalue?.value);
          if (change) {
            return {
              action: ACTION_CLEAN_URL,
              context: "reference",
              pid,
              claimId: claim.id,
              refHash: ref.hash,
              snakPid: rPid,
              snakHash: snak.hash,
              ...change,
            };
          }
        }
      }
      return undefined;
    });

    return [...diffs, ...referenceDiffs];
  }

  /**
   * Emits a normalise diff for every label, description and alias whose text
   * changes under normalizeText.
   *
   * Descriptions additionally lose trailing semicolons / whitespace; if
   * normalizeText yields null or undefined for a description, the description
   * is left as is.
   *
   * @param {Object} entity - Entity with optional `labels`, `descriptions`
   *   and `aliases` maps keyed by language.
   * @returns {Object[]} Diffs built with makeNormalizeDiff (labels first, then
   *   descriptions, then aliases).
   */
  function detectNormalizeLabels(entity) {
    const diffs = [];
    const reportIfChanged = (field, lang, before, after) => {
      if (after !== before) {
        diffs.push(makeNormalizeDiff(field, lang, before, after));
      }
    };

    const labels = entity.labels || {};
    for (const lang in labels) {
      const text = labels[lang].value;
      reportIfChanged("label", lang, text, normalizeText(text));
    }

    const descriptions = entity.descriptions || {};
    for (const lang in descriptions) {
      const text = descriptions[lang].value;
      const normalized = normalizeText(text);
      const cleaned =
        normalized == null
          ? text
          : normalized.replace(/[;\s]+$/g, "").trim();
      reportIfChanged("description", lang, text, cleaned);
    }

    const aliases = entity.aliases || {};
    for (const lang in aliases) {
      for (const alias of aliases[lang]) {
        reportIfChanged("alias", lang, alias.value, normalizeText(alias.value));
      }
    }

    return diffs;
  }

  /**
   * Finds aliases (in every language except "mul") that are redundant after
   * normalisation and emits an ACTION_REMOVE_ALIAS diff for each.
   *
   * An alias is redundant when, in this order of precedence, it equals
   *   1. the label of the same language       ("alias_equals_label"),
   *   2. the "mul" label                      ("alias_equals_mul_label"),
   *   3. one of the "mul" aliases             ("alias_equals_mul_alias"),
   *   4. an earlier kept alias of the language ("duplicate").
   * Only aliases that pass all four checks are remembered for check 4.
   *
   * @param {Object} entity - Entity with optional `labels` / `aliases`.
   * @returns {Object[]} Removal diffs carrying `lang`, `value` and `reason`.
   */
  function detectRemoveDuplicateAliases(entity) {
    const diffs = [];

    const mulAliasNorms = new Set();
    for (const mulAlias of entity.aliases?.mul || []) {
      mulAliasNorms.add(normalizeText(mulAlias.value));
    }
    const mulLabelNorm = entity.labels?.mul?.value
      ? normalizeText(entity.labels.mul.value)
      : null;

    for (const lang in entity.aliases || {}) {
      if (lang === "mul") continue;

      const labelNorm = normalizeText(entity.labels?.[lang]?.value || "");
      const kept = new Set();

      for (const alias of entity.aliases[lang]) {
        const norm = normalizeText(alias.value);

        let reason = null;
        if (norm === labelNorm) {
          reason = "alias_equals_label";
        } else if (mulLabelNorm && norm === mulLabelNorm) {
          reason = "alias_equals_mul_label";
        } else if (mulAliasNorms.has(norm)) {
          reason = "alias_equals_mul_alias";
        } else if (kept.has(norm)) {
          reason = "duplicate";
        } else {
          kept.add(norm);
        }

        if (reason !== null) {
          diffs.push({
            action: ACTION_REMOVE_ALIAS,
            claimId: null,
            lang,
            value: alias.value,
            reason,
          });
        }
      }
    }
    return diffs;
  }
  /**
   * Detects English descriptions that embed an identifier which belongs in a
   * statement instead, and proposes a description without it.
   *
   * Two patterns are recognised; the first that matches wins:
   *   - "peerage person id=<n>" (whole description): the description is blanked.
   *   - an ORCID iD anywhere in the text: only the ORCID fragment is removed,
   *     leftover empty or unbalanced parentheses are tidied, and the remainder
   *     is passed through normalizeText.
   *
   * The diff carries `idPresent`, telling whether the entity already has a
   * matching claim for the corresponding property.
   *
   * @param {Object} entity - Entity with optional `descriptions` and `claims`.
   * @returns {Object[]} Zero or one ACTION_NORMALIZE diff for field
   *   "description", language "en".
   */
  function detectIdDescriptions(entity) {
    const diffs = [];
    const val = entity.descriptions?.en?.value;
    if (!val) return diffs;

    const patterns = [
      {
        regex: /^peerage person id=(\d+)$/i,
        prop: "P4638",
        check: (claim, id) =>
          claim.mainsnak?.datavalue?.value?.includes(`#i${id}`),
        removeMode: "blank",
      },
      {
        regex:
          /\borcid\s*(?:id\s*)?(?:[=#:]\s*)?\(?\s*(\d{4}-\d{4}-\d{4}-\d{3}[0-9X])\s*\)?/i,
        prop: "P496",
        check: (claim, id) => claim.mainsnak?.datavalue?.value === id,
        removeMode: "partial",
      },
    ];

    for (const pattern of patterns) {
      const match = pattern.regex.exec(val);
      if (!match) continue;
      const descId = match[1];
      const idPresent = (entity.claims?.[pattern.prop] || []).some((c) =>
        pattern.check(c, descId),
      );
      const rawAfter =
        pattern.removeMode === "partial"
          ? val
              .replace(match[0], "")
              .replace(/\(\s*\)/g, "") // remove empty () left behind
              .replace(/\(([^)]*?)$/, "$1") // remove unmatched ( with no closing )
              // Remove a ) only when no ( precedes it; a plain `.*?` here
              // would also strip the ) of a balanced "(…)" pair.
              .replace(/^([^(]*?)\)/, "$1")
          : "";
      const after =
        pattern.removeMode === "partial" ? normalizeText(rawAfter) : "";
      diffs.push({
        action: ACTION_NORMALIZE,
        field: "description",
        lang: "en",
        before: val,
        after,
        idPresent,
      });
      break;
    }
    return diffs;
  }

  /**
   * Finds properties where preferred rank distinguishes nothing because every
   * claim is either preferred or deprecated, and emits an
   * ACTION_DOWNGRADE_PREFERRED diff for each preferred claim there.
   *
   * `removedQualifier` names PID_REASON_FOR_PREFERRED_RANK when the claim
   * carries that qualifier, otherwise it is null.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Downgrade diffs.
   */
  function detectRedundantPreferred(entity) {
    const diffs = [];
    const isPreferred = (claim) => claim.rank === "preferred";

    for (const pid in entity.claims) {
      const claims = entity.claims[pid];
      if (!claims?.length) continue;

      // A claim of any other rank makes the preferred rank meaningful.
      const hasOtherRank = claims.some(
        (claim) => !isPreferred(claim) && claim.rank !== "deprecated",
      );
      if (hasOtherRank) continue;

      for (const claim of claims.filter(isPreferred)) {
        const hasReason = claim.qualifiers?.[PID_REASON_FOR_PREFERRED_RANK];
        diffs.push({
          action: ACTION_DOWNGRADE_PREFERRED,
          pid,
          claimId: claim.id,
          value: claim.mainsnak?.datavalue,
          removedQualifier: hasReason ? PID_REASON_FOR_PREFERRED_RANK : null,
        });
      }
    }
    return diffs;
  }

  /**
   * Finds preferred-rank claims whose first end-time qualifier lies in the
   * past and emits an ACTION_DOWNGRADE_PREFERRED diff for each.
   *
   * Claims without an end-time value, or whose time cannot be parsed by
   * parseWikibaseTime, are ignored.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Downgrade diffs.
   */
  function detectExpiredPreferred(entity) {
    // True when the claim's first end-time qualifier is earlier than now.
    const hasEnded = (claim) => {
      const firstEnd = (claim.qualifiers?.[PID_END_TIME] || [])[0];
      const timeValue = firstEnd?.datavalue?.value;
      if (!timeValue?.time) return false;
      const endDate = parseWikibaseTime(timeValue.time);
      return Boolean(endDate && endDate < new Date());
    };

    const diffs = [];
    for (const pid in entity.claims || {}) {
      for (const claim of entity.claims[pid]) {
        if (claim.rank !== "preferred" || !hasEnded(claim)) continue;
        const hasReason = claim.qualifiers?.[PID_REASON_FOR_PREFERRED_RANK];
        diffs.push({
          action: ACTION_DOWNGRADE_PREFERRED,
          pid,
          claimId: claim.id,
          value: claim.mainsnak?.datavalue,
          removedQualifier: hasReason ? PID_REASON_FOR_PREFERRED_RANK : null,
        });
      }
    }
    return diffs;
  }

  /**
   * Finds end-time qualifiers whose snak type is "novalue" and emits an
   * ACTION_REMOVE_QUALIFIER diff for each. Claims of every rank are inspected.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Qualifier-removal diffs.
   */
  function detectEmptyEndTime(entity) {
    const diffs = [];
    const claimsByPid = entity.claims || {};
    for (const pid in claimsByPid) {
      for (const claim of claimsByPid[pid]) {
        const endTimes = claim.qualifiers?.[PID_END_TIME] || [];
        for (const snak of endTimes) {
          if (snak.snaktype !== "novalue") continue;
          diffs.push({
            action: ACTION_REMOVE_QUALIFIER,
            claimId: claim.id,
            pid,
            qualifierPid: PID_END_TIME,
            qualifierSnakHash: snak.hash,
            qualifierValue: snak.datavalue,
          });
        }
      }
    }
    return diffs;
  }

  /**
   * Detects start/end time qualifiers on citizenship claims (PID_CITIZENSHIP)
   * that merely repeat the item's date of birth or date of death.
   *
   * What counts as redundant:
   *   - a start-time qualifier equal to the date of birth, or
   *   - an end-time qualifier equal to the date of death,
   *   where equality is decided by has_same_normalized_date.
   *
   * Safeguards:
   *   - Deprecated citizenship claims are ignored.
   *   - The birth / death date used for comparison is taken from the
   *     non-deprecated claims of that property: the preferred ones if any
   *     exist, otherwise all of them. Unless exactly one candidate remains,
   *     the comparison for that property is skipped as ambiguous.
   *   - Both the reference date and the qualifier must carry a numeric
   *     precision that is not below 9 (year).
   *   - Only qualifier snaks of type "value" with a time value are considered.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} ACTION_REMOVE_QUALIFIER diffs; `matchedPid` names the
   *   date property the qualifier duplicates.
   */
  function detectRedundantCitizenshipDates(entity) {
    const diffs = [];

    // Single authoritative time value of a date property, or null when the
    // property is absent, ambiguous, not a time value, or too imprecise.
    const resolveDateValue = (pid) => {
      const usable = (entity.claims?.[pid] || []).filter(
        (c) => c.rank !== "deprecated",
      );
      if (!usable.length) return null;

      const preferred = usable.filter((c) => c.rank === "preferred");
      const candidates = preferred.length ? preferred : usable;
      if (candidates.length !== 1) return null;

      const value = candidates[0].mainsnak?.datavalue?.value;
      if (!value?.time) return null;
      if (typeof value.precision !== "number") return null;
      if (value.precision < 9) return null;
      return value;
    };

    // One entry per qualifier/date pairing that has a usable reference date.
    const checks = [
      {
        qualifierPid: PID_START_TIME,
        refValue: resolveDateValue(PID_DATE_OF_BIRTH),
        matchedPid: PID_DATE_OF_BIRTH,
      },
      {
        qualifierPid: PID_END_TIME,
        refValue: resolveDateValue(PID_DATE_OF_DEATH),
        matchedPid: PID_DATE_OF_DEATH,
      },
    ].filter((check) => check.refValue !== null);

    if (!checks.length) return diffs;

    // has_same_normalized_date works on claims, so wrap bare values.
    const asPseudoClaim = (value) => ({ mainsnak: { datavalue: { value } } });

    for (const claim of entity.claims?.[PID_CITIZENSHIP] || []) {
      if (claim.rank === "deprecated") continue;

      for (const { qualifierPid, refValue, matchedPid } of checks) {
        for (const snak of claim.qualifiers?.[qualifierPid] || []) {
          if (snak.snaktype !== "value") continue;

          const qualVal = snak.datavalue?.value;
          if (!qualVal?.time || typeof qualVal.precision !== "number") continue;
          if (qualVal.precision < 9) continue;

          const sameDate = has_same_normalized_date(
            asPseudoClaim(qualVal),
            asPseudoClaim(refValue),
            true,
            false,
          );
          if (!sameDate) continue;

          diffs.push({
            action: ACTION_REMOVE_QUALIFIER,
            pid: PID_CITIZENSHIP,
            claimId: claim.id,
            qualifierPid,
            qualifierSnakHash: snak.hash,
            qualifierValue: snak.datavalue,
            // Extra context for renderRow
            matchedPid,
          });
        }
      }
    }

    return diffs;
  }

  /**
   * Finds date-of-birth / date-of-death claims that are made redundant by a
   * more precise claim for the same date, and proposes removing them.
   *
   * Non-deprecated claims with a value and a precision of at most 11 are
   * grouped by has_same_normalized_date. Within a group of two or more, a claim
   * below the group's highest precision is proposed for removal when either
   *   - a claim at the highest precision has references and the candidate has
   *     no references or only weak ones (per isWeakReference), or
   *   - the candidate has no references and another unreferenced claim in the
   *     group is more precise.
   * At most one diff is emitted per claim; the first rule takes precedence.
   *
   * Claims without a datavalue (unknown / no value) carry no date to compare
   * and are skipped up front.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Diffs built with makeRemoveClaimDiff.
   */
  function detectLowPrecisionDates(entity) {
    const diffs = [];
    const precisionOf = (c) => c.mainsnak.datavalue.value.precision;

    for (const pid of [PID_DATE_OF_BIRTH, PID_DATE_OF_DEATH]) {
      const claims = entity.claims?.[pid] || [];

      // Group by same-date at any precision
      const groups = [];
      for (const c of claims) {
        if (c.rank === "deprecated") continue;
        const value = c.mainsnak?.datavalue?.value;
        // Guard: later steps dereference mainsnak.datavalue.value directly.
        if (!value) continue;
        if ((value.precision ?? 0) > 11) continue;
        const group = groups.find((g) =>
          has_same_normalized_date(c, g[0], true, false),
        );
        if (group) group.push(c);
        else groups.push([c]);
      }

      for (const group of groups) {
        if (group.length < 2) continue;
        const maxPrec = Math.max(...group.map((c) => precisionOf(c) || 0));

        // Most precise claim that has no references at all.
        let highestNoRefPrec = 0;
        let bestNoRefClaim = null;
        for (const c of group) {
          if (!(c.references || []).length && precisionOf(c) > highestNoRefPrec) {
            highestNoRefPrec = precisionOf(c);
            bestNoRefClaim = c;
          }
        }

        // A referenced claim at the group's top precision, if any.
        const preciseStrongClaim = group.find(
          (c) => precisionOf(c) === maxPrec && c.references?.length,
        );

        for (const c of group) {
          const prec = precisionOf(c);
          const refs = c.references || [];
          if (prec < maxPrec) {
            const allWeak =
              !refs.length ||
              refs.every((r) => isWeakReference(entity, r, refs));
            if (preciseStrongClaim && allWeak) {
              diffs.push(makeRemoveClaimDiff(pid, c, preciseStrongClaim));
            } else if (
              !refs.length &&
              prec < highestNoRefPrec &&
              bestNoRefClaim
            ) {
              // `else`: never propose removing the same claim twice.
              diffs.push(makeRemoveClaimDiff(pid, c, bestNoRefClaim));
            }
          }
        }
      }
    }
    return diffs;
  }

  /**
   * Finds claims of the same date property that express the same date and can
   * be merged, and emits an ACTION_MERGE_CLAIM diff for each claim that should
   * be folded into another.
   *
   * For every property returned by getDateProperties, claims are first grouped
   * by has_same_normalized_date (each claim joins the group of the earliest
   * ungrouped claim it matches). Each group is then split by rank and by
   * qualifier equality (qualifiersEqualExceptP31, compared against the first
   * claim of the sub-group). In every sub-group of two or more, chooseMergeTarget
   * picks the survivor and all other claims are merged into it.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Merge diffs (`fromClaimId` -> `toClaimId`).
   */
  function detectMergeSameDateClaims(entity) {
    const diffs = [];

    // Splits same-date claims into buckets sharing rank and qualifiers.
    const splitByRankAndQualifiers = (sameDate) => {
      const buckets = [];
      for (const claim of sameDate) {
        const bucket = buckets.find(
          (b) =>
            b.rank === claim.rank &&
            qualifiersEqualExceptP31(b.claims[0], claim),
        );
        if (bucket) bucket.claims.push(claim);
        else buckets.push({ rank: claim.rank, claims: [claim] });
      }
      return buckets;
    };

    for (const pid of getDateProperties(entity)) {
      const claims = entity.claims?.[pid] || [];
      const grouped = new Set();

      claims.forEach((base, index) => {
        if (grouped.has(base.id)) return;
        grouped.add(base.id);

        const sameDate = [base];
        for (const candidate of claims.slice(index + 1)) {
          if (grouped.has(candidate.id)) continue;
          if (!has_same_normalized_date(base, candidate, false, false)) continue;
          sameDate.push(candidate);
          grouped.add(candidate.id);
        }
        if (sameDate.length < 2) return;

        for (const { claims: bucketClaims } of splitByRankAndQualifiers(sameDate)) {
          if (bucketClaims.length < 2) continue;
          const target = chooseMergeTarget(bucketClaims);
          for (const claim of bucketClaims) {
            if (claim.id === target.id) continue;
            diffs.push({
              action: ACTION_MERGE_CLAIM,
              pid,
              fromClaimId: claim.id,
              toClaimId: target.id,
              value: claim.mainsnak?.datavalue,
            });
          }
        }
      });
    }
    return diffs;
  }

  /**
   * Finds reference snaks stored under a URL property that does not fit their
   * value and emits an ACTION_CHANGE_PROPERTY diff for each.
   *
   * Rules, evaluated in order (first match wins) for snaks under PID_URL or
   * PID_REFERENCE_URL:
   *   1. string value accepted by isArchiveUrl         -> PID_ARCHIVE_URL
   *   2. string value accepted by isWikimediaImportUrl -> PID_WIKIMEDIA_IMPORT_URL
   *   3. anything still under PID_URL                  -> PID_REFERENCE_URL
   *
   * @param {Object} entity - Entity whose claim references are inspected.
   * @returns {Object[]} Change-property diffs with `context: "reference"`.
   */
  function detectWrongPropertyClaims(entity) {
    const urlProps = new Set([PID_URL, PID_REFERENCE_URL]);
    const RULES = [
      { props: urlProps, newProp: PID_ARCHIVE_URL, checkFn: isArchiveUrl },
      {
        props: urlProps,
        newProp: PID_WIKIMEDIA_IMPORT_URL,
        checkFn: isWikimediaImportUrl,
      },
      { props: new Set([PID_URL]), newProp: PID_REFERENCE_URL },
    ];

    // First rule that applies to a snak stored under `prop`, if any.
    const pickRule = (prop, val) =>
      RULES.find((rule) => {
        if (!rule.props.has(prop)) return false;
        if (!rule.checkFn) return true;
        return typeof val === "string" && rule.checkFn(val);
      });

    const diffs = [];
    const claimsByPid = entity.claims || {};
    for (const pid in claimsByPid) {
      for (const claim of claimsByPid[pid]) {
        for (const ref of claim.references || []) {
          const refSnaks = ref.snaks || {};
          for (const wrongProp in refSnaks) {
            for (const snak of refSnaks[wrongProp]) {
              const rule = pickRule(wrongProp, snak.datavalue?.value);
              if (!rule) continue;
              diffs.push({
                action: ACTION_CHANGE_PROPERTY,
                context: "reference",
                pid,
                claimId: claim.id,
                refHash: ref.hash,
                snakHash: snak.hash,
                oldProperty: wrongProp,
                newProperty: rule.newProp,
                value: snak.datavalue,
              });
            }
          }
        }
      }
    }
    return diffs;
  }

  /**
   * Finds "retrieved" (PID_RETRIEVED) qualifiers on external-ID claims and
   * emits an ACTION_MOVE_QUALIFIER_TO_REFERENCE diff for each such qualifier.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} One diff per retrieved-qualifier snak.
   */
  function detectMoveRetrievedFromExternalId(entity) {
    const diffs = [];
    const claimsByPid = entity.claims || {};
    for (const pid in claimsByPid) {
      const externalIdClaims = [];
      for (const claim of claimsByPid[pid]) {
        if (claim.mainsnak?.datatype === "external-id") {
          externalIdClaims.push(claim);
        }
      }
      for (const claim of externalIdClaims) {
        const retrieved = claim.qualifiers?.[PID_RETRIEVED] || [];
        for (const snak of retrieved) {
          diffs.push({
            action: ACTION_MOVE_QUALIFIER_TO_REFERENCE,
            claimId: claim.id,
            pid,
            qualifierPid: PID_RETRIEVED,
            qualifierSnakHash: snak.hash,
            qualifierValue: snak.datavalue,
          });
        }
      }
    }
    return diffs;
  }

  /**
   * Finds claims of one property that carry the same value (per
   * normalizeDataValueKey) and emits ACTION_MERGE_CLAIM diffs to fold them
   * together. Claims rejected by canBeGrouped are left alone.
   *
   * Buckets are keyed by value plus "active" or "deprecated":
   *   - Non-normal claims are bucketed first: deprecated claims into the
   *     "deprecated" bucket, every other non-normal rank into "active".
   *   - Normal claims then join, in order of preference, an existing "active"
   *     bucket, an existing "deprecated" bucket, or a new "active" bucket.
   * For each bucket with two or more claims, chooseMergeTarget selects the
   * survivor; for "deprecated" buckets it is asked to prefer the deprecated
   * claim.
   *
   * @param {Object} entity - Entity whose `claims` are inspected.
   * @returns {Object[]} Merge diffs (`fromClaimId` -> `toClaimId`).
   */
  function detectDuplicateValues(entity) {
    const ACTIVE = "active";
    const DEPRECATED = "deprecated";
    const keyFor = (claim, bucket) =>
      `${normalizeDataValueKey(claim.mainsnak?.datavalue?.value)}|${bucket}`;

    const diffs = [];
    for (const pid in entity.claims || {}) {
      const claims = entity.claims[pid] || [];
      if (!claims.length) continue;

      const buckets = new Map();
      const addTo = (key, claim) => {
        const members = buckets.get(key);
        if (members) members.push(claim);
        else buckets.set(key, [claim]);
      };

      // Pass 1: preferred / deprecated claims seed the buckets.
      for (const claim of claims) {
        if (claim.rank === "normal" || !canBeGrouped(claim)) continue;
        const bucket = claim.rank === "deprecated" ? DEPRECATED : ACTIVE;
        addTo(keyFor(claim, bucket), claim);
      }

      // Pass 2: normal claims attach to the best existing bucket.
      for (const claim of claims) {
        if (claim.rank !== "normal" || !canBeGrouped(claim)) continue;
        const valueKey = normalizeDataValueKey(
          claim.mainsnak?.datavalue?.value,
        );
        const activeKey = `${valueKey}|active`;
        const deprecatedKey = `${valueKey}|deprecated`;
        const useDeprecated =
          !buckets.has(activeKey) && buckets.has(deprecatedKey);
        addTo(useDeprecated ? deprecatedKey : activeKey, claim);
      }

      // Pass 3: merge everything in a bucket into its chosen target.
      for (const [key, members] of buckets) {
        if (members.length < 2) continue;
        const target = chooseMergeTarget(members, {
          preferDeprecated: key.endsWith("|deprecated"),
        });
        for (const claim of members) {
          if (claim.id === target.id) continue;
          diffs.push({
            action: ACTION_MERGE_CLAIM,
            pid,
            fromClaimId: claim.id,
            toClaimId: target.id,
            value: claim.mainsnak?.datavalue,
          });
        }
      }
    }
    return diffs;
  }
  /**
   * Finds references on a claim that are covered by another reference of the
   * same claim and emits an ACTION_REMOVE_REFS diff for each covered one.
   *
   * References are compared on their snak values, ignoring the properties in
   * IGNORE_PROPS. They are ranked newest retrieved-timestamp first (missing
   * timestamps last), then by number of compared properties, then by the
   * superset relation, then by hash. Walking that ranking, a reference becomes
   * a member of the first earlier "anchor" whose values include all of its own
   * and whose timestamp is not older; otherwise it becomes an anchor itself.
   * Members are proposed for removal. References with no compared properties
   * are never removed and never act as anchors.
   *
   * @param {Object} entity - Entity whose claim references are inspected.
   * @returns {Object[]} Diffs with `refHash` and the reference's `removedKeys`.
   */
  function detectDuplicateRefs(entity) {
    const IGNORE_PROPS = new Set(["P813", "P1476", "P1810"]);

    const comparedPids = (ref) =>
      Object.keys(ref.snaks || {}).filter((p) => !IGNORE_PROPS.has(p));

    // Sorted JSON-encoded values per compared property.
    const valuesByPid = (ref) => {
      const byPid = {};
      for (const pid of comparedPids(ref)) {
        const encoded = (ref.snaks[pid] || [])
          .map((s) => JSON.stringify(s.datavalue?.value ?? null))
          .sort();
        if (encoded.length) byPid[pid] = encoded;
      }
      return byPid;
    };

    // True when every value of `inner` also occurs in `outer` (same property).
    const isCoveredBy = (inner, outer) => {
      for (const pid in inner) {
        const outerValues = outer[pid];
        if (!outerValues) return false;
        const lookup = new Set(outerValues);
        if (inner[pid].some((v) => !lookup.has(v))) return false;
      }
      return true;
    };

    const diffs = [];
    for (const pid in entity.claims || {}) {
      for (const claim of entity.claims[pid]) {
        const refs = claim.references || [];
        if (refs.length < 2) continue;

        // Per-reference comparison data, computed once.
        const info = new Map();
        for (const ref of refs) {
          info.set(ref.hash, {
            map: valuesByPid(ref),
            ts: parseRetrievedTimestamp(ref) ?? -Infinity,
            propCount: comparedPids(ref).length,
          });
        }

        const ranked = [...refs].sort((a, b) => {
          const ia = info.get(a.hash);
          const ib = info.get(b.hash);
          if (ib.ts !== ia.ts) return ib.ts - ia.ts;
          if (ib.propCount !== ia.propCount) return ib.propCount - ia.propCount;
          const aInB = isCoveredBy(ia.map, ib.map);
          const bInA = isCoveredBy(ib.map, ia.map);
          if (aInB && !bInA) return 1;
          if (bInA && !aInB) return -1;
          return a.hash.localeCompare(b.hash);
        });

        const anchors = [];
        for (const ref of ranked) {
          const own = info.get(ref.hash);
          if (!own.propCount) continue;
          const anchor = anchors.find((candidate) => {
            const other = info.get(candidate.anchorRef.hash);
            return isCoveredBy(own.map, other.map) && own.ts <= other.ts;
          });
          if (anchor) anchor.members.push(ref);
          else anchors.push({ anchorRef: ref, members: [] });
        }

        for (const { members } of anchors) {
          for (const ref of members) {
            diffs.push({
              action: ACTION_REMOVE_REFS,
              pid,
              claimId: claim.id,
              refHash: ref.hash,
              removedKeys: Object.keys(ref.snaks),
            });
          }
        }
      }
    }
    return diffs;
  }

  /**
   * Decides whether a reference can be split into several references.
   *
   * Requirements: the reference has a PID_REFERENCE_URL snak list, uses only
   * the allowed properties listed below, has at most one archive URL and at
   * most one archive date, and contains more than one "splittable unit"
   * (ordinary URLs + Wikimedia import URLs + non-URL snaks such as
   * imported-from).
   *
   * @param {Object} ref - Reference with an optional `snaks` map.
   * @returns {{splittable: boolean, urlCount?: number}} `urlCount` (only when
   *   splittable) counts archive, Wikimedia-import and ordinary URLs together.
   */
  function isSplittableReference(ref) {
    const snaks = ref.snaks || {};
    const pids = Object.keys(snaks);
    const allowed = new Set([
      PID_REFERENCE_URL,
      PID_RETRIEVED,
      PID_ARCHIVE_URL,
      PID_ARCHIVE_DATE,
      PID_IMPORTED_FROM,
      PID_WIKIMEDIA_IMPORT_URL,
    ]);
    const notSplittable = () => ({ splittable: false });

    if (!pids.includes(PID_REFERENCE_URL)) return notSplittable();
    if (pids.some((p) => !allowed.has(p))) return notSplittable();

    const counts = { archive: 0, wikimedia: 0, url: 0, other: 0 };
    for (const p of pids) {
      // Dates never count as something to split on.
      if (p === PID_RETRIEVED || p === PID_ARCHIVE_DATE) continue;
      const list = snaks[p] || [];
      for (const snak of list) {
        if (snak.datatype !== "url") {
          // NOTE(review): adds the whole list length once per non-URL snak, so
          // k such snaks contribute k*k. Kept as is; only "more than one"
          // matters below — confirm whether a plain increment was intended.
          counts.other += list.length;
          continue;
        }
        const v = snak.datavalue?.value;
        if (!v || typeof v !== "string") continue;
        if (isArchiveUrl(v) || p === PID_ARCHIVE_URL) counts.archive++;
        else if (isWikimediaImportUrl(v)) counts.wikimedia++;
        else counts.url++;
      }
    }

    if (counts.archive > 1) return notSplittable();
    if ((snaks[PID_ARCHIVE_DATE] || []).length > 1) return notSplittable();
    if (counts.wikimedia + counts.url + counts.other <= 1) {
      return notSplittable();
    }

    return {
      splittable: true,
      urlCount: counts.archive + counts.wikimedia + counts.url,
    };
  }

  /**
   * Emits one ACTION_SPLIT_REFERENCE_URLS diff for every reference that
   * isSplittableReference() reports as splittable.
   *
   * @param {object} entity - Entity whose references are visited via mapReferences.
   * @returns {*} Whatever mapReferences returns for the per-reference callback.
   */
  function detectMultipleReferenceUrls(entity) {
    const toSplitDiff = (pid, claim, ref) => {
      const { splittable, urlCount } = isSplittableReference(ref);
      return splittable
        ? {
            action: ACTION_SPLIT_REFERENCE_URLS,
            pid,
            claimId: claim.id,
            refHash: ref.hash,
            urlCount,
          }
        : null;
    };
    return mapReferences(entity, toSplitDiff);
  }

  /**
   * For every reference that has a reference URL (per getReferenceUrl) but no
   * external ID (per hasExternalId), tries to match the URL against the known
   * URL patterns and, on success, proposes adding the extracted external ID.
   *
   * Match results are memoised per normalised URL for the duration of one call.
   * Cached and freshly computed results go through the same guards, so a URL
   * that matched without yielding an ID is rejected on every occurrence.
   *
   * @param {object} entity - Entity whose references are visited via mapReferences.
   * @returns {*} Whatever mapReferences returns for the per-reference callback.
   */
  function detectAddExternalIdToReference(entity) {
    const urlMatchCache = new Map();
    return mapReferences(entity, (currentPid, claim, ref) => {
      const snaks = ref.snaks || {};
      const urlValue = getReferenceUrl(snaks);
      if (!urlValue || hasExternalId(snaks)) return null;

      const cacheKey = normalizeUrl(urlValue);
      if (!urlMatchCache.has(cacheKey)) {
        urlMatchCache.set(
          cacheKey,
          matchUrlAgainstPatternsWithCleanup(urlValue),
        );
      }
      const { matched, extractedId, suggestedProperty, keepUrl } =
        urlMatchCache.get(cacheKey);

      // No suggestion when nothing matched, no ID could be extracted, or the
      // statement itself already is the suggested external-ID property.
      if (!matched || !extractedId || currentPid === suggestedProperty)
        return null;

      return makeAddExternalIdToReferenceDiff(
        currentPid,
        claim,
        ref,
        keepUrl,
        urlValue,
        suggestedProperty,
        extractedId,
      );
    });
  }

  /**
   * Proposes removing "weak" occupation claims (normal rank, no valid
   * reference) when a "strong" occupation claim (valid reference or preferred
   * rank) exists whose value is a subclass of the weak claim's value according
   * to isSubclassOfLocal().
   *
   * Claims without an item value (somevalue / novalue) are ignored; they have
   * no QID to compare.
   *
   * @param {object} entity - Entity to inspect.
   * @returns {Promise<Array>} Removal diffs; empty when nothing applies or when
   *   buildOccupationParents() fails (the failure is logged).
   */
  async function detectRedundantOccupation(entity) {
    const occupationId = (c) => c.mainsnak?.datavalue?.value?.id;

    const weakOccs = [];
    const strongOccs = [];
    for (const c of entity.claims?.[PID_OCCUPATION] || []) {
      if (c.rank !== "normal" && c.rank !== "preferred") continue;
      if (!occupationId(c)) continue;
      // Evaluate the reference check once and classify the claim with it.
      if (hasValidReference(entity, c) || c.rank === "preferred") {
        strongOccs.push(c);
      } else {
        weakOccs.push(c);
      }
    }

    if (!weakOccs.length || !strongOccs.length) return [];

    const allIds = [...weakOccs, ...strongOccs].map(occupationId);
    try {
      await buildOccupationParents(allIds);
    } catch (e) {
      console.error(`${TOOL_NAME}: buildOccupationParents failed`, e);
      return [];
    }

    const diffs = [];
    for (const weakClaim of weakOccs) {
      const weakId = occupationId(weakClaim);
      // The first strong claim that is more specific than the weak one wins.
      const strongClaim = strongOccs.find((candidate) => {
        const strongId = occupationId(candidate);
        return strongId !== weakId && isSubclassOfLocal(strongId, weakId);
      });
      if (strongClaim) {
        diffs.push(makeRemoveClaimDiff(PID_OCCUPATION, weakClaim, strongClaim));
      }
    }
    return diffs;
  }

  /**
   * Proposes removing unreferenced date claims that duplicate a referenced
   * claim of the same property in a different calendar model, as judged by
   * has_same_normalized_date().
   *
   * Each unreferenced claim yields at most one removal diff, paired with the
   * first matching referenced claim, so the same claim is never proposed for
   * removal twice.
   *
   * @param {object} entity - Entity to inspect.
   * @returns {Array} Removal diffs built by makeRemoveClaimDiff().
   */
  function detectJulianGregorianDuplicateDates(entity) {
    const diffs = [];
    const hasRefs = (c) => (c.references || []).length > 0;

    for (const pid of getDateProperties(entity)) {
      const claims = entity.claims?.[pid] || [];
      const refClaims = claims.filter(hasRefs);
      if (!refClaims.length) continue;

      for (const a of claims) {
        if (hasRefs(a)) continue;
        const aVal = a.mainsnak?.datavalue?.value;
        if (!aVal) continue;

        const keeper = refClaims.find((b) => {
          const bVal = b.mainsnak?.datavalue?.value;
          return (
            Boolean(bVal) &&
            aVal.calendarmodel !== bVal.calendarmodel &&
            has_same_normalized_date(a, b, false, true)
          );
        });
        if (keeper) diffs.push(makeRemoveClaimDiff(pid, a, keeper));
      }
    }
    return diffs;
  }

  /**
   * Detect references that contain at least one obsolete external-ID snak
   * alongside at least one other external-ID snak (non-obsolete), and no
   * P813 (retrieved) — which is typical of book/bibliographic references.
   * In those cases only the obsolete snaks are stripped; the rest of the
   * reference is preserved.
   *
   * Guards:
   *   - The reference must have no P813 (retrieved) snak.
   *   - The reference must contain at least one obsolete external-ID snak.
   *   - The reference must contain at least one other external-ID snak that
   *     is not obsolete (the "surviving content" guard).
   *
   * Unlike the old partial_obsolete path this fires unconditionally —
   * regardless of whether stronger references exist on the same statement —
   * because the surviving content in the reference is itself meaningful.
   */
  function detectObsoleteSnaksInReferences(entity) {
    // True when the snak list holds at least one external-id snak with a value.
    const hasValuedExtId = (list) =>
      (list || []).some(
        (s) => s.datatype === "external-id" && s.datavalue?.value,
      );

    const found = [];

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        if (claim.rank === "deprecated") continue;

        for (const ref of claim.references || []) {
          const snaks = ref.snaks || {};
          const refPids = Object.keys(snaks);

          // References carrying a "retrieved" snak are out of scope here.
          if (refPids.includes(PID_RETRIEVED)) continue;

          // Single pass: sort the valued external-id properties into
          // obsolete ones and survivors.
          const obsoletePids = [];
          let hasSurvivor = false;
          for (const p of refPids) {
            if (!hasValuedExtId(snaks[p])) continue;
            if (obsoleteIdProps.has(p)) {
              obsoletePids.push(p);
            } else {
              hasSurvivor = true;
            }
          }

          // Need something to strip AND something meaningful left behind.
          if (!obsoletePids.length || !hasSurvivor) continue;

          found.push({
            action: ACTION_REMOVE_OBSOLETE_SNAKS,
            pid,
            claimId: claim.id,
            refHash: ref.hash,
            obsoletePids,
            removedKeys: obsoletePids,
          });
        }
      }
    }

    return found;
  }

  /**
   * Detect date properties that have exactly one normal-rank claim and one
   * deprecated-rank claim where:
   *   - The deprecated claim's reason-for-deprecated-rank (P2241) is
   *     Q42727519 (less precise/accurate), OR it carries no P2241 qualifier at all.
   *   - The two dates are equal at the lowest common precision.
   *
   * Proposed fix (two diffs per matching pair):
   *   1. Upgrade the normal-rank claim to preferred rank and set P7452 = Q71536040
   *      (most precise value) — ACTION_UPGRADE_PRECISE_DATE on the precise claim.
   *   2. Downgrade the deprecated claim back to normal rank and strip its P2241
   *      qualifier — reuses ACTION_DOWNGRADE_PREFERRED (rank change + qualifier removal)
   *      on the deprecated (less-precise) claim.
   */
  function detectUpgradePreciseDate(entity) {
    // Precision of a claim's time value, or -1 when the claim has none.
    const precisionOf = (claim) =>
      claim.mainsnak?.datavalue?.value?.precision ?? -1;

    const out = [];

    for (const pid of getDateProperties(entity)) {
      const claims = entity.claims?.[pid] || [];
      const normals = claims.filter((c) => c.rank === "normal");
      const deprecateds = claims.filter((c) => c.rank === "deprecated");

      // Only the unambiguous 1 normal + 1 deprecated constellation is handled.
      if (normals.length !== 1 || deprecateds.length !== 1) continue;
      const [precise] = normals;
      const [vague] = deprecateds;

      const precisePrec = precisionOf(precise);
      if (precisePrec < 0) continue;
      const vaguePrec = precisionOf(vague);
      if (vaguePrec < 0) continue;

      // The normal-rank claim must not be coarser than the deprecated one.
      if (precisePrec < vaguePrec) continue;

      // Both must denote the same date once compared at the coarser precision.
      if (!has_same_normalized_date(precise, vague, true, false)) continue;

      // An unreferenced claim is never promoted.
      if (!(precise.references || []).length) continue;

      // Any deprecation reason other than "less precise" blocks the fix;
      // having no reason at all is acceptable.
      const reasonSnaks =
        vague.qualifiers?.[PID_REASON_FOR_DEPRECATED_RANK] || [];
      const hasForeignReason = reasonSnaks.some(
        (s) => s.datavalue?.value?.id !== QID_LESS_PRECISE,
      );
      if (hasForeignReason) continue;

      // Both diffs share one rowId so the hidden downgrade follows the
      // checkbox state of the visible upgrade row.
      const rowId = `upgradePreciseDate_${pid}_${precise.id}`;

      // Visible diff: promote the precise claim.
      out.push({
        action: ACTION_UPGRADE_PRECISE_DATE,
        rowId,
        pid,
        claimId: precise.id,
        value: precise.mainsnak?.datavalue,
        deprClaimId: vague.id,
        deprValue: vague.mainsnak?.datavalue,
      });

      // Hidden companion diff: bring the less precise claim back to normal
      // rank, naming the deprecation-reason qualifier for removal if present.
      out.push({
        action: ACTION_DOWNGRADE_PREFERRED,
        _hidden: true,
        rowId,
        pid,
        claimId: vague.id,
        value: vague.mainsnak?.datavalue,
        removedQualifier: reasonSnaks.length
          ? PID_REASON_FOR_DEPRECATED_RANK
          : null,
        fromDeprecated: true,
      });
    }

    return out;
  }

  /**
   * Finds reference snaks of the form "stated in: <Wikipedia edition>" and
   * proposes changing their property to "imported from Wikimedia project".
   * A QID counts as a Wikipedia edition when it is a key of
   * wikipediaEditionsCache.
   *
   * The cache is refreshed first when it is empty, provided its descriptor is
   * registered in `caches`; a missing descriptor is tolerated (no refresh).
   *
   * @param {object} entity - Entity to inspect.
   * @returns {Promise<Array>} ACTION_CHANGE_PROPERTY diffs, one per matching snak.
   */
  async function detectConvertWikipediaStatedIn(entity) {
    if (!wikipediaEditionsCache.size) {
      const entry = caches.find((c) => c.key === WIKIPEDIA_EDITIONS_CACHE_KEY);
      if (entry) await refreshCacheWithNotify(entry);
    }

    const diffs = [];
    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        for (const ref of claim.references || []) {
          for (const s of ref.snaks?.[PID_STATED_IN] || []) {
            const qid = s?.datavalue?.value?.id;
            if (!qid || !wikipediaEditionsCache.has(qid)) continue;
            diffs.push({
              action: ACTION_CHANGE_PROPERTY,
              context: "reference",
              pid,
              claimId: claim.id,
              refHash: ref.hash,
              snakHash: s.hash,
              oldProperty: PID_STATED_IN,
              newProperty: PID_IMPORTED_FROM,
              value: s.datavalue,
            });
          }
        }
      }
    }
    return diffs;
  }

  /**
   * Looks for references that pair exactly one external-id property with
   * exactly one "stated in" snak whose value is listed as not allowed for that
   * property in propertyStatedInCache, and proposes replacing the value with
   * the property's preferred one.
   *
   * A property counts as external-id when the first snak stored under it has
   * datatype "external-id". The cache is refreshed first when empty and its
   * descriptor is registered in `caches`.
   *
   * @param {object} entity - Entity to inspect.
   * @returns {Promise<Array>} ACTION_CHANGE_VALUE diffs.
   */
  async function detectInvalidStatedInReference(entity) {
    if (!propertyStatedInCache.size) {
      const cacheEntry = caches.find((c) => c.key === STATED_IN_CACHE_KEY);
      if (cacheEntry) await refreshCacheWithNotify(cacheEntry);
    }

    const results = [];

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        for (const ref of claim.references || []) {
          const snaks = ref.snaks || {};

          const extIdPids = Object.keys(snaks).filter(
            (snakPid) => snaks[snakPid]?.[0]?.datatype === "external-id",
          );
          const statedInSnaks = snaks[PID_STATED_IN] || [];
          if (extIdPids.length !== 1 || statedInSnaks.length !== 1) continue;

          const [extIdPid] = extIdPids;
          if (!extIdPid) continue;

          const [statedInSnak] = statedInSnaks;
          const statedInQid = statedInSnak?.datavalue?.value?.id;
          const preferences = propertyStatedInCache.get(extIdPid);
          if (!preferences || !statedInQid) continue;

          const { preferred, notAllowed } = preferences;
          // Only act on a known-bad value when there is a replacement to offer.
          if (!notAllowed?.has(statedInQid) || !preferred) continue;

          results.push({
            action: ACTION_CHANGE_VALUE,
            context: "reference",
            pid,
            claimId: claim.id,
            refHash: ref.hash,
            snakHash: statedInSnak.hash,
            oldValue: statedInQid,
            newValue: preferred,
            externalIdPid: extIdPid,
          });
        }
      }
    }
    return results;
  }

  /**
   * Detect references that carry both P143 (imported from Wikimedia project) and
   * P4656 (Wikimedia import URL), each with exactly one value, where the Wikipedia
   * edition implied by the P4656 URL differs from what P143 currently states.
   *
   * For example, a P4656 value of "https://pl.wikipedia.org/w/index.php?title=…"
   * implies P143 = Q1551807 (Polish Wikipedia).  If P143 holds a different QID,
   * this detector emits ACTION_CHANGE_VALUE to correct it.
   *
   * Only *.wikipedia.org hostnames are handled.  Other Wikimedia projects
   * (Wikibooks, Wikisource, etc.) are skipped to avoid false positives.
   */
  async function detectMismatchedWikimediaImport(entity) {
    const diffs = [];

    // Refresh the editions cache when empty. A missing cache descriptor is
    // tolerated (no refresh), matching how the stated-in cache is handled.
    if (!wikipediaEditionsCache.size) {
      const entry = caches.find((c) => c.key === WIKIPEDIA_EDITIONS_CACHE_KEY);
      if (entry) await refreshCacheWithNotify(entry);
    }

    // Reverse map: language code -> QID (first seen wins per language).
    const langToQid = new Map();
    for (const [qid, lang] of wikipediaEditionsCache) {
      if (!langToQid.has(lang)) langToQid.set(lang, qid);
    }

    // Returns the edition QID implied by a *.wikipedia.org URL, or null when
    // the URL is unparsable, not a Wikipedia URL, or its language is unknown.
    const expectedEditionFor = (importUrl) => {
      let hostname;
      try {
        ({ hostname } = new URL(importUrl));
      } catch {
        return null;
      }
      // A leading "www." is stripped before matching, just in case.
      const m = hostname
        .toLowerCase()
        .replace(/^www\./, "")
        .match(/^([a-z-]+)\.wikipedia\.org$/);
      return m ? (langToQid.get(m[1]) ?? null) : null;
    };

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        for (const ref of claim.references || []) {
          const snaks = ref.snaks || {};
          const p143Snaks = snaks[PID_IMPORTED_FROM] || [];
          const p4656Snaks = snaks[PID_WIKIMEDIA_IMPORT_URL] || [];

          // Only handle references with exactly one P143 and one P4656 snak.
          if (p143Snaks.length !== 1 || p4656Snaks.length !== 1) continue;

          const p143Snak = p143Snaks[0];
          const currentQid = p143Snak?.datavalue?.value?.id;
          const importUrl = p4656Snaks[0]?.datavalue?.value;
          if (!isQid(currentQid) || typeof importUrl !== "string") continue;

          const expectedQid = expectedEditionFor(importUrl);
          if (!expectedQid || expectedQid === currentQid) continue;

          diffs.push({
            action: ACTION_CHANGE_VALUE,
            context: "reference",
            pid,
            claimId: claim.id,
            refHash: ref.hash,
            snakHash: p143Snak.hash,
            oldValue: currentQid,
            newValue: expectedQid,
            importUrl,
          });
        }
      }
    }

    return diffs;
  }

  /**
   * Detect P1343 (described by source) claims whose value matches the preferred
   * "applicable stated in" (P9073) of an external-id property present on the same item,
   * OR whose value is a not-allowed stated-in for such a property (and is not an
   * allowed value for any other external-id property on the item).
   *
   * Simple variant  – P1343 has no qualifiers and no references:
   *   Emit ACTION_REMOVE_CLAIM for the P1343 claim.
   *
   * Complex variant – P1343 may have qualifiers (e.g. P577 publication date) and/or
   *   a reference that itself contains the external-id value:
   *   Emit ACTION_ABSORB_CLAIM, which the apply step handles by:
   *     1. Removing the P1343 claim.
   *     2. Moving each reference from (I) to the matching external-id claim (III),
   *        injecting any P1343 qualifiers (IV) as extra snaks inside each moved reference.
   *
   * In both cases the detector only fires when:
   *   - The P1343 rank is not deprecated.
   *   - The P1343 has at most one reference (multiple refs -> skip, too ambiguous).
   *   - The external-id claim rank is not deprecated.
   *   - The P1343 value QID is either:
   *       (a) an allowed stated-in for at least one external-id property on this item, OR
   *       (b) a not-allowed stated-in for such a property (i.e. a related-entity QID
   *           such as issuer/maintainer/editor preloaded from the cache, or any QID
   *           already appearing in existing references that is not in allowed),
   *           provided it is not simultaneously an allowed stated-in for any other
   *           external-id property on this item.
   *
   * When multiple ext-id claims share the same property (e.g. two P243 values),
   * the correct target is resolved from the P1343 reference content:
   *   1. The ref contains an ext-id snak whose PID+value matches exactly one candidate.
   *   2. The ref URL matches a URL pattern whose extracted ID matches a candidate.
   *   3. The candidate ext-id value appears verbatim in the ref URL
   *      (e.g. OCLC 749229748 inside https://search.worldcat.org/title/749229748).
   * If none of these resolve to exactly one candidate, the P1343 claim is skipped.
   */
  async function detectAbsorbDescribedBySource(entity) {
    const diffs = [];

    // Ensure the stated-in cache is populated.
    if (!propertyStatedInCache.size) {
      const entry = caches.find((c) => c.key === STATED_IN_CACHE_KEY);
      if (entry) await refreshCacheWithNotify(entry);
    }

    const describedByClaims = (
      entity.claims?.[PID_DESCRIBED_BY_SOURCE] || []
    ).filter((c) => c.rank !== "deprecated");
    if (!describedByClaims.length) return diffs;

    // Two lookup maps over the properties on this item that have at least one
    // allowed stated-in value in the cache:
    //
    //   extIdByAllowed    – allowedQid    -> [{extIdPid, claim, preferred}]
    //   extIdByNotAllowed – notAllowedQid -> [{extIdPid, claim, preferred}]
    //
    // A QID that is allowed for a property is never filed under not-allowed
    // for that same property.
    const extIdByAllowed = new Map();
    const extIdByNotAllowed = new Map();

    // Files `entry` under `qid`, at most once per list. Without the
    // de-duplication a single claim seeded from several places (cache plus
    // existing references) would look like several competing candidates.
    const addEntry = (index, qid, entry) => {
      const list = index.get(qid);
      if (!list) index.set(qid, [entry]);
      else if (!list.includes(entry)) list.push(entry);
    };

    for (const [extIdPid, extIdClaims] of Object.entries(entity.claims || {})) {
      const prefs = propertyStatedInCache.get(extIdPid);
      if (!prefs?.allowed?.size) continue;

      for (const extClaim of extIdClaims) {
        if (extClaim.rank === "deprecated") continue;
        const entry = { extIdPid, claim: extClaim, preferred: prefs.preferred };

        for (const allowedQid of prefs.allowed) {
          addEntry(extIdByAllowed, allowedQid, entry);
        }

        // Not-allowed QIDs come from the cache's notAllowed set plus any
        // stated-in values already used in this claim's references.
        const seedNotAllowed = (qid) => {
          if (!isQid(qid) || prefs.allowed.has(qid)) return;
          addEntry(extIdByNotAllowed, qid, entry);
        };
        for (const qid of prefs.notAllowed || []) seedNotAllowed(qid);
        for (const ref of extClaim.references || []) {
          for (const siSnak of ref.snaks?.[PID_STATED_IN] || []) {
            seedNotAllowed(siSnak?.datavalue?.value?.id);
          }
        }
      }
    }

    for (const p1343Claim of describedByClaims) {
      const sourceQid = p1343Claim.mainsnak?.datavalue?.value?.id;
      if (!isQid(sourceQid)) continue;

      // Prefer the allowed match; fall back to not-allowed only when the QID
      // is not an allowed value for any property present on this item.
      const candidateExtIds =
        extIdByAllowed.get(sourceQid) ?? extIdByNotAllowed.get(sourceQid);
      if (!candidateExtIds?.length) continue;

      const hasQuals = !hasNoQualifiers(p1343Claim);
      const refs = p1343Claim.references || [];

      // Safety: more than one reference on the P1343 claim -> too ambiguous.
      if (refs.length > 1) continue;

      // ── Resolve which ext-id claim to target ──────────────────────────────
      let targetEntry = null;

      if (candidateExtIds.length === 1) {
        targetEntry = candidateExtIds[0];
      } else {
        // Several candidates: disambiguate using the (single) P1343 reference.
        const refSnaks = refs[0]?.snaks || {};
        const valueOf = (e) => e.claim.mainsnak?.datavalue?.value;
        // Returns the only candidate satisfying `pred`, or null.
        const pickUnique = (pred) => {
          const hits = candidateExtIds.filter(pred);
          return hits.length === 1 ? hits[0] : null;
        };

        // Strategy 1: the ref holds exactly one ext-id snak (first snak per
        // property is inspected) whose PID and value match one candidate.
        const extIdInRef = [];
        for (const [rPid, list] of Object.entries(refSnaks)) {
          const snak = list?.[0];
          if (
            snak?.datatype === "external-id" &&
            typeof snak.datavalue?.value === "string"
          ) {
            extIdInRef.push([rPid, snak.datavalue.value]);
          }
        }
        // Several ext-ids in the ref — too ambiguous.
        if (extIdInRef.length > 1) continue;
        if (extIdInRef.length === 1) {
          const [rPid, rVal] = extIdInRef[0];
          targetEntry = pickUnique(
            (e) => e.extIdPid === rPid && valueOf(e) === rVal,
          );
        }

        // Strategies 2 & 3: use a URL from the ref.
        if (!targetEntry) {
          const urlSnak =
            (refSnaks[PID_REFERENCE_URL] || [])[0] ??
            (refSnaks[PID_URL] || [])[0];
          const urlValue = urlSnak?.datavalue?.value;

          if (typeof urlValue === "string") {
            // Strategy 2: pattern-match the URL to extract an ext-id value.
            const matchResult = matchUrlAgainstPatternsWithCleanup(urlValue);
            if (matchResult.matched && matchResult.extractedId) {
              targetEntry = pickUnique(
                (e) =>
                  e.extIdPid === matchResult.suggestedProperty &&
                  valueOf(e) === matchResult.extractedId,
              );
            }

            // Strategy 3: a candidate's value appears verbatim in the URL.
            if (!targetEntry) {
              targetEntry = pickUnique((e) => {
                const val = valueOf(e);
                return (
                  typeof val === "string" &&
                  val.length > 0 &&
                  urlValue.includes(val)
                );
              });
            }
          }
        }

        if (!targetEntry) continue; // could not unambiguously identify the target
      }

      const { extIdPid, claim: extIdClaim } = targetEntry;
      const isSimple = !hasQuals && refs.length === 0;

      if (isSimple) {
        // Simple variant: just remove the P1343 claim, nothing to migrate.
        diffs.push({
          action: ACTION_REMOVE_CLAIM,
          pid: PID_DESCRIBED_BY_SOURCE,
          claimId: p1343Claim.id,
          value: p1343Claim.mainsnak?.datavalue,
          keepClaimId: extIdClaim.id,
          keepValue: extIdClaim.mainsnak?.datavalue,
        });
      } else {
        // Complex variant: the diff carries everything needed to migrate the
        // claim's qualifiers and references onto the ext-id claim.
        diffs.push({
          action: ACTION_ABSORB_CLAIM,
          pid: PID_DESCRIBED_BY_SOURCE,
          claimId: p1343Claim.id,
          value: p1343Claim.mainsnak?.datavalue,
          extIdPid,
          extIdClaimId: extIdClaim.id,
          keepValue: extIdClaim.mainsnak?.datavalue,
          // Snapshot qualifiers at detect-time so later mutation of the
          // entity cannot change what this diff describes.
          qualifiers: JSON.parse(JSON.stringify(p1343Claim.qualifiers || {})),
          qualifiersOrder: (p1343Claim["qualifiers-order"] || []).slice(),
          refHashes: refs.map((r) => r.hash),
        });
      }
    }

    return diffs;
  }

  /**
   * Detect URL-type claims (e.g. P973 described at URL, P856 official website, P2699 URL)
   * whose value can be matched by matchUrlAgainstPatterns to an external-ID property,
   * and where a non-deprecated claim for that ext-id property with the extracted value
   * already exists on the same item.
   *
   * When found, the URL claim is redundant — its qualifiers and references can be
   * absorbed into the matching ext-id claim just like absorbDescribedBySource.
   * The same ACTION_ABSORB_CLAIM apply logic is reused; the URL claim
   * plays the role of the P1343 claim.
   *
   * URL properties considered: P973 (described at URL), P856 (official website),
   * P2699 (URL), and any other URL-datatype property present on the item.
   *
   * Guards:
   *   - URL claim rank is not deprecated.
   *   - URL claim has at most one reference (multiple refs -> skip).
   *   - The extracted ext-id value matches exactly one non-deprecated claim on the item.
   */
  async function detectAbsorbUrlClaim(entity) {
    const diffs = [];

    // Collect all URL-datatype claims on the item, grouped by PID.
    // We consider every property whose mainsnak datatype is "url".
    const urlClaims = [];
    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        if (claim.rank === "deprecated") continue;
        if (claim.mainsnak?.datatype !== "url") continue;
        const urlValue = claim.mainsnak?.datavalue?.value;
        if (typeof urlValue !== "string" || !urlValue) continue;
        urlClaims.push({ pid, claim, urlValue });
      }
    }
    if (!urlClaims.length) return diffs;

    // Build a lookup: extIdPid -> Map<value, claim> for all non-deprecated ext-id claims.
    const extIdIndex = new Map(); // extIdPid -> Map<value, claim>
    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        if (claim.rank === "deprecated") continue;
        if (claim.mainsnak?.datatype !== "external-id") continue;
        const val = claim.mainsnak?.datavalue?.value;
        if (typeof val !== "string") continue;
        if (!extIdIndex.has(pid)) extIdIndex.set(pid, new Map());
        // Keep first non-deprecated claim for each value (duplicates handled elsewhere)
        if (!extIdIndex.get(pid).has(val)) extIdIndex.get(pid).set(val, claim);
      }
    }
    if (!extIdIndex.size && !DEV_ABSORB_URL_CLAIM_CREATE_MISSING) return diffs;

    for (const { pid, claim: urlClaim, urlValue } of urlClaims) {
      // Safety: more than one reference -> too ambiguous, skip.
      const refs = urlClaim.references || [];
      // not so relevant here
      // if (refs.length > 1) continue;

      // Match the URL against cached patterns.
      const matchResult = matchUrlAgainstPatternsWithCleanup(urlValue);

      if (!matchResult.matched || !matchResult.extractedId) continue;

      const { suggestedProperty: extIdPid, extractedId } = matchResult;

      // Helper: check whether the item's P31 (instance of) matches any of the
      // given QIDs.
      const itemIsA = (qids) =>
        qids.some((qid) =>
          (entity.claims?.["P31"] || []).some(
            (c) => c.mainsnak?.datavalue?.value?.id === qid,
          ),
        );

      // Platform-pair check: if the URL resolves to a "content item" property
      // and the item has the corresponding "creator" property for the same
      // platform, remove the URL claim (it points at a content item, not the
      // subject). Emit ACTION_REMOVE_CLAIM with the creator claim as the reason.
      const platformPair = ABSORB_URL_CLAIM_PLATFORM_PAIRS[extIdPid];
      if (platformPair && itemIsA(platformPair.itemTypes)) {
        const creatorClaims = (
          entity.claims?.[platformPair.creatorPid] || []
        ).filter((c) => c.rank !== "deprecated");
        if (creatorClaims.length) {
          //const hasQuals = !hasNoQualifiers(urlClaim);
          //const isSimple = !hasQuals && refs.length === 0;
          //if (isSimple) {
          diffs.push({
            action: ACTION_REMOVE_CLAIM,
            pid,
            claimId: urlClaim.id,
            value: urlClaim.mainsnak?.datavalue,
            keepClaimId: creatorClaims[0].id,
            keepValue: creatorClaims[0].mainsnak?.datavalue,
          });
          //}
          // Complex case (has qualifiers/refs): skip — too ambiguous to migrate
          // to the creator claim automatically.
          continue;
        }
      }

      // Skip ext-id properties that are inappropriate for this item type and
      // have no platform-pair remedy (e.g. IMDb title ID on a human item).
      if (
        ABSORB_URL_CLAIM_SKIP[extIdPid] &&
        itemIsA(ABSORB_URL_CLAIM_SKIP[extIdPid])
      )
        continue;

      // Check whether the extracted ext-id value exists on the item.
      const extIdClaim = extIdIndex.get(extIdPid)?.get(extractedId);

      if (!extIdClaim) {
        // Normal mode: ext-id must already exist on the item.
        if (!DEV_ABSORB_URL_CLAIM_CREATE_MISSING) continue;

        // Dev mode: propose converting the URL claim into a new ext-id statement,
        // carrying over its qualifiers and references.
        diffs.push({
          action: ACTION_CONVERT_URL_TO_EXT_ID,
          pid,
          claimId: urlClaim.id,
          value: urlClaim.mainsnak?.datavalue,
          extIdPid,
          extractedId,
          qualifiers: JSON.parse(JSON.stringify(urlClaim.qualifiers || {})),
          qualifiersOrder: (urlClaim["qualifiers-order"] || []).slice(),
          refHashes: refs.map((r) => r.hash),
        });
        continue;
      }

      // Don't absorb into the same claim (shouldn't happen, but guard anyway).
      if (extIdClaim.id === urlClaim.id) continue;

      const hasQuals = !hasNoQualifiers(urlClaim);
      const isSimple = !hasQuals && refs.length === 0;

      if (isSimple) {
        diffs.push({
          action: ACTION_REMOVE_CLAIM,
          pid,
          claimId: urlClaim.id,
          value: urlClaim.mainsnak?.datavalue,
          keepClaimId: extIdClaim.id,
          keepValue: extIdClaim.mainsnak?.datavalue,
        });
      } else {
        diffs.push({
          action: ACTION_ABSORB_CLAIM,
          pid,
          claimId: urlClaim.id,
          value: urlClaim.mainsnak?.datavalue,
          extIdPid,
          extIdClaimId: extIdClaim.id,
          keepValue: extIdClaim.mainsnak?.datavalue,
          qualifiers: JSON.parse(JSON.stringify(urlClaim.qualifiers || {})),
          qualifiersOrder: (urlClaim["qualifiers-order"] || []).slice(),
          refHashes: refs.map((r) => r.hash),
        });
      }
    }

    return diffs;
  }

  /**
   * Detect "cites work" (P2860) statements in which the item cites itself.
   *
   * The item's own ID is taken from the page title (wgTitle); when that title
   * is not a QID nothing is reported.
   *
   * @param {object} entity - Entity JSON with a `claims` map.
   * @returns {object[]} One makeRemoveClaimDiff() result per self-citing claim.
   */
  function detectSelfCite(entity) {
    const pageQid = mw.config.get("wgTitle");
    if (!isQid(pageQid)) {
      return [];
    }

    const selfCiteDiffs = [];
    for (const statement of entity.claims?.[PID_CITES_WORK] || []) {
      const citedId = statement.mainsnak?.datavalue?.value?.id;
      if (citedId !== pageQid) continue;
      selfCiteDiffs.push(makeRemoveClaimDiff(PID_CITES_WORK, statement, null));
    }
    return selfCiteDiffs;
  }

  // ==== 18 Whitelist for always-removable wikimedia refs ====================

  // Statement properties on which detectRefCategories() proposes removing a
  // reference classified as "wikimedia" without comparing its level against
  // the statement's other references (on all other properties a "wikimedia"
  // reference is only proposed when a higher-level reference exists).
  // NOTE(review): presumably chosen because the values of these properties are
  // themselves Wikimedia pages (categories, Commons pages) — confirm.
  const ALWAYS_REMOVE_WIKIMEDIA_PIDS = new Set([
    "P301", // category's main topic
    "P373", // Commons category
    "P910", // topic's main category
    "P971", // category combines topics
    "P1200", // category for the water basin
    "P1464", // category for people born here
    "P1465", // category for people who died here
    "P1740", // category for films shot at this location
    "P1753", // list related to category
    "P1754", // category related to list
    "P1791", // category for people buried here
    "P1792", // category of associated people
    "P2033", // category for pictures taken with this camera
    "P2517", // category for recipients of this award
    "P2875", // property usage tracking category
    "P3709", // category for value different from Wikidata
    "P3713", // category for value not in Wikidata
    "P3734", // category for value same as Wikidata
    "P3876", // category for alumni of educational institution
    "P4195", // category for employees of the organization
    "P4224", // category contains
    "P4329", // category populated by
    "P5996", // category for films in this language
    "P6112", // category for members of a team
    "P6186", // category for eponymous categories
    "P6365", // member category
    "P7084", // related category
    "P7561", // category for the interior of the item
    "P7782", // category for ship name
    "P7861", // category for files created with program
    "P7867", // category for maps or plans
    "P8464", // content partnership category
    "P10280", // category for honorary citizens of entity
    "P12686", // category for births in this time period
    "P12687", // category for deaths in this time period

    // Commons pages other than categories
    "P935", // Commons gallery
    "P1472", // Commons Creator page
    "P1612", // Commons Institution page
  ]);

  // ==== 19 Reference-detector factory =======================================

  /**
   * Classify every reference of every statement exactly once and collect
   * removal proposals for the reference categories that are currently active
   * (the detectors that have no detect function of their own).
   *
   * Rules, per category returned by determineSourceCategory():
   *   - "redundant", "wikimedia_no_sitelinks": always proposed.
   *   - "invalid", "self_stated_in": always proposed on non-strict statements;
   *     on strict statements the level rule applies.
   *   - "wikimedia": always proposed when the statement property is listed in
   *     ALWAYS_REMOVE_WIKIMEDIA_PIDS; otherwise the level rule applies, on
   *     strict and non-strict statements alike.
   *   - every other active category: strict statements only, and the level
   *     rule applies.
   * Level rule: the reference's level (getReferenceLevel) must be lower than
   * the highest level found among the statement's references.
   * References reported as splittable by isSplittableReference() are never
   * proposed, whatever their category.
   *
   * @param {object} entity - Entity JSON with a `claims` map.
   * @param {string[]} activeCategories - Category keys to collect diffs for.
   * @returns {Map<string, object[]>} Category key -> ACTION_REMOVE_REFS diffs.
   */
  function detectRefCategories(entity, activeCategories) {
    const buckets = new Map();
    for (const key of activeCategories) {
      buckets.set(key, []);
    }

    // True when a reference of this category may be dropped without looking
    // at the strictness or level checks.
    const isUnconditional = (category, pid, strict) => {
      switch (category) {
        case "redundant":
        case "wikimedia_no_sitelinks":
          return true;
        case "invalid":
        case "self_stated_in":
          return !strict;
        case "wikimedia":
          return ALWAYS_REMOVE_WIKIMEDIA_PIDS.has(pid);
        default:
          return false;
      }
    };

    for (const [pid, statements] of Object.entries(entity.claims || {})) {
      for (const statement of statements) {
        const refs = statement.references;
        if (!refs?.length) continue;

        const levels = refs.map((r) =>
          getReferenceLevel(entity, r, refs, statement),
        );
        const topLevel = Math.max(...levels);
        const strict = isStrictStatement(statement);

        refs.forEach((ref, idx) => {
          const category = determineSourceCategory(
            entity,
            ref,
            refs,
            statement,
          );
          const bucket = buckets.get(category);
          if (bucket === undefined) return; // category not active

          if (isSplittableReference(ref).splittable) return;

          if (!isUnconditional(category, pid, strict)) {
            // "wikimedia" is the only category that skips the strictness test.
            const strictnessOk = category === "wikimedia" || strict;
            if (!strictnessOk) return;
            if (levels[idx] >= topLevel) return;
          }

          const diff = {
            action: ACTION_REMOVE_REFS,
            pid,
            claimId: statement.id,
            refHash: ref.hash,
            removedKeys: Object.keys(ref.snaks),
          };
          if (category === "wikimedia_no_sitelinks") {
            diff.lang =
              analyzeWikimediaReference(entity, ref).language || "unknown";
          }
          bucket.push(diff);
        });
      }
    }

    return buckets;
  }

  /**
   * Build the registry entry for one reference category.
   *
   * The entry carries no detect function (`detect: null`): the diffs for all
   * reference-category detectors come from the shared detectRefCategories()
   * pass run by generatePreviewDiffs. The "wikimedia_no_sitelinks" category
   * gets an extra "lang" column between the property and the removed values.
   *
   * @param {string} categoryKey - Category key, also used as label/summary.
   * @returns {object} Detector descriptor with headers and a renderRow method.
   */
  function createReferenceDetector(categoryKey) {
    const showsLanguage = categoryKey === "wikimedia_no_sitelinks";

    const headers = ["property"];
    if (showsLanguage) headers.push("lang");
    headers.push("removedValues");

    return {
      label: categoryKey,
      headers,
      isRemoveRefCategory: true,
      summaryLabel: categoryKey,
      detect: null,
      renderRow(row, labels) {
        const { pid, claimId, removedKeys } = row;
        const propertyCell = renderLink({ id: pid, claimId, labels });
        const removedCell = resolvePidList(removedKeys, labels);
        if (!showsLanguage) {
          return [propertyCell, removedCell];
        }
        const langCell = renderLangCode(row.lang, { hintOnly: true });
        return [propertyCell, langCell, removedCell];
      },
    };
  }

  /**
   * Detect references whose P854 (reference URL) repeats a URL the statement
   * already carries, either as a URL-datatype mainsnak value or as a
   * URL-datatype qualifier value.
   *
   * URLs are compared after normalisation: normalizeUrl + cleanUrl in
   * recognition mode, trailing slash removed, and a leading "www." dropped
   * from the hostname, so "https://www.example.com/" equals
   * "https://example.com". A value that fails to normalise is compared as-is.
   *
   * Per matching reference one of two proposals is emitted:
   *   - ACTION_REMOVE_REFS when, apart from the redundant P854 snaks, the
   *     reference holds nothing but P813 / P1476 / P1810 snaks (or nothing);
   *   - ACTION_REMOVE_REDUNDANT_REF_URL, once per redundant P854 snak, when
   *     anything else (including a non-redundant P854 value) remains.
   *
   * Deprecated statements are ignored, as are references reported as
   * splittable by isSplittableReference().
   *
   * @param {object} entity - Entity JSON with a `claims` map.
   * @returns {object[]} Proposed diffs in statement/reference order.
   */
  function detectRemoveRedundantRefUrl(entity) {
    // Snak properties that do not count as content of their own when deciding
    // between dropping the whole reference and stripping only the P854 snak.
    const metadataOnlyPids = new Set([
      PID_RETRIEVED,
      PID_TITLE,
      PID_SUBJECT_NAMED_AS,
    ]);

    // Comparison key for a raw URL; null for non-string or empty input.
    const normaliseForComparison = (raw) => {
      if (typeof raw !== "string" || !raw) return null;
      try {
        const cleaned = cleanUrl(normalizeUrl(raw), {
          recognitionMode: true,
        }).url;
        const parsed = new URL(removeTrailingSlash(cleaned));
        // "www." is dropped for comparison only.
        if (parsed.hostname.startsWith("www.")) {
          parsed.hostname = parsed.hostname.slice(4);
        }
        return removeTrailingSlash(parsed.href);
      } catch {
        return raw;
      }
    };

    // Normalised URLs present in the statement's mainsnak and qualifiers.
    const urlsCarriedBy = (claim) => {
      const urlSnaks = [];
      if (claim.mainsnak?.datatype === "url") {
        urlSnaks.push(claim.mainsnak);
      }
      for (const qualifierSnaks of Object.values(claim.qualifiers || {})) {
        for (const snak of qualifierSnaks) {
          if (snak.datatype === "url") urlSnaks.push(snak);
        }
      }

      const urls = new Set();
      for (const snak of urlSnaks) {
        const key = normaliseForComparison(snak.datavalue?.value);
        if (key) urls.add(key);
      }
      return urls;
    };

    const diffs = [];

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        if (claim.rank === "deprecated") continue;

        const statementUrls = urlsCarriedBy(claim);
        if (statementUrls.size === 0) continue;

        for (const ref of claim.references || []) {
          if (isSplittableReference(ref).splittable) continue;

          const refUrlSnaks = ref.snaks?.[PID_REFERENCE_URL] || [];
          const redundant = refUrlSnaks.filter((snak) => {
            const key = normaliseForComparison(snak.datavalue?.value);
            return key !== null && statementUrls.has(key);
          });
          if (redundant.length === 0) continue;

          // P854 itself still counts as content when at least one of its
          // values is not redundant.
          const someRefUrlSurvives = refUrlSnaks.length > redundant.length;
          const keepsRealContent = Object.keys(ref.snaks || {}).some(
            (snakPid) =>
              (snakPid !== PID_REFERENCE_URL || someRefUrlSurvives) &&
              !metadataOnlyPids.has(snakPid),
          );

          if (keepsRealContent) {
            // Strip only the redundant P854 snak(s); the rest stays.
            for (const snak of redundant) {
              diffs.push({
                action: ACTION_REMOVE_REDUNDANT_REF_URL,
                pid,
                claimId: claim.id,
                refHash: ref.hash,
                snakHash: snak.hash,
                referenceUrl: snak.datavalue?.value,
              });
            }
            continue;
          }

          // Nothing meaningful would remain: drop the whole reference.
          diffs.push({
            action: ACTION_REMOVE_REFS,
            pid,
            claimId: claim.id,
            refHash: ref.hash,
            removedKeys: Object.keys(ref.snaks),
            // extra info for renderRow
            _redundantRefUrl: redundant[0]?.datavalue?.value,
          });
        }
      }
    }

    return diffs;
  }

  /**
   * Detect URL-datatype statements whose (normalised) value appears on two or
   * more different properties of the same item, e.g. P856 and P973 pointing
   * to the same page.
   *
   * Within each group the first statement on the property with the lowest
   * numeric PID is kept. Every statement on the other properties is proposed
   * for removal:
   *   - ACTION_REMOVE_CLAIM when it has neither qualifiers nor references;
   *   - ACTION_ABSORB_CLAIM otherwise, carrying a copy of its qualifiers, its
   *     qualifier order and its reference hashes; the kept statement is
   *     passed in the diff's extIdPid / extIdClaimId fields.
   *
   * URLs are compared after normalizeUrl + cleanUrl (recognition mode) and
   * removal of a trailing slash; the stored values are left untouched.
   * Deprecated statements are ignored, and a URL that repeats only within a
   * single property does not trigger anything here.
   *
   * @param {object} entity - Entity JSON with a `claims` map.
   * @returns {object[]} Proposed diffs.
   */
  function detectDuplicateUrlClaims(entity) {
    const pidNumber = (pid) => parseInt(pid.replace("P", ""), 10);

    // normalised URL -> Map(pid -> statements carrying that URL)
    const claimsByUrl = new Map();

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        if (claim.rank === "deprecated") continue;
        if (claim.mainsnak?.datatype !== "url") continue;

        const rawUrl = claim.mainsnak?.datavalue?.value;
        if (typeof rawUrl !== "string" || !rawUrl) continue;

        const cleaned = cleanUrl(normalizeUrl(rawUrl), {
          recognitionMode: true,
        }).url;
        const urlKey = removeTrailingSlash(cleaned);

        let perPid = claimsByUrl.get(urlKey);
        if (!perPid) {
          perPid = new Map();
          claimsByUrl.set(urlKey, perPid);
        }
        const sameProperty = perPid.get(pid);
        if (sameProperty) {
          sameProperty.push(claim);
        } else {
          perPid.set(pid, [claim]);
        }
      }
    }

    const diffs = [];

    for (const perPid of claimsByUrl.values()) {
      // The URL must sit on at least two distinct properties.
      if (perPid.size < 2) continue;

      const [keepPid, ...otherPids] = [...perPid.keys()].sort(
        (a, b) => pidNumber(a) - pidNumber(b),
      );
      const keepClaim = perPid.get(keepPid)[0];

      for (const removePid of otherPids) {
        for (const removeClaim of perPid.get(removePid)) {
          const refs = removeClaim.references || [];
          const needsMigration =
            !hasNoQualifiers(removeClaim) || refs.length > 0;

          if (needsMigration) {
            diffs.push({
              action: ACTION_ABSORB_CLAIM,
              pid: removePid,
              claimId: removeClaim.id,
              value: removeClaim.mainsnak?.datavalue,
              extIdPid: keepPid,
              extIdClaimId: keepClaim.id,
              keepValue: keepClaim.mainsnak?.datavalue,
              qualifiers: JSON.parse(
                JSON.stringify(removeClaim.qualifiers || {}),
              ),
              qualifiersOrder: [...(removeClaim["qualifiers-order"] || [])],
              refHashes: refs.map((r) => r.hash),
            });
          } else {
            diffs.push({
              action: ACTION_REMOVE_CLAIM,
              pid: removePid,
              claimId: removeClaim.id,
              value: removeClaim.mainsnak?.datavalue,
              keepClaimId: keepClaim.id,
              keepValue: keepClaim.mainsnak?.datavalue,
            });
          }
        }
      }
    }

    return diffs;
  }

  /**
   * Normalise a raw URL for blocklist matching.
   *
   * Strips the scheme (http:// or https://, any case) and a leading "www." so
   * that a prefix rule such as "about.me" matches both "https://about.me/..."
   * and "https://www.about.me/...". For http(s) URLs the host part is also
   * lower-cased, because host names are case-insensitive: without this,
   * "https://About.me/x" would slip past the lower-case rule "about.me".
   * Path, query and fragment keep their case, as does any userinfo before
   * an "@".
   *
   * Input without an http(s) scheme only has a leading "www." removed and is
   * otherwise returned unchanged.
   *
   * @param {*} rawUrl - URL string; any non-string value yields "".
   * @returns {string} The normalised string used for rule matching.
   */
  function normalizeUrlForBlocklist(rawUrl) {
    if (typeof rawUrl !== "string") return "";

    const withoutScheme = rawUrl.replace(/^https?:\/\//i, "");
    const hadHttpScheme = withoutScheme.length !== rawUrl.length;
    const withoutWww = withoutScheme.replace(/^www\./i, "");

    // Without a recognised scheme we cannot tell where the host ends, so the
    // string is left as it is.
    if (!hadHttpScheme) return withoutWww;

    // The authority (userinfo@host:port) ends at the first "/", "?" or "#".
    const authorityEnd = withoutWww.search(/[\/?#]/);
    const authority =
      authorityEnd === -1 ? withoutWww : withoutWww.slice(0, authorityEnd);
    const rest = authorityEnd === -1 ? "" : withoutWww.slice(authorityEnd);

    // Userinfo is case-sensitive; only the host (and port) is lower-cased.
    const hostStart = authority.lastIndexOf("@") + 1;
    const userinfo = authority.slice(0, hostStart);
    const host = authority.slice(hostStart).toLowerCase();

    return `${userinfo}${host}${rest}`;
  }

  /**
   * Test a single (normalised) URL against one blocklist rule.
   *
   * Supported rule shapes:
   *   - matchType "prefix": matches when the URL starts with rule.pattern.
   *     An empty or non-string pattern never matches; an empty prefix would
   *     otherwise match every URL and flag every URL statement.
   *   - matchType "regex": matches when rule.compiledRegex finds a match.
   *     The regex object is reused for every URL, so its lastIndex is reset
   *     first; with a "g" or "y" flag, test() would otherwise resume where
   *     the previous call stopped and give alternating results.
   * Any other rule never matches.
   *
   * @param {string} normUrl - URL as returned by normalizeUrlForBlocklist().
   * @param {object} rule - Blocklist rule (matchType, pattern / compiledRegex).
   * @returns {boolean} True when the URL matches the rule.
   */
  function urlMatchesBlocklistRule(normUrl, rule) {
    if (rule.matchType === "prefix") {
      const { pattern } = rule;
      return (
        typeof pattern === "string" &&
        pattern !== "" &&
        normUrl.startsWith(pattern)
      );
    }
    if (rule.matchType === "regex" && rule.compiledRegex) {
      rule.compiledRegex.lastIndex = 0;
      return rule.compiledRegex.test(normUrl);
    }
    return false;
  }

  /**
   * Detect URL-datatype statements whose mainsnak value matches a rule of the
   * URL deprecation blocklist ([[User:Difool/URL-deprecation-blocklist]]),
   * read from urlBlocklistCache.rules.
   *
   * Rules are tried in order and the first match decides the outcome:
   *   rule.action "remove" -> ACTION_REMOVE_CLAIM
   *   anything else        -> ACTION_DEPRECATE_URL_CLAIM, including the
   *                           rule's deprecationReason (null when unset)
   *
   * Statements already at deprecated rank are skipped. Each diff carries the
   * rule's `sectionLabel` so the preview can show which blocklist section
   * matched (e.g. "Self-created profile platforms").
   *
   * @param {object} entity - Entity JSON with a `claims` map.
   * @returns {object[]} Proposed diffs, at most one per statement.
   */
  function detectBlocklistedUrlClaims(entity) {
    const found = [];
    const blocklistRules = urlBlocklistCache.rules;
    if (!blocklistRules.length) return found;

    for (const [pid, claims] of Object.entries(entity.claims || {})) {
      for (const claim of claims) {
        const isCandidate =
          claim.rank !== "deprecated" && claim.mainsnak?.datatype === "url";
        if (!isCandidate) continue;

        const rawUrl = claim.mainsnak?.datavalue?.value;
        if (typeof rawUrl !== "string" || !rawUrl) continue;

        const comparable = normalizeUrlForBlocklist(rawUrl);
        const hit = blocklistRules.find((rule) =>
          urlMatchesBlocklistRule(comparable, rule),
        );
        if (!hit) continue;

        const isRemoval = hit.action === "remove";
        const diff = {
          action: isRemoval ? ACTION_REMOVE_CLAIM : ACTION_DEPRECATE_URL_CLAIM,
          pid,
          claimId: claim.id,
          value: claim.mainsnak?.datavalue,
          sectionLabel: hit.sectionLabel,
        };
        if (!isRemoval) {
          diff.deprecationReason = hit.deprecationReason || null;
        }
        found.push(diff);
      }
    }

    return found;
  }

  /**
   * Propose a "mul" (default for all languages) label for human items whose
   * English, German and French labels agree.
   *
   * All of the following must hold:
   *   - the item has P31 (instance of) = Q5 (human);
   *   - no mul label is set (absent or empty);
   *   - en, de and fr labels are all present;
   *   - they are equal after normalizeText();
   *   - the shared value is non-empty and consists only of Latin-script
   *     letters, digits, separators, punctuation and combining marks (so no
   *     Cyrillic, Arabic, Hebrew, CJK, Devanagari, Greek, ...).
   *
   * @param {object} entity - Entity JSON with `claims` and `labels`.
   * @returns {object[]} Empty, or a single ACTION_SET_MUL_LABEL diff.
   */
  function detectAddMulLabel(entity) {
    const sourceLangs = ["en", "de", "fr"];
    const latinOnly = /^[\p{Script=Latin}\p{N}\p{Z}\p{P}\p{M}]+$/u;

    const instanceOfHuman = (entity.claims?.P31 || []).some(
      (claim) => claim.mainsnak?.datavalue?.value?.id === "Q5",
    );
    if (!instanceOfHuman) return [];

    // A mul label is already there: nothing to propose.
    if (entity.labels?.mul?.value) return [];

    const rawLabels = sourceLangs.map((lang) => entity.labels?.[lang]?.value);
    if (!rawLabels.every(Boolean)) return [];

    const normalisedLabels = rawLabels.map((label) => normalizeText(label));
    const shared = normalisedLabels[0];
    if (!normalisedLabels.every((label) => label === shared)) return [];
    if (!shared) return [];

    if (!latinOnly.test(shared)) return [];

    return [
      {
        action: ACTION_SET_MUL_LABEL,
        value: shared,
        matchingLangs: sourceLangs.join(", "),
      },
    ];
  }

  /**
   * Detect alias values that appear identically in more than 5 languages and
   * are not yet present as a mul alias.
   *
   * For each such value, propose:
   *   1. (visible)  ACTION_ADD_MUL_ALIAS — add the value to aliases.mul.
   *   2. (hidden)   ACTION_REMOVE_ALIAS  — remove it from each source language.
   *
   * The hidden diffs share the same rowId as the visible one, so unchecking a
   * row in the preview suppresses both the addition and all removals together.
   *
   * Guards:
   *   - "mul" language itself is never treated as a source language.
   *   - Values already present in aliases.mul (normalised) are skipped.
   *   - Values equal to the mul label (normalised) are skipped — they would
   *     just become alias = label in mul, which is redundant.
   *   - Threshold is strictly >5 (i.e. 6 or more languages).
   *   - Works for all item types (not restricted to humans).
   */
  function detectAddMulAlias(entity) {
    const diffs = [];

    // Build the set of values already in aliases.mul (normalised).
    const mulAliasNorms = new Set(
      (entity.aliases?.mul || []).map((a) => normalizeText(a.value)),
    );

    // Also skip values equal to the current mul label.
    const mulLabelNorm = entity.labels?.mul?.value
      ? normalizeText(entity.labels.mul.value)
      : null;

    // Collect alias values across all non-mul languages:
    //   normalisedValue -> { original: string, langs: Map<lang, originalValue> }
    // We keep lang->originalValue so the remove diff uses the exact stored string.
    const valueMap = new Map();

    for (const lang in entity.aliases || {}) {
      if (lang === "mul") continue;
      for (const alias of entity.aliases[lang]) {
        const norm = normalizeText(alias.value);
        if (!norm) continue;
        if (mulAliasNorms.has(norm)) continue;
        if (mulLabelNorm && norm === mulLabelNorm) continue;

        if (!valueMap.has(norm)) {
          valueMap.set(norm, { original: alias.value, langs: new Map() });
        }
        // Store the first seen original spelling; track all source languages.
        valueMap.get(norm).langs.set(lang, alias.value);
      }
    }

    for (const [norm, { original, langs }] of valueMap) {
      if (langs.size <= 5) continue; // threshold: strictly more than 5

      const rowId = `addMulAlias_${norm}`;
      const sourceLangs = [...langs.keys()];

      // Visible diff: add the value as a mul alias.
      diffs.push({
        action: ACTION_ADD_MUL_ALIAS,
        rowId,
        value: original,
        sourceLangs,
        langCount: langs.size,
      });

      // Hidden diffs: remove the value from each source language.
      for (const [lang, origValue] of langs) {
        diffs.push({
          action: ACTION_REMOVE_ALIAS,
          _hidden: true,
          rowId,
          lang,
          value: origValue,
        });
      }
    }

    return diffs;
  }

  /**
   * Propose replacing labels that were copied from English with the title of
   * the matching Wikipedia article.
   *
   * For every "{lang}wiki" sitelink other than enwiki, the label in {lang} is
   * a candidate when either
   *   (a) it equals the English label after normalizeText() (direct copy), or
   *   (b) it is absent/empty, the mul label equals the English label, and a
   *       description exists in {lang} — someone has worked on that
   *       language, so filling the label is meaningful rather than noise.
   * The proposed label is the sitelink title without a trailing parenthetical
   * disambiguation. Nothing is proposed when that title is empty or when,
   * ignoring apostrophe style, it equals the current label or the English
   * label.
   *
   * Runs on items with P31 = Q5 (human) only, unless
   * DEV_FIX_COPIED_LABELS_ALL_TYPES is set: Wikipedia titles are always
   * capitalised and may carry comma-based geographic qualifications
   * (e.g. "Springfield, Illinois") that are hard to normalise safely for
   * non-human items.
   *
   * @param {object} entity - Entity JSON (claims, labels, descriptions,
   *   sitelinks).
   * @returns {object[]} ACTION_NORMALIZE diffs with field "label".
   */
  function detectFixCopiedLabels(entity) {
    const trailingParenthetical = /\s*\([^()]*\)\s*$/;

    // Map apostrophe look-alikes to a plain apostrophe, for comparison only,
    // so that a mere difference in apostrophe style (e.g. "Mika’ela" vs
    // "Mika'ela") does not produce a diff.
    const unifyApostrophes = (text) =>
      text.replace(/[\u2018\u2019\u02BC\u0060\u00B4\u02B9]/g, "'");

    if (!DEV_FIX_COPIED_LABELS_ALL_TYPES) {
      const instanceOfHuman = (entity.claims?.P31 || []).some(
        (claim) => claim.mainsnak?.datavalue?.value?.id === "Q5",
      );
      if (!instanceOfHuman) return [];
    }

    const englishLabel = entity.labels?.en?.value;
    if (!englishLabel) return [];
    const englishNorm = normalizeText(englishLabel);
    if (!englishNorm) return [];

    // mul = en signals the "English copied everywhere" pattern, which makes
    // missing per-language labels eligible as well.
    const mulEqualsEnglish =
      normalizeText(entity.labels?.mul?.value || "") === englishNorm;
    const englishKey = unifyApostrophes(englishNorm);

    const diffs = [];

    for (const [siteKey, sitelink] of Object.entries(entity.sitelinks || {})) {
      // Only "{lang}wiki" sitelinks, and never the English one.
      if (!siteKey.endsWith("wiki")) continue;
      const langCode = siteKey.slice(0, -4);
      if (langCode === "en") continue;
      if (!sitelink?.title) continue;

      const currentLabel = entity.labels?.[langCode]?.value || "";
      const currentNorm = normalizeText(currentLabel);

      const copiedFromEnglish = currentNorm === englishNorm;
      const fillableGap =
        !currentNorm &&
        mulEqualsEnglish &&
        Boolean(entity.descriptions?.[langCode]?.value);
      if (!copiedFromEnglish && !fillableGap) continue;

      const proposed = sitelink.title.replace(trailingParenthetical, "").trim();
      if (!proposed) continue;

      const proposedKey = unifyApostrophes(normalizeText(proposed));
      // Title already matches the current label: nothing would change.
      if (proposedKey === unifyApostrophes(currentNorm)) continue;
      // Title is the English label itself: nothing to fix.
      if (proposedKey === englishKey) continue;

      diffs.push({
        action: ACTION_NORMALIZE,
        field: "label",
        claimId: null,
        lang: langCode,
        before: currentLabel,
        after: proposed,
        sitelinkTitle: sitelink.title,
      });
    }

    return diffs;
  }

  // ==== 20 Detector registry ================================================

  /**
   * Render the preview cells for a "remove statement" row.
   *
   * Always yields the property link and a link to the removed value. When the
   * row names a statement (keepClaimId) or value (keepValue) that justifies
   * the removal, a third "because of" cell is appended: a link when the
   * statement ID is known, a plain rendered value otherwise.
   *
   * @param {object} row - Diff with pid, claimId, value and optionally
   *   keepClaimId / keepValue.
   * @param {object} labels - Label lookup handed on to the render helpers.
   * @returns {Array} Two or three rendered cells.
   */
  function renderRemoveRow(row, labels) {
    const { pid, claimId, value, keepClaimId, keepValue } = row;

    const cells = [
      renderLink({ id: pid, claimId, labels }),
      renderLink({ claimId, datavalue: value, labels }),
    ];

    if (keepClaimId) {
      cells.push(
        renderLink({ claimId: keepClaimId, datavalue: keepValue, labels }),
      );
    } else if (keepValue) {
      cells.push(renderValue(keepValue, labels));
    }

    return cells;
  }

  /**
   * Render the preview cells for a "duplicate value" row: the property, the
   * value of the statement being merged away (row.fromClaimId) and the value
   * of the statement it is merged into (row.toClaimId).
   *
   * Both statements are looked up in the entity by ID; when a lookup fails
   * the corresponding link is rendered without a datavalue.
   *
   * `labels` is passed to all three links. The two value links previously
   * omitted it, unlike the value links built by renderRemoveRow().
   * NOTE(review): assumes renderLink() uses `labels` to resolve entity
   * values to readable labels — confirm.
   *
   * @param {object} row - Diff with pid, fromClaimId and toClaimId.
   * @param {object} labels - Label lookup handed on to renderLink().
   * @param {object} entity - Entity JSON the statements are looked up in.
   * @returns {Array} Three rendered cells: property, from-value, to-value.
   */
  function renderDuplicateValueRow(row, labels, entity) {
    const propLink = renderLink({
      id: row.pid,
      claimId: row.fromClaimId,
      labels,
    });
    const from = findClaimById(entity, row.fromClaimId);
    const to = findClaimById(entity, row.toClaimId);
    return [
      propLink,
      renderLink({
        claimId: row.fromClaimId,
        datavalue: from?.mainsnak?.datavalue,
        labels,
      }),
      renderLink({
        claimId: row.toClaimId,
        datavalue: to?.mainsnak?.datavalue,
        labels,
      }),
    ];
  }

  /**
   * Registry of cleanup categories, keyed by category id.
   *
   * renderCleanupUI looks an entry up by the `type` of each preview row and
   * reads the following fields:
   *   - label:     message key (resolved with getMsg) for the category heading
   *   - headers:   message keys (resolved with getMsg) for the table columns;
   *                when the list contains "property" the rows are sorted by
   *                the numeric part of `row.pid` before rendering
   *   - renderRow: (row, labels, entity) => array of cells appended after the
   *                per-row checkbox cell
   *
   * The other fields (detect, summaryLabel, isRemoveRefCategory,
   * requiresLargeBuffers, requiresHeavyComputing) are not read in this
   * section. NOTE(review): presumably consumed by the detection/apply
   * pipeline elsewhere in the file — confirm there before changing them.
   */
  const detectors = {
    // Only registered when the DEV_WIKIMEDIA_NO_SITELINKS flag is truthy.
    ...(DEV_WIKIMEDIA_NO_SITELINKS
      ? {
          wikimedia_no_sitelinks: createReferenceDetector(
            "wikimedia_no_sitelinks",
          ),
        }
      : {}),
    wikimedia: createReferenceDetector("wikimedia"),
    aggregator: createReferenceDetector("aggregator"),
    community: createReferenceDetector("community"),
    redundant: createReferenceDetector("redundant"),
    inferred: createReferenceDetector("inferred"),
    obsolete: createReferenceDetector("obsolete"),
    invalid: createReferenceDetector("invalid"),
    self_stated_in: createReferenceDetector("self_stated_in"),

    removeObsoleteSnaks: {
      label: "partial_obsolete",
      headers: ["property", "removedValues"],
      isRemoveRefCategory: false,
      summaryLabel: "remove obsolete snaks from references",
      detect: detectObsoleteSnaksInReferences,
      renderRow(row, labels) {
        return [
          renderLink({ id: row.pid, claimId: row.claimId, labels }),
          resolvePidList(row.removedKeys, labels),
        ];
      },
    },

    normalizeLabels: {
      label: "normalizeLabels",
      headers: ["field", "lang", "original", "normalized"],
      isRemoveRefCategory: false,
      summaryLabel: "normalize text",
      detect: detectNormalizeLabels,
      renderRow: (row) => [
        row.field,
        renderLangCode(row.lang, { hintOnly: true }),
        visualizeInvisibleChars(row.before),
        visualizeInvisibleChars(row.after),
      ],
    },
    removeAliasEqualsLabel: {
      label: "removeAliasEqualsLabel",
      headers: ["lang", "aliasLabel", "context"], // "context" column shows which rule matched (derived from row.reason)
      summaryLabel: "remove alias=label/mul",
      isRemoveRefCategory: false,
      detect: detectRemoveDuplicateAliases,
      renderRow: (row) => [
        renderLangCode(row.lang),
        visualizeInvisibleChars(row.value),
        row.reason === "alias_equals_mul_label"
          ? "= mul label"
          : row.reason === "alias_equals_mul_alias"
            ? "= mul alias"
            : row.reason === "duplicate"
              ? "duplicate"
              : "= label", // alias_equals_label
      ],
    },
    removeLowPrecisionDates: {
      label: "removeLowPrecisionDates",
      headers: ["property", "removed", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "remove redundant dates",
      detect: detectLowPrecisionDates,
      renderRow: renderRemoveRow,
    },
    removeRedundantPreferred: {
      label: "removeRedundantPreferred",
      headers: ["property", "p7452"],
      isRemoveRefCategory: false,
      summaryLabel: "downgrade preferred ranks",
      detect: detectRedundantPreferred,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        row.removedQualifier ? getMsg("yes") : getMsg("no"),
      ],
    },
    removeExpiredPreferred: {
      label: "removeExpiredPreferred",
      headers: ["property", "p7452"],
      isRemoveRefCategory: false,
      summaryLabel: "downgrade expired preferred ranks",
      detect: detectExpiredPreferred,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        row.removedQualifier ? getMsg("yes") : getMsg("no"),
      ],
    },
    removeEmptyEndTime: {
      label: "removeEmptyEndTime",
      headers: ["property", "value"],
      isRemoveRefCategory: false,
      summaryLabel: "remove empty end time qualifiers",
      detect: detectEmptyEndTime,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        row.qualifierValue ? JSON.stringify(row.qualifierValue) : "(no value)",
      ],
    },

    redundantCitizenshipDates: {
      label: "redundantCitizenshipDates",
      headers: ["property", "value", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "remove redundant citizenship start/end dates",
      detect: detectRedundantCitizenshipDates,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderValue(row.qualifierValue, labels),
        renderEntity(row.matchedPid, labels),
      ],
    },

    mergeSameDateClaims: {
      label: "mergeSameDateClaims",
      headers: ["property", "value", "mergedInto"],
      summaryLabel: "merge same-date claims",
      isRemoveRefCategory: false,
      detect: detectMergeSameDateClaims,
      renderRow: renderDuplicateValueRow,
    },
    replaceWrongProperty: {
      label: "replaceWrongProperty",
      headers: ["property", "context", "oldProperty", "newProperty"],
      summaryLabel: "replace property",
      isRemoveRefCategory: false,
      detect: detectWrongPropertyClaims,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        getMsg(row.context),
        renderEntity(row.oldProperty, labels),
        renderEntity(row.newProperty, labels),
      ],
    },
    moveRetrievedFromExternalId: {
      label: "moveRetrievedFromExternalId",
      headers: ["property", "value"],
      isRemoveRefCategory: false,
      summaryLabel: "move P813 from external-id to reference",
      detect: detectMoveRetrievedFromExternalId,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderValue(row.qualifierValue),
      ],
    },
    duplicateValues: {
      label: "duplicateValues",
      headers: ["property", "value", "mergedInto"],
      summaryLabel: "merge duplicate values",
      isRemoveRefCategory: false,
      detect: detectDuplicateValues,
      renderRow: renderDuplicateValueRow,
    },
    dupRetrieved: {
      label: "dupRetrieved",
      headers: ["property", "removedValues"],
      isRemoveRefCategory: true,
      summaryLabel: "duplicate",
      detect: detectDuplicateRefs,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        resolvePidList(row.removedKeys, labels),
      ],
    },
    removeRedundantOccupation: {
      label: "removeRedundantOccupation",
      headers: ["property", "removed", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "remove redundant occupations",
      detect: detectRedundantOccupation,
      renderRow: renderRemoveRow,
    },
    removeJulianGregorianDates: {
      label: "removeJulianGregorianDates",
      headers: ["property", "removed", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "remove Julian/Gregorian duplicate dates",
      detect: detectJulianGregorianDuplicateDates,
      renderRow: renderRemoveRow,
    },
    upgradePreciseDate: {
      label: "upgradePreciseDate",
      headers: ["property", "value", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "upgrade precise date to preferred rank",
      detect: detectUpgradePreciseDate,
      renderRow: (row, labels) => {
        // Only the ACTION_UPGRADE_PRECISE_DATE half is shown in the table;
        // the companion ACTION_DOWNGRADE_PREFERRED diff shares the same rowId
        // group and is applied automatically.
        const propLink = renderLink({
          id: row.pid,
          claimId: row.claimId,
          labels,
        });
        const preciseLink = renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
        });
        const lessLink = row.deprClaimId
          ? renderLink({
              claimId: row.deprClaimId,
              datavalue: row.deprValue,
              labels,
            })
          : "";
        return [propLink, preciseLink, lessLink];
      },
    },
    convertWikipediaStatedIn: {
      label: "convertWikipediaStatedIn",
      headers: ["property", "context", "oldProperty", "newProperty"],
      summaryLabel: "replace property",
      isRemoveRefCategory: false,
      detect: detectConvertWikipediaStatedIn,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        getMsg(row.context),
        renderEntity(row.oldProperty, labels),
        renderEntity(row.newProperty, labels),
      ],
    },
    convertInvalidStatedInReference: {
      label: "convertInvalidStatedInReference",
      headers: ["property", "externalIdProperty", "oldValue", "newValue"],
      summaryLabel: "fix invalid 'stated in' in reference",
      isRemoveRefCategory: false,
      requiresLargeBuffers: true,
      detect: detectInvalidStatedInReference,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderPidLink(
          row.externalIdPid,
          renderEntity(row.externalIdPid, labels),
        ),
        renderQidLink(row.oldValue, renderEntity(row.oldValue, labels)),
        renderQidLink(row.newValue, renderEntity(row.newValue, labels)),
      ],
    },
    mismatchedWikimediaImport: {
      label: "mismatchedWikimediaImport",
      headers: ["property", "referenceUrl", "oldValue", "newValue"],
      summaryLabel: "fix mismatched P143 vs P4656",
      isRemoveRefCategory: false,
      detect: detectMismatchedWikimediaImport,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        formatUrlForDisplay(row.importUrl || ""),
        renderQidLink(row.oldValue, renderEntity(row.oldValue, labels)),
        renderQidLink(row.newValue, renderEntity(row.newValue, labels)),
      ],
    },
    removeIdDescriptions: {
      label: "removeIdDescriptions",
      headers: ["description", "idPresent"],
      summaryLabel: "remove ID-style descriptions",
      isRemoveRefCategory: false,
      detect: detectIdDescriptions,
      renderRow: (row) => [
        row.before,
        row.idPresent ? getMsg("yes") : getMsg("no"),
      ],
    },
    addExternalIdToReference: {
      label: ACTION_ADD_EXTERNAL_ID_TO_REFERENCE,
      headers: [
        "property",
        "suggestedProperty",
        "extractedId",
        "keepUrl",
        "referenceUrl",
      ],
      summaryLabel: "add external ID to reference",
      isRemoveRefCategory: false,
      detect: detectAddExternalIdToReference,
      requiresLargeBuffers: true,
      requiresHeavyComputing: true,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderEntity(row.suggestedProperty, labels),
        row.extractedId,
        row.keepUrl ? getMsg("yes") : getMsg("no"),
        formatUrlForDisplay(row.referenceUrl),
      ],
    },
    splitMultipleReferenceUrls: {
      label: "splitMultipleReferenceUrls",
      headers: ["property", "count"],
      isRemoveRefCategory: false,
      summaryLabel: "split multiple reference URLs",
      detect: detectMultipleReferenceUrls,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        String(row.urlCount),
      ],
    },
    cleanUrls: {
      label: "cleanUrls",
      headers: ["property", "original", "normalized"],
      isRemoveRefCategory: false,
      summaryLabel: "clean URLs",
      detect: detectCleanUrls,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        formatUrlForDisplay(row.before),
        formatUrlForDisplay(row.after),
      ],
    },
    removeSelfCite: {
      label: "removeSelfCite",
      // NOTE(review): only two headers are declared, while the visible tail of
      // renderRemoveRow returns three cells — confirm whether the third cell is
      // intended to appear without a column heading.
      headers: ["property", "removed"],
      isRemoveRefCategory: false,
      summaryLabel: "remove self-citation",
      detect: detectSelfCite,
      renderRow: renderRemoveRow,
    },
    absorbDescribedBySource: {
      label: "absorbDescribedBySource",
      headers: ["property", "removed", "externalIdClaim"],
      isRemoveRefCategory: false,
      requiresLargeBuffers: true,
      summaryLabel: "remove redundant described-by-source",
      detect: detectAbsorbDescribedBySource,
      renderRow: (row, labels) => {
        const propLink = renderLink({
          id: row.pid,
          claimId: row.claimId,
          labels,
        });
        const removeLink = renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
        });
        // Third cell: prefer the external-id claim, then the kept claim, and
        // fall back to the plain kept value when neither claim id is present.
        const extIdLink = row.extIdClaimId
          ? renderLink({ id: row.extIdPid, claimId: row.extIdClaimId, labels })
          : row.keepClaimId
            ? renderLink({
                claimId: row.keepClaimId,
                datavalue: row.keepValue,
                labels,
              })
            : renderValue(row.keepValue, labels);
        return [propLink, removeLink, extIdLink];
      },
    },
    absorbUrlClaim: {
      label: "absorbUrlClaim",
      headers: ["urlClaim", "removed", "externalIdClaim"],
      isRemoveRefCategory: false,
      requiresLargeBuffers: true,
      summaryLabel: "remove redundant URL claim",
      detect: detectAbsorbUrlClaim,
      renderRow: (row, labels) => {
        const propLink = renderLink({
          id: row.pid,
          claimId: row.claimId,
          labels,
        });
        const removeLink = renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
          isUrl: true,
        });
        const extIdLink = row.extIdClaimId
          ? renderLink({ id: row.extIdPid, claimId: row.extIdClaimId, labels })
          : row.keepClaimId
            ? renderLink({
                claimId: row.keepClaimId,
                datavalue: row.keepValue,
                labels,
              })
            : renderValue(row.keepValue, labels);
        return [propLink, removeLink, extIdLink];
      },
    },
    convertUrlToExtId: {
      label: "convertUrlToExtId",
      headers: ["urlClaim", "removed", "newProperty", "extractedId"],
      isRemoveRefCategory: false,
      requiresLargeBuffers: true,
      summaryLabel: "convert URL claim to external ID",
      detect: () => [], // driven by detectAbsorbUrlClaim via DEV_ABSORB_URL_CLAIM_CREATE_MISSING
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
          isUrl: true,
        }),
        renderEntity(row.extIdPid, labels),
        row.extractedId,
      ],
    },
    duplicateUrlClaims: {
      label: "duplicateUrlClaims",
      headers: ["property", "removed", "becauseOf"],
      isRemoveRefCategory: false,
      summaryLabel: "merge duplicate URL claims",
      detect: detectDuplicateUrlClaims,
      renderRow: (row, labels) => {
        const propLink = renderLink({
          id: row.pid,
          claimId: row.claimId,
          labels,
        });
        const removeLink = renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
          isUrl: true,
        });
        const keepLink = row.extIdClaimId
          ? renderLink({ id: row.extIdPid, claimId: row.extIdClaimId, labels })
          : row.keepClaimId
            ? renderLink({
                claimId: row.keepClaimId,
                datavalue: row.keepValue,
                labels,
              })
            : renderValue(row.keepValue, labels);
        return [propLink, removeLink, keepLink];
      },
    },
    removeRedundantRefUrl: {
      label: "removeRedundantRefUrl",
      headers: ["property", "referenceUrl"],
      isRemoveRefCategory: false,
      summaryLabel: "remove redundant reference URL (P854)",
      detect: detectRemoveRedundantRefUrl,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        formatUrlForDisplay(row.referenceUrl || row._redundantRefUrl || ""),
      ],
    },

    blocklistedUrlClaims: {
      label: "blocklistedUrlClaims",
      headers: ["property", "value", "blocklistAction", "blocklistReason"],
      isRemoveRefCategory: false,
      summaryLabel: "deprecate/remove blocklisted URL claims",
      detect: detectBlocklistedUrlClaims,
      renderRow: (row, labels) => [
        renderLink({ id: row.pid, claimId: row.claimId, labels }),
        renderLink({
          claimId: row.claimId,
          datavalue: row.value,
          labels,
          isUrl: true,
        }),
        row.action === ACTION_REMOVE_CLAIM ? "remove" : "deprecate",
        row.sectionLabel || "",
      ],
    },

    addMulLabel: {
      label: "addMulLabel",
      headers: ["mulLabelValue", "mulLabelLanguages"],
      isRemoveRefCategory: false,
      summaryLabel: "add mul label",
      detect: detectAddMulLabel,
      renderRow: (row) => [row.value, row.matchingLangs],
    },
    addMulAlias: {
      label: "addMulAlias",
      headers: ["mulLabelValue", "mulAliasLangCount"],
      isRemoveRefCategory: false,
      summaryLabel: "add mul alias",
      detect: detectAddMulAlias,
      renderRow: (row) => [
        row.value,
        // Shows the total count plus at most the first eight source languages.
        `${row.langCount} (${row.sourceLangs.slice(0, 8).join(", ")}${row.langCount > 8 ? ", …" : ""})`,
      ],
    },

    fixCopiedLabel: {
      label: "fixCopiedLabel",
      headers: [
        "fixCopiedLabelLang",
        "fixCopiedLabelBefore",
        "fixCopiedLabelAfter",
      ],
      isRemoveRefCategory: false,
      summaryLabel: "fix copied labels",
      detect: detectFixCopiedLabels,
      renderRow: (row) => [
        renderLangCode(row.lang),
        row.before,
        // When a sitelink title is known, the new label links to the
        // corresponding Wikipedia page (opened in a new tab).
        row.sitelinkTitle
          ? $("<a>")
              .attr(
                "href",
                `https://${row.lang}.wikipedia.org/wiki/${encodeURIComponent(row.sitelinkTitle.replace(/ /g, "_"))}`,
              )
              .attr("target", "_blank")
              .text(row.after)
          : row.after,
      ],
    },
  };
  // ==== 21 UI helpers =======================================================

  /**
   * Scrape display labels for properties and items from the rendered page.
   *
   * Three passes over the DOM, earlier passes taking precedence:
   *   1. statement group headers (keyed by their data-property-id),
   *   2. snak property links whose title starts with "Property:",
   *   3. value links, keyed by a Q-id taken from the title or the href.
   *
   * @returns {Object} Map from entity id (P… / Q…) to trimmed label text.
   */
  function collectPropertyLabels() {
    const labelsById = {};

    // Pass 1: statement group headers always win.
    for (const groupEl of document.querySelectorAll(
      ".wikibase-statementgroupview",
    )) {
      const pid = groupEl.getAttribute("data-property-id");
      const headerLink = groupEl.querySelector(
        ".wikibase-statementgroupview-property-label a",
      );
      if (!pid || !headerLink) continue;
      labelsById[pid] = headerLink.textContent.trim();
    }

    // Pass 2: property links inside snaks fill in ids not seen yet.
    for (const anchor of document.querySelectorAll(
      ".wikibase-snakview-property a",
    )) {
      const title = anchor.getAttribute("title");
      if (!title?.startsWith("Property:")) continue;
      const pid = title.replace("Property:", "");
      if (!labelsById[pid]) labelsById[pid] = anchor.textContent.trim();
    }

    // Pass 3: item links in values; the title is preferred over the href.
    const valueAnchors = document.querySelectorAll(
      ".wikibase-snakview-value a, .wikibase-statementview-value a",
    );
    for (const anchor of valueAnchors) {
      const title = anchor.getAttribute("title");
      let qid = null;
      if (/^Q\d+$/.test(title)) {
        qid = title;
      } else {
        const href = anchor.getAttribute("href") || "";
        const hrefMatch = href.match(/\/wiki\/(Q\d+)/);
        if (hrefMatch) qid = hrefMatch[1];
      }
      if (!qid || labelsById[qid]) continue;

      const text = anchor.textContent.trim();
      if (text) labelsById[qid] = text;
    }

    return labelsById;
  }

  /**
   * Format a list of property ids as a comma-separated string, decorating
   * each id with its label when one is known ("Label (Pid)").
   *
   * @param {string[]|null|undefined} pidList - Ids to format; nullish yields "".
   * @param {Object} [labels={}] - Map of id to label. Optional, matching the
   *   other render helpers in this section; a missing map means bare ids.
   * @returns {string} The formatted, ", "-joined list.
   */
  function resolvePidList(pidList, labels = {}) {
    const known = labels ?? {};
    return (pidList || [])
      .map((pid) => (known[pid] ? `${known[pid]} (${pid})` : pid))
      .join(", ");
  }

  /**
   * Render a language code as a jQuery <span> that wraps <code>langCode</code>.
   *
   * The language name is looked up in wikipediaLangNamesCache:
   *   - default:        the name follows the code as a text node (" Name")
   *   - hintOnly=true:  the name becomes the span's title attribute (tooltip)
   * When the cache has no name, only the <code> element is rendered.
   */
  function renderLangCode(langCode, { hintOnly = false } = {}) {
    const langName = wikipediaLangNamesCache.get(langCode);
    const wrapper = $("<span>").append($("<code>").text(langCode));
    if (!langName) return wrapper;
    return hintOnly
      ? wrapper.attr("title", langName)
      : wrapper.append(document.createTextNode(" " + langName));
  }

  /**
   * Format an entity id for display.
   *
   * @param {string} id - Entity id (e.g. a P- or Q-id).
   * @param {Object} [labels={}] - Map of id to label.
   * @returns {string} "Label (id)" when a label is known, otherwise the id.
   */
  function renderEntity(id, labels = {}) {
    const label = labels[id];
    if (!label) return id;
    return `${label} (${id})`;
  }

  /**
   * Turn a datavalue object ({ type, value }) into a display string.
   *
   * - anything whose value has a `time` field: "<date> [<calendar>, <precision>]"
   * - "wikibase-entityid": delegated to renderEntity
   * - "string" and any other string-valued type: the raw value
   * - "globecoordinate": "(lat, lon)"
   * - everything else: the JSON serialisation of the whole datavalue
   *
   * @param {Object|null|undefined} dataValue - Datavalue to render.
   * @param {Object} [labels={}] - Map of entity id to label.
   * @returns {string} Display text; "" for a missing datavalue.
   */
  function renderValue(dataValue, labels = {}) {
    if (!dataValue) return "";

    const kind = dataValue.type;
    const payload = dataValue.value;

    // Time values are recognised by shape rather than by declared type.
    if (payload?.time) {
      const calendarNames = {
        [URL_PROLEPTIC_GREGORIAN_CALENDAR]: "Gregorian",
        [URL_PROLEPTIC_JULIAN_CALENDAR]: "Julian",
      };
      const stamp = payload.time.replace("+", "").replace("T00:00:00Z", "");
      const calendarName = calendarNames[payload.calendarmodel] || "Unknown";
      const precisionName =
        precisionLabels[payload.precision] || payload.precision;
      return `${stamp} [${calendarName}, ${precisionName}]`;
    }

    switch (kind) {
      case "wikibase-entityid":
        return renderEntity(payload?.id, labels);
      case "string":
        return payload;
      case "globecoordinate":
        return `(${payload.latitude}, ${payload.longitude})`;
      default:
        return typeof payload === "string"
          ? payload
          : JSON.stringify(dataValue);
    }
  }

  /**
   * Create an in-page anchor (<a href="#claimId">) for a claim.
   *
   * The link text is the rendered entity when `id` is given, otherwise the
   * rendered `datavalue`; with `isUrl` the text is additionally passed through
   * formatUrlForDisplay.
   *
   * @returns {jQuery} The anchor element.
   */
  function renderLink({ id, claimId, labels = {}, datavalue, isUrl = false }) {
    let linkText;
    if (id) {
      linkText = renderEntity(id, labels);
    } else {
      linkText = renderValue(datavalue, labels);
    }
    if (isUrl) {
      linkText = formatUrlForDisplay(linkText);
    }

    const anchor = $("<a>");
    return anchor.attr("href", `#${claimId}`).text(linkText);
  }

  /**
   * Create an anchor pointing at a property page (/wiki/Property:<pid>).
   *
   * @param {string} pid - Property id.
   * @param {string} [text] - Link text; falls back to the pid when falsy.
   * @returns {jQuery} The anchor element.
   */
  function renderPidLink(pid, text) {
    const anchor = $("<a>");
    anchor.attr("href", `/wiki/Property:${pid}`);
    anchor.text(text || pid);
    return anchor;
  }

  /**
   * Create an anchor pointing at an item page (/wiki/<qid>).
   *
   * @param {string} qid - Item id.
   * @param {string} [text] - Link text; falls back to the qid when falsy.
   * @returns {jQuery} The anchor element.
   */
  function renderQidLink(qid, text) {
    const anchor = $("<a>");
    anchor.attr("href", `/wiki/${qid}`);
    anchor.text(text || qid);
    return anchor;
  }

  /**
   * Wrap one cell value in a new <td>.
   * jQuery objects and DOM elements are appended as nodes; every other value
   * is stringified (null/undefined become "") and set as text.
   */
  function renderCell(cell) {
    const td = $("<td>");
    const isNode = cell instanceof jQuery || cell instanceof HTMLElement;
    if (isNode) {
      td.append(cell);
      return td;
    }
    td.text(String(cell ?? ""));
    return td;
  }

  // ==== 22 UI rendering =====================================================

  /**
   * Build the cleanup preview box and append it to the entity terms section.
   *
   * The box contains one table per category found in `previewRows` (grouped
   * by `row.type`, looked up in `detectors`), a master checkbox per category,
   * a checkbox per row, a "run" button and a "settings" button.
   *
   * @param {Object} entity - Entity handed through to each category's renderRow.
   * @param {Object[]} previewRows - Rows to show; each needs a `type`, and a
   *   `pid` when its category has a "property" column. `rowId` is optional.
   * @param {Function} applyCleanupChanges - Called on "run" with
   *   (checkboxStates, rowStates): category id -> boolean, row id -> boolean.
   */
  function renderCleanupUI(entity, previewRows, applyCleanupChanges) {
    const box = $(`
      <div id="WikidataCleanup_box" style="margin-top:10px; border:1px solid #ccc; padding:10px;">
        <h3>${getMsg("cleanupTitle")}</h3>
        <p>${getMsg("cleanupIntro")}</p>
        <div id="WikidataCleanup_preview"></div>
      </div>
    `);

    const checkboxStates = {};
    const rowStates = {};
    const rowWidgets = {};
    const preview = box.find("#WikidataCleanup_preview");

    const runBtn = new OO.ui.ButtonWidget({
      label: getMsg("runButton"),
      flags: ["progressive", "primary"],
    });
    runBtn.on("click", () => applyCleanupChanges(checkboxStates, rowStates));

    const settingsBtn = new OO.ui.ButtonWidget({
      label: getMsg("settingsButton"),
    });
    settingsBtn.on("click", () =>
      showSettingsDialog(cache_loadSettings(), cache_saveSettings),
    );

    const controls = $("<div>").css({
      marginTop: "10px",
      display: "flex",
      justifyContent: "space-between",
    });
    controls.append(runBtn.$element, settingsBtn.$element);
    box.append(controls);

    if (!previewRows.length) {
      preview.html(
        `<div class="mw-message-box mw-message-box-notice"><strong>${getMsg("noCleanups")}</strong></div>`,
      );
    } else {
      const labels = collectPropertyLabels();
      const grouped = {};
      for (const row of previewRows) {
        (grouped[row.type] ||= []).push(row);
      }

      for (const cat of Object.keys(grouped)) {
        const def = detectors[cat];
        if (!def) continue;

        let rows = grouped[cat];
        if (def.headers.includes("property")) {
          rows = rows
            .slice()
            .sort(
              (a, b) =>
                parseInt(a.pid.replace("P", ""), 10) -
                parseInt(b.pid.replace("P", ""), 10),
            );
        }

        // Resolve every row's id exactly once. Rows without their own rowId
        // get a positional fallback; the master checkbox logic below must use
        // the very same ids, otherwise such rows cannot be found again.
        const rowIds = rows.map(
          (rowObj, idx) => rowObj.rowId || `${cat}_${idx}`,
        );

        const master = new OO.ui.CheckboxInputWidget({ selected: true });
        checkboxStates[cat] = true;
        const masterInput = master.$element.find("input")[0];

        const labelWidget = new OO.ui.LabelWidget({ label: getMsg(def.label) });
        const row = $("<div>").css({ margin: "6px 0" });
        row.append(master.$element, labelWidget.$element);
        preview.append(row);

        const thead = $("<thead><tr></tr></thead>");
        ["", ...def.headers.map((h) => getMsg(h))].forEach((h) =>
          thead.find("tr").append($("<th>").text(h)),
        );

        const colgroup = $("<colgroup>").append('<col style="width:2.2em">');
        for (let i = 0; i < def.headers.length; i++) colgroup.append("<col>");

        const table = $(
          '<table class="wikitable" style="width:100%; table-layout:fixed; margin-bottom:10px;">',
        ).append(colgroup, thead, "<tbody></tbody>");
        const tbody = table.find("tbody");

        // Sync the master checkbox with the row checkboxes:
        // none checked -> unchecked, all checked -> checked, otherwise
        // indeterminate (the category still counts as enabled).
        const updateMasterCheckbox = () => {
          const checkedCount = rowIds.filter((id) => rowStates[id]).length;
          if (checkedCount === 0) {
            masterInput.checked = false;
            masterInput.indeterminate = false;
            checkboxStates[cat] = false;
          } else if (checkedCount === rowIds.length) {
            masterInput.checked = true;
            masterInput.indeterminate = false;
            checkboxStates[cat] = true;
          } else {
            masterInput.checked = false;
            masterInput.indeterminate = true;
            checkboxStates[cat] = true;
          }
        };

        rows.forEach((rowObj, idx) => {
          const rowId = rowIds[idx];
          rowStates[rowId] = true;
          const rowCb = new OO.ui.CheckboxInputWidget({ selected: true });
          rowWidgets[rowId] = rowCb;

          rowCb.on("change", (val) => {
            rowStates[rowId] = val;
            updateMasterCheckbox();
          });

          const tr = $("<tr>").append($("<td>").append(rowCb.$element));
          def
            .renderRow(rowObj, labels, entity)
            .forEach((cell) => tr.append(renderCell(cell)));
          tbody.append(tr);
        });

        preview.append(table);

        // Clicking the master checkbox applies its state to every row.
        $(masterInput).on("click", () => {
          const checked = masterInput.checked;
          checkboxStates[cat] = checked;
          rowIds.forEach((id) => {
            rowStates[id] = checked;
            rowWidgets[id].setSelected(checked, true);
          });
          masterInput.indeterminate = false;
        });
      }
    }

    $(".wikibase-entitytermsview-entitytermsforlanguagelistview").append(box);
  }

  // ==== 23 Diff application =================================================

  /**
   * Move the snak identified by `snakHash` from `oldProp` to `newProp` inside
   * a reference, mutating `ref` in place.
   *
   * The moved snak gets `property = newProp` and loses its `hash` (the old
   * hash no longer matches the changed snak). When `snaks-order` lists
   * `oldProp` before `newProp`, the moved snak is placed in front of the
   * existing `newProp` snaks, otherwise behind them.
   *
   * `snaks-order` handling:
   *   - all `oldProp` snaks moved: `newProp` takes over `oldProp`'s position;
   *   - some `oldProp` snaks remain: `oldProp` stays listed and `newProp` is
   *     inserted right after it (unless it is already listed), so the
   *     remaining snaks do not lose their order entry;
   *   - `newProp` is always listed afterwards; duplicates are removed;
   *   - a reference without a `snaks-order` array is left without one.
   *
   * Does nothing when `oldProp` has no snaks or no snak matches `snakHash`.
   *
   * @param {Object} ref - Reference with `snaks` and optional `snaks-order`.
   * @param {string} oldProp - Property id currently holding the snak.
   * @param {string} newProp - Property id the snak should move to.
   * @param {string} snakHash - Hash of the snak to move.
   */
  function replacePropertyInReference(ref, oldProp, newProp, snakHash) {
    const oldSnaks = ref.snaks?.[oldProp];
    if (!oldSnaks) return;

    const order = Array.isArray(ref["snaks-order"])
      ? ref["snaks-order"]
      : null;
    const oldIdx = order ? order.indexOf(oldProp) : -1;
    const newIdx = order ? order.indexOf(newProp) : -1;

    const toMove = [];
    const keepOld = [];
    for (const snak of oldSnaks) {
      if (snak.hash === snakHash) {
        snak.property = newProp;
        delete snak.hash;
        toMove.push(snak);
      } else {
        keepOld.push(snak);
      }
    }
    if (!toMove.length) return;

    const existing = ref.snaks[newProp] || [];
    const movedFirst = oldIdx !== -1 && newIdx !== -1 && oldIdx < newIdx;
    ref.snaks[newProp] = movedFirst
      ? [...toMove, ...existing]
      : [...existing, ...toMove];

    if (keepOld.length) ref.snaks[oldProp] = keepOld;
    else delete ref.snaks[oldProp];

    if (!order) return;

    const nextOrder = [...order];
    if (oldIdx !== -1) {
      if (!keepOld.length) {
        nextOrder[oldIdx] = newProp;
      } else if (newIdx === -1) {
        nextOrder.splice(oldIdx + 1, 0, newProp);
      }
    }
    if (!nextOrder.includes(newProp)) nextOrder.push(newProp);
    ref["snaks-order"] = [...new Set(nextOrder)];
  }

  /**
   * Applies a list of cleanup diffs to deep-cloned copies of the entity's claims
   * and returns a wbeditentity-ready update object containing the modified claims,
   * labels, descriptions, and aliases.
   */
  function mergeCleanupDiffs(entity, diffs) {
    const updates = { claims: [] };
    const claimMap = new Map();

    function getOrCloneClaim(pid, claimId) {
      let merged = claimMap.get(claimId);
      if (!merged) {
        const orig = (entity.claims[pid] || []).find((c) => c.id === claimId);
        if (!orig) return null;
        merged = JSON.parse(JSON.stringify(orig));
        claimMap.set(claimId, merged);
      }
      return merged._remove ? null : merged;
    }

    for (const diff of diffs) {
      switch (diff.action) {
        case ACTION_REMOVE_REFS: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const idx = claim.references.findLastIndex(
            (r) => r.hash === diff.refHash,
          );
          if (idx !== -1) claim.references.splice(idx, 1);
          break;
        }

        case ACTION_REMOVE_CLAIM: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (claim) claim._remove = true;
          break;
        }

        case ACTION_SPLIT_REFERENCE_URLS: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const idx = claim.references.findLastIndex(
            (r) => r.hash === diff.refHash,
          );
          if (idx === -1) break;

          const before = claim.references.slice(0, idx);
          const after = claim.references.slice(idx + 1);
          const refToSplit = claim.references[idx];

          // Collect splittable snaks (excluding P813 and P2960)
          const allEntries = [];
          for (const pid in refToSplit.snaks || {}) {
            if (pid === PID_RETRIEVED || pid === PID_ARCHIVE_DATE) continue;
            for (const snak of refToSplit.snaks[pid] || [])
              allEntries.push({ pid, snak });
          }
          if (allEntries.length <= 1) break;

          const retrievedSnaks = refToSplit.snaks[PID_RETRIEVED] || [];
          const latestRetrieved = retrievedSnaks.length
            ? retrievedSnaks.reduce((max, cur) =>
                (cur.datavalue?.value?.time || "") >
                (max.datavalue?.value?.time || "")
                  ? cur
                  : max,
              )
            : null;

          const ar_urls = refToSplit.snaks[PID_ARCHIVE_URL] || [];
          const ar_dates = refToSplit.snaks[PID_ARCHIVE_DATE] || [];
          if (ar_urls.length > 1 || ar_dates.length > 1) break;

          // Identify and extract archive entries
          let archiveEntry = null;
          const archiveIdx = allEntries.findIndex((e) => {
            if (e.pid === PID_ARCHIVE_URL) return true;
            const raw = e.snak.datavalue?.value;
            const v = typeof raw === "string" ? raw : raw?.id || "";
            try {
              return isArchiveUrl(v);
            } catch {
              return false;
            }
          });
          if (archiveIdx !== -1) {
            // Check there is only one archive entry
            const archiveCount = allEntries.filter((e, i) => {
              if (e.pid === PID_ARCHIVE_URL) return true;
              const raw = e.snak.datavalue?.value;
              const v = typeof raw === "string" ? raw : raw?.id || "";
              try {
                return isArchiveUrl(v);
              } catch {
                return false;
              }
            }).length;
            if (archiveCount > 1) break;
            archiveEntry = allEntries.splice(archiveIdx, 1)[0];
          }

          const newRefs = allEntries
            .map((entry) => {
              const rawVal = entry.snak.datavalue?.value || "";
              const val =
                typeof rawVal === "string" ? rawVal : rawVal?.id || "";
              let mappedPid = entry.pid;
              if (
                [
                  PID_REFERENCE_URL,
                  PID_WIKIMEDIA_IMPORT_URL,
                  PID_ARCHIVE_URL,
                ].includes(entry.pid)
              ) {
                if (isArchiveUrl(val)) mappedPid = PID_ARCHIVE_URL;
                else if (isWikimediaImportUrl(val))
                  mappedPid = PID_WIKIMEDIA_IMPORT_URL;
                else mappedPid = PID_REFERENCE_URL;
              }
              if (mappedPid === PID_ARCHIVE_URL && archiveEntry) return null; // handled below

              const snaks = { [mappedPid]: [entry.snak] };
              const snaksOrder = [mappedPid];
              if (latestRetrieved && mappedPid !== PID_ARCHIVE_URL) {
                snaks[PID_RETRIEVED] = [latestRetrieved];
                snaksOrder.push(PID_RETRIEVED);
              }
              return { snaks, "snaks-order": snaksOrder };
            })
            .filter(Boolean);

          // Build archive ref last
          let arRef = null;
          if (archiveEntry) {
            const arSnaks = { [PID_ARCHIVE_URL]: [archiveEntry.snak] };
            const arSnaksOrder = [PID_ARCHIVE_URL];
            if (ar_dates.length) {
              arSnaks[PID_ARCHIVE_DATE] = [ar_dates[0]];
              arSnaksOrder.push(PID_ARCHIVE_DATE);
            }
            arRef = { snaks: arSnaks, "snaks-order": arSnaksOrder };
          } else if (ar_urls.length === 1) {
            const arSnaks = { [PID_ARCHIVE_URL]: [ar_urls[0]] };
            const arSnaksOrder = [PID_ARCHIVE_URL];
            if (ar_dates.length) {
              arSnaks[PID_ARCHIVE_DATE] = [ar_dates[0]];
              arSnaksOrder.push(PID_ARCHIVE_DATE);
            }
            arRef = { snaks: arSnaks, "snaks-order": arSnaksOrder };
          }

          if (arRef) newRefs.push(arRef);
          claim.references = [...before, ...newRefs, ...after];
          break;
        }

        case ACTION_MERGE_CLAIM: {
          const from = getOrCloneClaim(diff.pid, diff.fromClaimId);
          const to = getOrCloneClaim(diff.pid, diff.toClaimId);
          if (!from || !to) break;

          // Merge references
          const toRefHashes = new Set((to.references || []).map((r) => r.hash));
          to.references = (to.references || []).concat(
            (from.references || []).filter((r) => !toRefHashes.has(r.hash)),
          );

          // Merge qualifiers
          for (const pid in from.qualifiers || {}) {
            const toHashes = new Set(
              (to.qualifiers?.[pid] || []).map((s) => s.hash),
            );
            const newSnaks = (from.qualifiers[pid] || []).filter(
              (s) => !toHashes.has(s.hash),
            );
            if (newSnaks.length) {
              to.qualifiers = to.qualifiers || {};
              to.qualifiers[pid] = (to.qualifiers[pid] || []).concat(newSnaks);
            }
          }

          from._remove = true;
          break;
        }

        case ACTION_DOWNGRADE_PREFERRED: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          claim.rank = "normal";
          // When called from detectUpgradePreciseDate the source claim is
          // deprecated (fromDeprecated:true) and carries P2241 instead of P7452.
          const qualToRemove = diff.fromDeprecated
            ? PID_REASON_FOR_DEPRECATED_RANK
            : PID_REASON_FOR_PREFERRED_RANK;
          if (diff.removedQualifier && claim.qualifiers?.[qualToRemove]) {
            delete claim.qualifiers[qualToRemove];
            if (claim["qualifiers-order"]) {
              claim["qualifiers-order"] = claim["qualifiers-order"].filter(
                (p) => p !== qualToRemove,
              );
            }
          }
          break;
        }

        case ACTION_UPGRADE_PRECISE_DATE: {
          // Set the normal-rank precise date claim to preferred rank and add
          // P7452 = Q71536040 (most precise value) as a qualifier.
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          claim.rank = "preferred";
          claim.qualifiers = claim.qualifiers || {};
          claim["qualifiers-order"] = claim["qualifiers-order"] || [];
          // Add P7452 = Q71536040 only when not already present
          if (!claim.qualifiers[PID_REASON_FOR_PREFERRED_RANK]) {
            claim.qualifiers[PID_REASON_FOR_PREFERRED_RANK] = [
              {
                snaktype: "value",
                property: PID_REASON_FOR_PREFERRED_RANK,
                datavalue: {
                  value: {
                    "entity-type": "item",
                    "numeric-id": parseInt(
                      QID_MOST_PRECISE.replace("Q", ""),
                      10,
                    ),
                    id: QID_MOST_PRECISE,
                  },
                  type: "wikibase-entityid",
                },
              },
            ];
            if (
              !claim["qualifiers-order"].includes(PID_REASON_FOR_PREFERRED_RANK)
            ) {
              claim["qualifiers-order"].unshift(PID_REASON_FOR_PREFERRED_RANK);
            }
          }
          break;
        }

        case ACTION_CHANGE_PROPERTY: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim || diff.context !== "reference") break;
          const refIdx = claim.references.findIndex(
            (r) => r.hash === diff.refHash,
          );
          if (refIdx !== -1) {
            replacePropertyInReference(
              claim.references[refIdx],
              diff.oldProperty,
              diff.newProperty,
              diff.snakHash,
            );
          }
          break;
        }

        case ACTION_CHANGE_VALUE: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim || diff.context !== "reference") break;
          const ref = claim.references.find((r) => r.hash === diff.refHash);
          if (!ref) break;
          for (const snakPid in ref.snaks || {}) {
            for (const snak of ref.snaks[snakPid]) {
              if (snak.hash === diff.snakHash) {
                snak.datavalue = snak.datavalue || {};
                snak.datavalue.value = snak.datavalue.value || {};
                snak.datavalue.value.id = diff.newValue;
                // Keep numeric-id in sync for wikibase-entityid snaks (QIDs).
                if (isQid(diff.newValue)) {
                  snak.datavalue.value["numeric-id"] = parseInt(
                    diff.newValue.replace("Q", ""),
                    10,
                  );
                }
                delete snak.hash;
              }
            }
          }
          break;
        }

        case ACTION_REMOVE_QUALIFIER: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const qPid = diff.qualifierPid;
          const filtered = (claim.qualifiers?.[qPid] || []).filter(
            (s) => s.hash !== diff.qualifierSnakHash,
          );
          if (filtered.length === 0) {
            delete claim.qualifiers[qPid];
            if (claim["qualifiers-order"]) {
              claim["qualifiers-order"] = claim["qualifiers-order"].filter(
                (p) => p !== qPid,
              );
            }
          } else {
            claim.qualifiers[qPid] = filtered;
          }
          break;
        }

        case ACTION_CLEAN_URL: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;

          if (diff.context === "claim") {
            // Top-level mainsnak
            if (claim.mainsnak?.datavalue?.value === diff.before) {
              claim.mainsnak.datavalue.value = diff.after;
              delete claim.mainsnak.hash;
            }
          } else if (diff.context === "qualifier") {
            // Qualifier snak identified by PID + hash
            for (const snak of claim.qualifiers?.[diff.snakPid] || []) {
              if (
                snak.hash === diff.snakHash &&
                snak.datavalue?.value === diff.before
              ) {
                snak.datavalue.value = diff.after;
                delete snak.hash;
                break;
              }
            }
          } else {
            // Reference snak (context === "reference" or legacy diffs without context)
            const ref = claim.references?.find((r) => r.hash === diff.refHash);
            if (!ref) break;
            // snakPid may be absent in old diffs; fall back to PID_REFERENCE_URL
            const targetPid = diff.snakPid || PID_REFERENCE_URL;
            for (const snak of ref.snaks?.[targetPid] || []) {
              if (
                snak.datatype === "url" &&
                snak.datavalue?.value === diff.before
              ) {
                snak.datavalue.value = diff.after;
                delete snak.hash;
                break;
              }
            }
          }
          break;
        }
        case ACTION_MOVE_QUALIFIER_TO_REFERENCE: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const retrievedQuals = claim.qualifiers?.[PID_RETRIEVED] || [];
          if (!retrievedQuals.length) break;

          const p813Snaks = retrievedQuals.map((q) => ({
            snaktype: q.snaktype || "value",
            property: PID_RETRIEVED,
            datavalue: q.datavalue
              ? JSON.parse(JSON.stringify(q.datavalue))
              : undefined,
          }));

          const refs = claim.references || [];
          const hasP813 = (r) => (r?.snaks?.[PID_RETRIEVED] || []).length > 0;

          if (refs.length === 1 && !hasP813(refs[0])) {
            const target = refs[0];
            target.snaks = target.snaks || {};
            target["snaks-order"] = target["snaks-order"] || [];
            target.snaks[PID_RETRIEVED] = (
              target.snaks[PID_RETRIEVED] || []
            ).concat(p813Snaks);
            if (!target["snaks-order"].includes(PID_RETRIEVED))
              target["snaks-order"].push(PID_RETRIEVED);
          } else {
            claim.references = refs.concat([
              {
                snaks: { [PID_RETRIEVED]: p813Snaks },
                "snaks-order": [PID_RETRIEVED],
              },
            ]);
          }

          delete claim.qualifiers[PID_RETRIEVED];
          break;
        }

        case ACTION_REMOVE_ALIAS: {
          updates.aliases = updates.aliases || {};
          const current =
            updates.aliases[diff.lang] || entity.aliases[diff.lang] || [];
          updates.aliases[diff.lang] = current.filter(
            (a) => a.value !== diff.value,
          );
          break;
        }

        case ACTION_NORMALIZE: {
          if (diff.field === "label") {
            updates.labels = updates.labels || {};
            updates.labels[diff.lang] = {
              language: diff.lang,
              value: diff.after,
            };
          } else if (diff.field === "description") {
            updates.descriptions = updates.descriptions || {};
            updates.descriptions[diff.lang] = {
              language: diff.lang,
              value: diff.after,
            };
          } else if (diff.field === "alias") {
            updates.aliases = updates.aliases || {};
            const current =
              updates.aliases[diff.lang] || entity.aliases[diff.lang] || [];
            updates.aliases[diff.lang] = current.map((a) =>
              a.value === diff.before
                ? { language: diff.lang, value: diff.after }
                : a,
            );
          }
          break;
        }

        case ACTION_ADD_EXTERNAL_ID_TO_REFERENCE: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const ref = claim.references.find((r) => r.hash === diff.refHash);
          if (!ref) break;

          ref.snaks = ref.snaks || {};
          ref["snaks-order"] = ref["snaks-order"] || [];

          const insertBeforeP854 = (pid) => {
            if (!ref["snaks-order"].includes(pid)) {
              const p854i = ref["snaks-order"].indexOf(PID_REFERENCE_URL);
              if (p854i !== -1) ref["snaks-order"].splice(p854i, 0, pid);
              else ref["snaks-order"].push(pid);
            }
          };

          // Add P248 (stated in) if missing
          if (
            !ref.snaks[PID_STATED_IN] &&
            propertyStatedInCache.has(diff.suggestedProperty)
          ) {
            const statedInData = propertyStatedInCache.get(
              diff.suggestedProperty,
            );
            if (statedInData?.preferred) {
              ref.snaks[PID_STATED_IN] = [
                {
                  snaktype: "value",
                  property: PID_STATED_IN,
                  datavalue: {
                    value: {
                      "entity-type": "item",
                      "numeric-id": parseInt(
                        statedInData.preferred.replace("Q", ""),
                        10,
                      ),
                      id: statedInData.preferred,
                    },
                    type: "wikibase-entityid",
                  },
                },
              ];
              insertBeforeP854(PID_STATED_IN);
            }
          }

          // Add the extracted external ID
          ref.snaks[diff.suggestedProperty] = (
            ref.snaks[diff.suggestedProperty] || []
          ).concat([
            {
              snaktype: "value",
              property: diff.suggestedProperty,
              datavalue: { value: diff.extractedId, type: "string" },
            },
          ]);
          insertBeforeP854(diff.suggestedProperty);

          // Remove P854 if not needed
          if (!diff.keepUrl) {
            delete ref.snaks[PID_REFERENCE_URL];
            ref["snaks-order"] = ref["snaks-order"].filter(
              (p) => p !== PID_REFERENCE_URL,
            );
          }
          break;
        }

        case ACTION_ABSORB_CLAIM: {
          // 1. Mark the source claim for removal (may be a P1343 claim or a URL-type claim).
          const p1343 = getOrCloneClaim(diff.pid, diff.claimId);
          if (p1343) p1343._remove = true;

          // 2. Move the source claim's references to the ext-id claim, injecting any
          //    qualifiers from the source claim as extra values inside each moved reference.
          //    If there are no references but qualifiers are present, synthesise a
          //    new bare reference on the ext-id claim from those qualifiers alone.
          const extIdClaim = getOrCloneClaim(diff.extIdPid, diff.extIdClaimId);
          if (!extIdClaim) break;

          extIdClaim.references = extIdClaim.references || [];

          // Build qualifier snaks (hash-stripped, ready to embed into a reference).
          // P2699 (URL) is remapped to P854 (reference URL) because URL is a statement
          // qualifier property whereas P854 is its reference counterpart.
          const qualSnaksByPid = {};
          const qualOrder = [];
          for (const qPid of diff.qualifiersOrder || []) {
            const refPid = qPid === PID_URL ? PID_REFERENCE_URL : qPid;
            const snaks = (diff.qualifiers?.[qPid] || []).map((s) => ({
              snaktype: s.snaktype,
              property: refPid,
              datavalue: s.datavalue
                ? JSON.parse(JSON.stringify(s.datavalue))
                : undefined,
            }));
            if (snaks.length) {
              qualSnaksByPid[refPid] = snaks;
              if (!qualOrder.includes(refPid)) qualOrder.push(refPid);
            }
          }

          if (diff.refHashes?.length) {
            // Migrate existing references from the source claim, enriching each with qualifier values.
            // Read from the original entity — the clone only carries _remove
            const p1343Original = (entity.claims[diff.pid] || []).find(
              (c) => c.id === diff.claimId,
            );
            const existingHashes = new Set(
              extIdClaim.references.map((r) => r.hash),
            );
            const refsToMove = (p1343Original?.references || []).filter(
              (r) =>
                diff.refHashes.includes(r.hash) && !existingHashes.has(r.hash),
            );

            for (const origRef of refsToMove) {
              const newRef = JSON.parse(JSON.stringify(origRef));
              delete newRef.hash;
              newRef.snaks = newRef.snaks || {};
              newRef["snaks-order"] = newRef["snaks-order"] || [];

              for (const qPid of qualOrder) {
                if (!newRef.snaks[qPid]) {
                  newRef.snaks[qPid] = qualSnaksByPid[qPid];
                  newRef["snaks-order"].push(qPid);
                }
                // Leave existing pid snaks untouched to avoid value conflicts
              }

              extIdClaim.references.push(newRef);
            }
          } else if (qualOrder.length) {
            // No references on P1343, but it has qualifiers — synthesise a new
            // reference on the ext-id claim consisting solely of those qualifier snaks
            const newRef = {
              snaks: Object.fromEntries(
                qualOrder.map((p) => [p, qualSnaksByPid[p]]),
              ),
              "snaks-order": qualOrder.slice(),
            };
            extIdClaim.references.push(newRef);
          }

          break;
        }

        case ACTION_REMOVE_REDUNDANT_REF_URL: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const ref = claim.references?.find((r) => r.hash === diff.refHash);
          if (!ref) break;
          // Remove only the specific redundant P854 snak identified by snakHash.
          const kept = (ref.snaks[PID_REFERENCE_URL] || []).filter(
            (s) => s.hash !== diff.snakHash,
          );
          if (kept.length) {
            ref.snaks[PID_REFERENCE_URL] = kept;
          } else {
            delete ref.snaks[PID_REFERENCE_URL];
            ref["snaks-order"] = (ref["snaks-order"] || []).filter(
              (p) => p !== PID_REFERENCE_URL,
            );
          }
          break;
        }

        case ACTION_REMOVE_OBSOLETE_SNAKS: {
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          const ref = claim.references?.find((r) => r.hash === diff.refHash);
          if (!ref) break;
          for (const obsoletePid of diff.obsoletePids || []) {
            delete ref.snaks[obsoletePid];
            ref["snaks-order"] = (ref["snaks-order"] || []).filter(
              (p) => p !== obsoletePid,
            );
          }
          break;
        }

        case ACTION_DEPRECATE_URL_CLAIM: {
          // Set the claim rank to "deprecated".
          // Qualifiers and references are intentionally left in place so that the
          // human reviewer can see why the claim existed.
          // When a P2241 (reason for deprecated rank) QID is supplied by the
          // blocklist rule, add it as a qualifier.
          const claim = getOrCloneClaim(diff.pid, diff.claimId);
          if (!claim) break;
          claim.rank = "deprecated";
          if (diff.deprecationReason && isQid(diff.deprecationReason)) {
            claim.qualifiers = claim.qualifiers || {};
            claim["qualifiers-order"] = claim["qualifiers-order"] || [];
            if (!claim.qualifiers[PID_REASON_FOR_DEPRECATED_RANK]) {
              claim.qualifiers[PID_REASON_FOR_DEPRECATED_RANK] = [
                {
                  snaktype: "value",
                  property: PID_REASON_FOR_DEPRECATED_RANK,
                  datavalue: {
                    value: {
                      "entity-type": "item",
                      "numeric-id": parseInt(
                        diff.deprecationReason.replace("Q", ""),
                        10,
                      ),
                      id: diff.deprecationReason,
                    },
                    type: "wikibase-entityid",
                  },
                },
              ];
              if (
                !claim["qualifiers-order"].includes(
                  PID_REASON_FOR_DEPRECATED_RANK,
                )
              ) {
                claim["qualifiers-order"].push(PID_REASON_FOR_DEPRECATED_RANK);
              }
            }
          }
          break;
        }

        case ACTION_SET_MUL_LABEL: {
          // Add labels.mul with the shared label value.
          updates.labels = updates.labels || {};
          updates.labels.mul = { language: "mul", value: diff.value };
          break;
        }

        case ACTION_ADD_MUL_ALIAS: {
          // Add diff.value to aliases.mul, preserving any existing mul aliases.
          // Start from the already-accumulated updates.aliases.mul when multiple
          // ADD_MUL_ALIAS diffs fire in the same edit (shouldn't happen, but safe).
          updates.aliases = updates.aliases || {};
          const existingMul = updates.aliases.mul
            ? updates.aliases.mul.slice()
            : (entity.aliases?.mul || []).map((a) => ({ ...a }));
          const normNew = normalizeText(diff.value);
          if (!existingMul.some((a) => normalizeText(a.value) === normNew)) {
            existingMul.push({ language: "mul", value: diff.value });
          }
          updates.aliases.mul = existingMul;
          break;
        }

        case ACTION_CONVERT_URL_TO_EXT_ID: {
          // Dev-mode only (DEV_ABSORB_URL_CLAIM_CREATE_MISSING).
          // Creates a new ext-id statement from the matched URL, transferring the
          // URL claim's qualifiers and references onto it, then removes the URL claim.

          // 1. Mark the URL claim for removal.
          const urlClaim = getOrCloneClaim(diff.pid, diff.claimId);
          if (urlClaim) urlClaim._remove = true;

          // 2. Build the new ext-id claim object.
          const newExtIdClaim = {
            type: "statement",
            rank: "normal",
            mainsnak: {
              snaktype: "value",
              property: diff.extIdPid,
              datavalue: { value: diff.extractedId, type: "string" },
              datatype: "external-id",
            },
            qualifiers: {},
            "qualifiers-order": [],
            references: [],
          };

          // 3. Copy qualifiers from the URL claim (P2699/URL remapped to P854).
          for (const qPid of diff.qualifiersOrder || []) {
            const refPid = qPid === PID_URL ? PID_REFERENCE_URL : qPid;
            const snaks = (diff.qualifiers?.[qPid] || []).map((s) => ({
              snaktype: s.snaktype,
              property: refPid,
              datavalue: s.datavalue
                ? JSON.parse(JSON.stringify(s.datavalue))
                : undefined,
            }));
            if (snaks.length) {
              newExtIdClaim.qualifiers[refPid] = snaks;
              newExtIdClaim["qualifiers-order"].push(refPid);
            }
          }

          // 4. Copy references from the original URL claim.
          const urlOriginal = (entity.claims[diff.pid] || []).find(
            (c) => c.id === diff.claimId,
          );
          for (const origRef of (urlOriginal?.references || []).filter((r) =>
            diff.refHashes.includes(r.hash),
          )) {
            const newRef = JSON.parse(JSON.stringify(origRef));
            delete newRef.hash;
            newExtIdClaim.references.push(newRef);
          }

          // 5. Register the new claim so mergeCleanupDiffs includes it in the API call.
          //    Give it a temporary id so getOrCloneClaim can track it if needed.
          newExtIdClaim.id = `NEW_EXT_ID_${diff.extIdPid}_${diff.claimId}`;
          claimMap.set(newExtIdClaim.id, newExtIdClaim);

          break;
        }
      }
    }

    // Deduplicate aliases
    for (const lang in updates.aliases || {}) {
      updates.aliases[lang] = updates.aliases[lang].filter(
        (a, i, arr) => arr.findIndex((b) => b.value === a.value) === i,
      );
    }

    for (const claim of claimMap.values()) {
      if (claim._remove) {
        updates.claims.push({ id: claim.id, remove: "" });
      } else if (claim.id?.startsWith("NEW_EXT_ID_")) {
        // New claim created by ACTION_CONVERT_URL_TO_EXT_ID — strip the temporary id
        // so the API treats it as a create rather than an edit.
        const { id: _tempId, ...newClaim } = claim;
        updates.claims.push(newClaim);
      } else {
        updates.claims.push(claim);
      }
    }

    return updates;
  }

  // ==== 24 Core flow ========================================================

  /**
   * Executes every enabled detector against the entity, a few at a time, and
   * gathers the diffs they report.
   *
   * @param {object} entity     Entity JSON handed unchanged to each detector.
   * @param {object} [settings] User settings; `enabledDetectors`,
   *   `enableLargeBuffers` and `enableHeavyComputing` decide which detectors run.
   * @returns {Promise<{previewRows: object[], updates: object, modified: boolean}>}
   *   previewRows are the non-hidden diffs for the UI table, updates carries one
   *   `updates_<id>` bucket per detector that produced diffs, and modified is
   *   true when at least one bucket is non-empty.
   */
  async function generatePreviewDiffs(entity, settings) {
    const updates = { claims: [] };
    const previewRows = [];
    let modified = false;

    // A detector is skipped when the user switched it off, or when it needs
    // large buffers / heavy computing and that capability is not enabled.
    const activeDefs = Object.entries(detectors).filter(
      ([id, def]) =>
        !(
          settings?.enabledDetectors?.[id] === false ||
          (def.requiresLargeBuffers && !settings?.enableLargeBuffers) ||
          (def.requiresHeavyComputing && !settings?.enableHeavyComputing)
        ),
    );

    // Detectors registered with `detect: null` are source-category detectors
    // served by detectRefCategories. They are evaluated together in one call so
    // every reference is classified once, however many of them are active.
    // isRemoveRefCategory is deliberately not the criterion: dupRetrieved also
    // sets it but brings its own detect function and must stay out of this pass.
    const sharedIds = activeDefs
      .filter(([, def]) => def.detect === null)
      .map(([id]) => id);
    const sharedRefResults =
      sharedIds.length > 0 ? detectRefCategories(entity, sharedIds) : new Map();

    const BATCH_SIZE = 3;
    const results = [];
    const runDetector = ([id, def]) =>
      Promise.resolve(
        def.detect === null
          ? (sharedRefResults.get(id) ?? [])
          : def.detect(entity),
      );

    let offset = 0;
    while (offset < activeDefs.length) {
      const batch = activeDefs.slice(offset, offset + BATCH_SIZE);
      results.push(...(await Promise.all(batch.map(runDetector))));
      offset += BATCH_SIZE;
      // Hand control back to the event loop before starting the next batch.
      if (offset < activeDefs.length) {
        await new Promise((resolve) => setTimeout(resolve, 0));
      }
    }

    // Bucket the raw results by detector id before any post-processing.
    // In dev mode detectAbsorbUrlClaim may also emit ACTION_CONVERT_URL_TO_EXT_ID
    // diffs; those go into a bucket of their own so that they are listed under
    // the matching registry entry.
    const rawByDef = new Map();
    const isConvertDiff = (d) => d.action === ACTION_CONVERT_URL_TO_EXT_ID;
    for (const [index, [id]] of activeDefs.entries()) {
      const diffs = results[index] || [];
      const splitOutConversions =
        id === "absorbUrlClaim" && DEV_ABSORB_URL_CLAIM_CREATE_MISSING;

      if (!splitOutConversions) {
        // Append to a bucket that may already exist (e.g. convertUrlToExtId
        // filled by the split below when absorbUrlClaim ran earlier).
        const existing = rawByDef.get(id);
        rawByDef.set(id, existing ? existing.concat(diffs) : diffs);
        continue;
      }

      rawByDef.set(
        "absorbUrlClaim",
        diffs.filter((d) => !isConvertDiff(d)),
      );
      rawByDef.set("convertUrlToExtId", diffs.filter(isConvertDiff));
    }

    // Post-filter: drop every addExternalIdToReference diff that targets a
    // reference an absorb diff is going to move. The absorb action transplants
    // the whole reference onto the ext-id claim, so enriching it with an ext-id
    // snak first would yield a self-referential reference (ext-id claim ->
    // reference holding the same ext-id value).
    const absorbedRefKeys = new Set();
    for (const detectorId of ["absorbDescribedBySource", "absorbUrlClaim"]) {
      for (const diff of rawByDef.get(detectorId) || []) {
        for (const hash of diff.refHashes || []) {
          absorbedRefKeys.add(`${diff.claimId}::${hash}`);
        }
      }
    }
    const addDiffs = absorbedRefKeys.size
      ? rawByDef.get("addExternalIdToReference")
      : undefined;
    if (addDiffs?.length) {
      const survivesAbsorb = (d) =>
        !absorbedRefKeys.has(`${d.claimId}::${d.refHash}`);
      rawByDef.set("addExternalIdToReference", addDiffs.filter(survivesAbsorb));
    }

    // Walk rawByDef itself rather than activeDefs: synthetic buckets such as
    // "convertUrlToExtId" are filled by splitting another detector's output,
    // not by a detect run of their own, and must still be picked up.
    for (const [id, diffs] of rawByDef) {
      if (!diffs?.length) continue;

      diffs.forEach((diff, position) => {
        diff.type = id;
        if (!diff.rowId) diff.rowId = `${id}_${position}`;
      });

      // Diffs flagged _hidden (e.g. the companion downgrade emitted by
      // upgradePreciseDate) travel with their visible counterpart in the
      // bucket but are left out of the table.
      const visible = diffs.filter(({ _hidden }) => !_hidden);
      previewRows.push(...visible);
      updates[`updates_${id}`] = { changes: diffs };
      modified = true;
    }

    console.log(`${TOOL_NAME}: ${previewRows.length} issues found`);
    return { previewRows, updates, modified };
  }

  /**
   * Composes the Wikidata edit summary from the ticked cleanup categories.
   * Categories flagged isRemoveRefCategory are folded into one
   * "remove a+b refs" segment; every other category contributes its
   * summaryLabel (or its id when no label is registered).
   */
  function buildSummary(checkboxStates) {
    const refLabels = [];
    const otherLabels = [];

    for (const id of Object.keys(checkboxStates)) {
      if (!checkboxStates[id]) continue;
      const def = detectors[id];
      if (def?.isRemoveRefCategory) {
        refLabels.push(def.summaryLabel);
      } else {
        otherLabels.push(def?.summaryLabel || id);
      }
    }

    // The combined reference-removal segment always leads the summary.
    const refSegment = refLabels.length
      ? ["remove " + refLabels.join("+") + " refs"]
      : [];
    const body = [...refSegment, ...otherLabels].join("; ");

    return `Cleanup: ${body} ([[User:Difool/WikidataCleanup]])`;
  }

  /**
   * Collects the diffs the user left selected, merges them into a single
   * update payload and submits it through a wbeditentity API call. The page
   * is reloaded when the API reports success; otherwise the user is told
   * about the failure.
   *
   * A diff is included when its detector's checkbox is ticked and its row was
   * not explicitly switched off (rowStates[rowId] === false).
   */
  function runCleanupCallback(
    entityId,
    entity,
    updates,
    checkboxStates,
    rowStates,
  ) {
    const allDiffs = Object.keys(detectors)
      .filter((id) => checkboxStates[id])
      .flatMap((id) =>
        (updates[`updates_${id}`]?.changes || []).filter(
          (c) => rowStates[c.rowId] !== false,
        ),
      );

    const filteredUpdates = mergeCleanupDiffs(entity, allDiffs);
    const summary = buildSummary(checkboxStates);

    const onResponse = (resp) => {
      if (resp?.success === 1) {
        window.location.reload();
        return;
      }
      alert(getMsg("applyFailed"));
    };

    // The rejection may arrive as (errorCode, error) or with the error object
    // already in the first argument, so both positions are inspected.
    const asObject = (value) =>
      typeof value === "object" && value ? value : null;
    const onFailure = (errorCode, error) => {
      const errObj = asObject(errorCode) || asObject(error);
      const detail = (errObj?.error || errObj)?.info;

      console.error(`${TOOL_NAME}: edit error`, errorCode, error);

      mw.notify(detail || getMsg("cleanupFailed"), {
        type: "error",
        autoHide: false,
        tag: `${TOOL_NAME}_editError`,
      });
    };

    const api = new mw.Api();
    const request = {
      action: "wbeditentity",
      format: "json",
      id: entityId,
      summary,
      baserevid: mw.config.get("wgRevisionId"),
      data: JSON.stringify(filteredUpdates),
    };
    api.postWithEditToken(request).then(onResponse).catch(onFailure);
  }

  /**
   * Renders the cleanup UI, first loading the OOUI modules through mw.loader
   * when OO.ui.ButtonWidget is not available yet. A failed module load is
   * logged to the console and nothing is rendered.
   */
  function renderCleanupUISafe(entity, previewRows, applyCleanupChanges) {
    const render = () =>
      renderCleanupUI(entity, previewRows, applyCleanupChanges);

    const oouiAvailable =
      typeof OO !== "undefined" && Boolean(OO.ui?.ButtonWidget);
    if (oouiAvailable) {
      render();
      return;
    }

    mw.loader
      .using(["oojs-ui-core", "oojs-ui-widgets"])
      .then(() => {
        render();
      })
      .catch((err) => console.error(`${TOOL_NAME}: failed to load OOUI`, err));
  }

  /**
   * Entry point of a cleanup run: removes a previously rendered box,
   * initialises the caches, fetches the current entity, runs the detectors
   * and renders the preview UI.
   *
   * The UI is rendered when the detectors reported something or when the run
   * was started from the menu (`menuclick`); in the latter case the box is
   * also scrolled into view and its first button focused. Errors raised while
   * fetching or processing the entity are logged to the console.
   */
  async function initCleanupTool(settings, { menuclick = false } = {}) {
    $("#WikidataCleanup_box").remove();
    await initCaches(settings);

    try {
      const entityId = mw.config.get("wgPageName");
      const response = await new mw.Api().get({
        action: "wbgetentities",
        format: "json",
        ids: entityId,
        props: "claims|labels|descriptions|aliases|sitelinks",
      });
      const entity = response.entities[entityId];
      if (!entity?.claims) return;

      const preview = await generatePreviewDiffs(entity, settings);
      const { previewRows, updates, modified } = preview;
      if (!modified && !menuclick) return;

      const applySelection = (checkboxStates, rowStates) =>
        runCleanupCallback(
          entityId,
          entity,
          updates,
          checkboxStates,
          rowStates,
        );
      renderCleanupUISafe(entity, previewRows, applySelection);

      if (!menuclick) return;

      // Started from the menu: bring the box into view and focus its first button.
      const box = document.getElementById("WikidataCleanup_box");
      box?.scrollIntoView({ behavior: "smooth", block: "start" });
      box?.querySelector("button")?.focus();
    } catch (err) {
      console.error(`${TOOL_NAME}: API error`, err);
    }
  }

  // ==== 25 Settings dialog ==================================================

  /**
   * Build and open the settings dialog.
   *
   * The dialog shows three sections: general switches, one checkbox per entry
   * in `detectors`, and one status/maintenance row per entry in `caches`.
   * Running the "close" action collects the checkbox states, passes them to
   * `saveSettings`, re-runs `initCleanupTool` with them and closes the dialog.
   *
   * @param {Object} currentSettings - Settings used to pre-select the
   *   checkboxes. `enableLargeBuffers`, `enableHeavyComputing` and each
   *   detector are treated as enabled unless explicitly `false`.
   * @param {Function} saveSettings - Called with the new settings object.
   */
  function showSettingsDialog(currentSettings, saveSettings) {
    const autoStartCb = new OO.ui.CheckboxInputWidget({
      selected: !!currentSettings.autoStartPreview,
    });
    const largeBuffersCb = new OO.ui.CheckboxInputWidget({
      selected: currentSettings.enableLargeBuffers !== false,
    });
    const heavyComputingCb = new OO.ui.CheckboxInputWidget({
      selected: currentSettings.enableHeavyComputing !== false,
    });

    const generalFieldset = new OO.ui.FieldsetLayout({
      label: getMsg("generalSettings"),
      items: [
        new OO.ui.FieldLayout(autoStartCb, {
          label: getMsg("autoStart"),
          align: "inline",
        }),
        new OO.ui.FieldLayout(largeBuffersCb, {
          label: "Enable large buffers (IndexedDB caching)",
          help: "Disabling reduces memory usage but disables some advanced features",
          align: "inline",
        }),
        new OO.ui.FieldLayout(heavyComputingCb, {
          label: "Enable heavy computing (complex analysis)",
          help: "Disabling reduces CPU usage but disables some analysis features",
          align: "inline",
        }),
      ],
    });

    const detectorFields = Object.entries(detectors).map(([id, def]) => {
      const cb = new OO.ui.CheckboxInputWidget({
        selected: currentSettings.enabledDetectors?.[id] !== false,
      });
      // Remember which detector the checkbox belongs to; read back on save.
      cb.detectorId = id;
      return new OO.ui.FieldLayout(cb, {
        label: getMsg(def.label),
        align: "inline",
      });
    });

    const detectorFieldset = new OO.ui.FieldsetLayout({
      label: getMsg("detectorSettings"),
      items: detectorFields,
    });

    // Build one row (status label + maintenance buttons) for a cache.
    function buildCacheField(cacheDef) {
      const info = new OO.ui.LabelWidget({ label: "Loading…" });

      // Never rejects: a failed status lookup is logged and surfaced in the
      // label instead of leaving it stuck on "Loading…".
      const refreshStatus = async () => {
        try {
          info.setLabel(await cache_getStatus(cacheDef));
        } catch (err) {
          console.error(`${TOOL_NAME}: failed to read cache status`, err);
          info.setLabel("Status unavailable");
        }
      };

      // Run a cache operation from a button click. The event emitter ignores
      // the returned promise, so errors are handled here; the status is
      // refreshed either way so the label reflects the current state.
      const runThenRefresh = async (operation) => {
        try {
          await operation();
        } catch (err) {
          console.error(`${TOOL_NAME}: cache operation failed`, err);
        }
        await refreshStatus();
      };

      refreshStatus();

      const reloadBtn = new OO.ui.ButtonWidget({
        label: cacheDef.fetchFn ? "Reload" : "Reset now",
        flags: [cacheDef.fetchFn ? "progressive" : "destructive"],
      });
      reloadBtn.on("click", () =>
        runThenRefresh(() =>
          cacheDef.fetchFn
            ? refreshCacheWithNotify(cacheDef, currentSettings)
            : cache_reset(cacheDef),
        ),
      );

      const items = [info, reloadBtn];
      // A separate "clear" button only makes sense when the first button
      // reloads; otherwise the first button already resets the cache.
      if (cacheDef.fetchFn) {
        const clearBtn = new OO.ui.ButtonWidget({
          label: "Clear cache",
          flags: ["destructive"],
        });
        clearBtn.on("click", () =>
          runThenRefresh(() => cache_reset(cacheDef)),
        );
        items.push(clearBtn);
      }

      return new OO.ui.PanelLayout({
        padded: true,
        expanded: false,
        content: [new OO.ui.HorizontalLayout({ items })],
      });
    }

    const cacheFieldset = new OO.ui.FieldsetLayout({
      label: getMsg("cacheSettings"),
      items: caches.map((cacheDef) => buildCacheField(cacheDef)),
    });

    // Dialog class (scoped to avoid polluting the outer IIFE)
    const SettingsDialog = function (config) {
      SettingsDialog.super.call(this, config);
    };
    OO.inheritClass(SettingsDialog, OO.ui.ProcessDialog);
    SettingsDialog.static.name = "settingsDialog";
    SettingsDialog.static.title = getMsg("settingsTitle");
    SettingsDialog.static.size = "large";
    SettingsDialog.static.actions = [
      { action: "close", label: getMsg("close"), flags: ["safe", "primary"] },
    ];

    SettingsDialog.prototype.initialize = function () {
      SettingsDialog.super.prototype.initialize.apply(this, arguments);
      const content = new OO.ui.PanelLayout({ padded: true, expanded: true });
      content.$element.append(
        generalFieldset.$element,
        detectorFieldset.$element,
        cacheFieldset.$element,
      );
      this.$body.append(content.$element);
    };

    SettingsDialog.prototype.getActionProcess = function (action) {
      if (action !== "close")
        return SettingsDialog.super.prototype.getActionProcess.call(
          this,
          action,
        );
      // "close" doubles as "save": persist the current checkbox states,
      // restart the tool with them, then close.
      return new OO.ui.Process(() => {
        const newSettings = {
          autoStartPreview: autoStartCb.isSelected(),
          enableLargeBuffers: largeBuffersCb.isSelected(),
          enableHeavyComputing: heavyComputingCb.isSelected(),
          enabledDetectors: Object.fromEntries(
            detectorFields.map((f) => [
              f.fieldWidget.detectorId,
              f.fieldWidget.isSelected(),
            ]),
          ),
        };
        saveSettings(newSettings);
        $("#WikidataCleanup_box").remove();
        initCleanupTool(newSettings);
        this.close({ action: "close" });
      });
    };

    const manager = new OO.ui.WindowManager();
    $("body").append(manager.$element);
    const dialog = new SettingsDialog();
    manager.addWindows([dialog]);
    // A fresh manager is appended to <body> on every call; tear it down once
    // the dialog has closed so repeated openings do not pile up DOM nodes and
    // event handlers.
    manager.openWindow(dialog).closed.then(() => manager.destroy());
  }

  // ==== 26 Entry point ======================================================

  /**
   * Add the launcher that starts the cleanup preview on demand.
   *
   * On every skin except "minerva" a portlet link is added to `p-cactions`.
   * If that is not possible (Minerva, or `addPortletLink` returned nothing),
   * a button is appended to the first matching page container instead.
   * Activating either one runs `initCleanupTool` with `menuclick: true`.
   */
  function addCleanupLink() {
    const label = getMsg("startPreview");
    const clickHandler = (event) => {
      // The portlet link's href is "#": without this the browser follows it,
      // jumping to the top of the page and appending "#" to the URL.
      event.preventDefault();
      initCleanupTool(cache_loadSettings(), { menuclick: true });
    };

    if (mw.config.get("skin") !== "minerva") {
      const link = mw.util.addPortletLink(
        "p-cactions",
        "#",
        label,
        `${TOOL_NAME}_link`,
        getMsg("runPreview"),
      );
      if (link) {
        link.addEventListener("click", clickHandler);
        return;
      }
    }

    // Fallback for MinervaNeue
    const header =
      document.querySelector(".page-actions-menu") ||
      document.querySelector(".minerva-header") ||
      document.querySelector(".content");
    if (header) {
      const btn = document.createElement("button");
      // Buttons default to type "submit"; make sure this one never submits
      // a form it might end up inside.
      btn.type = "button";
      btn.className =
        "cdx-button cdx-button--action-progressive wikidata-cleanup-launcher";
      btn.textContent = label;
      btn.addEventListener("click", clickHandler);
      header.appendChild(btn);
    }
  }

  /**
   * Entry point run once the required modules are loaded: injects the
   * stylesheet for the cleanup box, publishes the console debug helpers on
   * `window.wd_cleanup_debug`, adds the launcher link and, when the stored
   * settings have `autoStartPreview` set, starts the preview right away.
   */
  function init() {
    mw.util.addCSS(`
      #WikidataCleanup_box table td:first-child,
      #WikidataCleanup_box table th:first-child {
        text-align: center;
        padding-left: 0.25em;
        padding-right: 0.25em;
      }
    `);

    // Debug helpers (available in browser console). Every getter hands out a
    // copy or a lookup result, never the live cache object itself.
    const debugApi = {
      getObsoleteIdProps() {
        return Array.from(obsoleteIdProps);
      },
      getPropertyRegex(pid) {
        return propertyRegexCache.get(pid);
      },
      getPropertyUrlPatterns(pid) {
        return propertyUrlPatternsCache.get(pid);
      },
      getPropertyStatedIn(pid) {
        return propertyStatedInCache.get(pid);
      },
      getAllCaches() {
        return {
          obsoleteIdProps: Array.from(obsoleteIdProps),
          propertyRegexCache: new Map(propertyRegexCache),
          propertyUrlPatternsCache: new Map(propertyUrlPatternsCache),
          propertyStatedInCache: new Map(propertyStatedInCache),
        };
      },
      searchRegexCache(q) {
        const hits = [];
        for (const [pid, regex] of Array.from(propertyRegexCache.entries())) {
          if (pid.includes(q) || regex.includes(q)) {
            hits.push({ pid, regex });
          }
        }
        return hits;
      },
      searchUrlPatterns(q) {
        const hits = [];
        for (const [pid, patterns] of Array.from(
          propertyUrlPatternsCache.entries(),
        )) {
          if (pid.includes(q)) {
            hits.push({ pid, patterns });
          }
        }
        return hits;
      },
      testUrlAgainstPatterns(url) {
        return matchUrlAgainstPatterns(url);
      },
    };
    window.wd_cleanup_debug = debugApi;

    addCleanupLink();

    const storedSettings = cache_loadSettings();
    if (storedSettings.autoStartPreview) {
      initCleanupTool(storedSettings);
    }
  }

  // Load the modules init() relies on, then start the tool. A failure (module
  // load error or an exception thrown by init) is logged rather than left as
  // an unhandled rejection, matching the error handling in
  // renderCleanupUISafe.
  mw.loader
    .using(["mediawiki.util", "oojs-ui-core", "oojs-ui-widgets"])
    .then(init)
    .catch((err) => console.error(`${TOOL_NAME}: failed to initialize`, err));
})(mediaWiki);
// </nowiki>