packages/django-postgres-cache: avoid regex queries when listing keys if possible (#23160)

Co-authored-by: Ryan Pesek <rpesek@cloudflare.com>
Signed-off-by: Marc 'risson' Schmitt <marc.schmitt@risson.space>
This commit is contained in:
Marc 'risson' Schmitt
2026-06-17 14:58:02 +02:00
committed by GitHub
parent 1b3bdc5aa7
commit 5839b40efa
2 changed files with 90 additions and 10 deletions
@@ -175,14 +175,26 @@ class DatabaseCache(BaseCache):
CacheEntry.objects.truncate()
def keys(self, keys_pattern: str, version: int | None = None) -> list[str]:
    """Return cache keys matching a glob pattern (``*`` wildcard).

    Simple ``prefix*`` patterns use Django's ``__startswith`` lookup
    (``LIKE 'prefix%'``), which can be answered by a B-tree index on
    ``cache_key`` and is dramatically cheaper than the regex path even
    without one. No-wildcard patterns reduce to primary-key equality.
    Complex patterns (wildcards in non-suffix position, multiple
    wildcards) fall back to ``__regex``.

    In every branch ``*`` is the only special character: all other
    characters of ``keys_pattern`` are matched literally, and the pattern
    has to match the whole stored key, not just a part of it.

    Args:
        keys_pattern: Glob pattern, before ``make_key`` is applied to it.
        version: Cache key version forwarded to ``make_key``.

    Returns:
        The matching keys, each passed through ``reverse_key_func``.
    """
    # Local import so this method does not depend on the module's import block.
    import re

    wildcard_count = keys_pattern.count("*")
    if wildcard_count == 0:
        key = self.make_key(keys_pattern, version=version)
        qs = CacheEntry.objects.filter(cache_key=key)
    elif wildcard_count == 1 and keys_pattern.endswith("*"):
        # This also handles a pattern of "*". The prefix is then "", which will end up as an
        # `all` query, +/- the various prefixes prepended to it.
        prefix = self.make_key(keys_pattern[:-1], version=version)
        qs = CacheEntry.objects.filter(cache_key__startswith=prefix)
    else:
        # Escape every literal segment so that regex metacharacters in the key (".", "+",
        # "(", ...) only match themselves, then anchor both ends so the regex path has the
        # same whole-key semantics as the equality and startswith branches above.
        glob_key = self.make_key(keys_pattern, version=version)
        escaped_segments = [re.escape(segment) for segment in glob_key.split("*")]
        regex = "^" + ".*".join(escaped_segments) + "$"
        qs = CacheEntry.objects.filter(cache_key__regex=regex)
    return [self.reverse_key_func(key) for key in qs.values_list("cache_key", flat=True)]
@@ -0,0 +1,68 @@
"""Tests for ``DatabaseCache.keys`` glob-to-SQL translation.
``cache.keys("prefix*")`` must use ``__startswith`` (LIKE), not the regex
path. Pure unit tests — ``CacheEntry.objects`` is mocked.
"""
from typing import Any, cast
from unittest import TestCase, mock
from django_postgres_cache.backend import DatabaseCache
def _make_cache() -> DatabaseCache:
    """Build a bare ``DatabaseCache`` that needs neither Django settings nor a DB.

    ``__new__`` is used so that ``__init__`` never runs; only the attributes
    assigned below are set on the returned instance.
    """

    def _key_func(key, key_prefix, version):
        # The prefix is deliberately left out of the generated key.
        return f":{version}:{key}"

    def _identity(k):
        # The tests make the mocked filter return [], so the reverse-key
        # function is never actually called; passing the key through suffices.
        return k

    instance = DatabaseCache.__new__(DatabaseCache)
    instance.key_prefix = ""
    instance.version = 1
    instance.key_func = _key_func
    instance.reverse_key_func = _identity
    return instance
class TestKeysGlobToLookup(TestCase):
    """The SQL lookup chosen for each glob pattern shape.

    Expected lookup values start with ``":1:"`` because ``_make_cache`` sets
    ``version = 1`` and a key function producing ``":{version}:{key}"``.
    NOTE(review): this assumes ``make_key`` falls back to ``cache.version``
    and delegates to ``cache.key_func`` - confirm against the backend.
    """

    def _captured_filter_kwargs(self, pattern: str) -> dict[str, Any]:
        """Run ``cache.keys(pattern)`` and return the kwargs passed to
        ``CacheEntry.objects.filter(...)``."""
        cache = _make_cache()
        with mock.patch("django_postgres_cache.backend.CacheEntry") as mock_entry:
            mock_entry.objects.filter.return_value.values_list.return_value = []
            cache.keys(pattern)
        # Exactly one query must be built, whatever the pattern shape.
        self.assertEqual(mock_entry.objects.filter.call_count, 1)
        return cast(dict[str, Any], mock_entry.objects.filter.call_args.kwargs)

    def test_simple_prefix_glob_uses_startswith(self) -> None:
        """``cache.keys("foo*")`` uses ``__startswith``, not ``__regex``."""
        kwargs = self._captured_filter_kwargs("foo*")
        # Comparing the whole dict also proves the trailing "*" was stripped
        # and that no other lookup was passed alongside.
        self.assertEqual(kwargs, {"cache_key__startswith": ":1:foo"})

    def test_realistic_authentik_prefix_glob_uses_startswith(self) -> None:
        """The actual hot-query pattern from the bug report uses ``__startswith``."""
        kwargs = self._captured_filter_kwargs("goauthentik.io/policies/app_access/*")
        self.assertEqual(
            kwargs,
            {"cache_key__startswith": ":1:goauthentik.io/policies/app_access/"},
        )

    def test_bare_wildcard_uses_startswith(self) -> None:
        """``cache.keys("*")`` is a prefix query on the empty user key."""
        kwargs = self._captured_filter_kwargs("*")
        self.assertEqual(kwargs, {"cache_key__startswith": ":1:"})

    def test_exact_match_uses_equality(self) -> None:
        """No-wildcard patterns use primary-key equality."""
        kwargs = self._captured_filter_kwargs("exact")
        self.assertEqual(kwargs, {"cache_key": ":1:exact"})

    def test_complex_glob_falls_back_to_regex(self) -> None:
        """Multiple wildcards or non-suffix wildcards fall back to ``__regex``."""
        kwargs = self._captured_filter_kwargs("foo*bar*")
        self.assertIn("cache_key__regex", kwargs)
        self.assertNotIn("cache_key__startswith", kwargs)
        self.assertNotIn("cache_key", kwargs)

    def test_single_inner_wildcard_falls_back_to_regex(self) -> None:
        """A lone wildcard that is not the last character needs ``__regex``."""
        kwargs = self._captured_filter_kwargs("foo*bar")
        self.assertIn("cache_key__regex", kwargs)
        self.assertNotIn("cache_key__startswith", kwargs)
        self.assertNotIn("cache_key", kwargs)

    def test_leading_wildcard_falls_back_to_regex(self) -> None:
        """A leading wildcard cannot reduce to LIKE — fall back to ``__regex``."""
        kwargs = self._captured_filter_kwargs("*foo")
        self.assertIn("cache_key__regex", kwargs)
        self.assertNotIn("cache_key__startswith", kwargs)
        self.assertNotIn("cache_key", kwargs)