From eab41bd31a2c72d5a7255acfb8aa1c4d01ef2065 Mon Sep 17 00:00:00 2001 From: ontowhee <82607723+ontowhee@users.noreply.github.com> Date: Mon, 9 Dec 2024 07:29:18 -0800 Subject: [PATCH 1/7] wip optimize queries for dashboard index page --- dashboard/views.py | 27 +++++++++++++++++++++------ 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/dashboard/views.py b/dashboard/views.py index 0c6238793..95024c7c8 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -1,13 +1,13 @@ import datetime -import operator +from django.contrib.contenttypes.models import ContentType from django.core.cache import cache from django.forms.models import model_to_dict from django.http.response import Http404, JsonResponse from django.shortcuts import render from django.utils.translation import gettext as _ -from .models import Metric +from .models import Datum, Metric from .utils import generation_key @@ -19,12 +19,27 @@ def index(request): if data is None: metrics = [] for MC in Metric.__subclasses__(): - metrics.extend(MC.objects.filter(show_on_dashboard=True)) - metrics = sorted(metrics, key=operator.attrgetter("display_position")) + metrics.extend(MC.objects.filter(show_on_dashboard=True).select_related('category')) + + content_types = ContentType.objects.get_for_models(*metrics) + datum_queryset = Datum.objects.none() + for metric, content_type in content_types.items(): + datum_queryset = datum_queryset.union( + Datum.objects.filter(content_type_id=content_type.id, object_id=metric.id) + .order_by('-timestamp')[0:1].select_related('content_type') + ) + + datums = { + (datum.object_id, datum.content_type): datum + for datum in datum_queryset + } data = [] - for metric in metrics: - data.append({"metric": metric, "latest": metric.data.latest()}) + for metric, content_type in content_types.items(): + latest = datums.get((metric.id, content_type)) + if latest: + data.append({"metric": metric, "latest": latest}) + data = sorted(data, key=lambda elem: elem["metric"].display_position) cache.set(key, data, 60 * 60, version=generation) return render(request, "dashboard/index.html", {"data": data}) From 785a17a3f015ca7dfa40f333c38437039e3e5ce2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 9 Dec 2024 15:45:52 +0000 Subject: [PATCH 2/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dashboard/views.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/dashboard/views.py b/dashboard/views.py index 95024c7c8..ff55393f5 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -19,19 +19,23 @@ def index(request): if data is None: metrics = [] for MC in Metric.__subclasses__(): - metrics.extend(MC.objects.filter(show_on_dashboard=True).select_related('category')) + metrics.extend( + MC.objects.filter(show_on_dashboard=True).select_related("category") + ) content_types = ContentType.objects.get_for_models(*metrics) datum_queryset = Datum.objects.none() for metric, content_type in content_types.items(): datum_queryset = datum_queryset.union( - Datum.objects.filter(content_type_id=content_type.id, object_id=metric.id) - .order_by('-timestamp')[0:1].select_related('content_type') + Datum.objects.filter( + content_type_id=content_type.id, object_id=metric.id + ) + .order_by("-timestamp")[0:1] + .select_related("content_type") ) datums = { - (datum.object_id, datum.content_type): datum - for datum in datum_queryset + (datum.object_id, datum.content_type): datum for datum in datum_queryset } data = [] From 8fe1e0171ca3c1406ab9d452e5fdc6b1fe841ac5 Mon Sep 17 00:00:00 2001 From: ontowhee <82607723+ontowhee@users.noreply.github.com> Date: Tue, 10 Dec 2024 10:45:08 -0800 Subject: [PATCH 3/7] Update test to check num queries --- dashboard/tests.py | 4 +++- dashboard/views.py | 8 +++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/dashboard/tests.py b/dashboard/tests.py index 577158959..c01ddb24c 100644 --- a/dashboard/tests.py +++ b/dashboard/tests.py @@ -33,10 +33,12 @@ def setUp(self): def test_index(self): for MC in Metric.__subclasses__(): for metric in MC.objects.filter(show_on_dashboard=True): + metric.data.create(measurement=44) metric.data.create(measurement=42) request = self.factory.get(reverse("dashboard-index", host="dashboard")) - response = index(request) + with self.assertNumQueries(7): + response = index(request) self.assertContains(response, "Development dashboard") self.assertEqual(response.content.count(b'
Date: Tue, 10 Dec 2024 18:46:18 +0000 Subject: [PATCH 4/7] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- dashboard/views.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/dashboard/views.py b/dashboard/views.py index 458d46c73..fbe734809 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -29,8 +29,7 @@ def index(request): datum_queryset = datum_queryset.union( Datum.objects.filter( content_type_id=content_type.id, object_id=metric.id - ) - .order_by("-timestamp")[0:1] + ).order_by("-timestamp")[0:1] ) latest_datums = { From 14e2b13cdff5ab7aebf00706eda7b3e57cdfcf15 Mon Sep 17 00:00:00 2001 From: Baptiste Mispelon Date: Tue, 10 Dec 2024 20:10:53 +0100 Subject: [PATCH 5/7] Alternate approach --- dashboard/models.py | 22 ++++++++++++++++++++-- dashboard/views.py | 30 +++++++++++++----------------- 2 files changed, 33 insertions(+), 19 deletions(-) diff --git a/dashboard/models.py b/dashboard/models.py index a24036951..b8832073a 100644 --- a/dashboard/models.py +++ b/dashboard/models.py @@ -1,6 +1,8 @@ import ast import calendar import datetime +import operator +from functools import reduce import requests from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation @@ -105,7 +107,6 @@ def _gather_data_periodic(self, since, period): scale but works for now. """ OFFSET = "2 hours" # HACK! - ctid = ContentType.objects.get_for_model(self).id c = connections["default"].cursor() c.execute( @@ -117,10 +118,14 @@ def _gather_data_periodic(self, since, period): AND object_id = %s AND timestamp >= %s GROUP BY 1;""", - [period, OFFSET, ctid, self.id, since], + [period, OFFSET, self.content_type.id, self.id, since], ) return [(calendar.timegm(t.timetuple()), float(m)) for (t, m) in c.fetchall()] + @property + def content_type(self): + return ContentType.objects.get_for_model(self) + class TracTicketMetric(Metric): query = models.TextField() @@ -249,6 +254,17 @@ def link(self): return self.urljoin(self.jenkins_root_url, "job", self.build_name) +class DatumQuerySet(models.QuerySet): + def metrics(self, *metrics): + """ + Return only the data from the given metrics. + """ + if not metrics: + return self.none() + qobjs = [models.Q(content_type=m.content_type, object_id=m.pk) for m in metrics] + return self.filter(reduce(operator.or_, qobjs)) + + class Datum(models.Model): metric = GenericForeignKey() content_type = models.ForeignKey( @@ -258,6 +274,8 @@ class Datum(models.Model): timestamp = models.DateTimeField(default=datetime.datetime.now) measurement = models.BigIntegerField() + objects = DatumQuerySet.as_manager() + class Meta: ordering = ["-timestamp"] get_latest_by = "timestamp" diff --git a/dashboard/views.py b/dashboard/views.py index fbe734809..a2a449119 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -1,6 +1,6 @@ import datetime +import operator -from django.contrib.contenttypes.models import ContentType from django.core.cache import cache from django.forms.models import model_to_dict from django.http.response import Http404, JsonResponse @@ -22,25 +22,21 @@ def index(request): metrics.extend( MC.objects.filter(show_on_dashboard=True).select_related("category") ) - - content_types = ContentType.objects.get_for_models(*metrics) - datum_queryset = Datum.objects.none() - for metric, content_type in content_types.items(): - datum_queryset = datum_queryset.union( - Datum.objects.filter( - content_type_id=content_type.id, object_id=metric.id - ).order_by("-timestamp")[0:1] - ) - - latest_datums = { - (datum.object_id, datum.content_type_id): datum for datum in datum_queryset + metrics = sorted(metrics, key=operator.attrgetter("display_position")) + + metric_latest_querysets = [ + Datum.objects.metrics(metric).order_by("-timestamp")[0:1] + for metric in metrics + ] + data_latest = Datum.objects.none().union(*metric_latest_querysets) + latest_by_metric = { + (datum.content_type_id, datum.object_id): datum for datum in data_latest } data = [] - for metric, content_type in content_types.items(): - if latest := latest_datums.get((metric.id, content_type.id)): - data.append({"metric": metric, "latest": latest}) - data = sorted(data, key=lambda elem: elem["metric"].display_position) + for metric in metrics: + latest = latest_by_metric.get((metric.content_type.pk, metric.pk)) + data.append({"metric": metric, "latest": latest}) cache.set(key, data, 60 * 60, version=generation) return render(request, "dashboard/index.html", {"data": data}) From ce82b7a510e51c3b9ff3798bb25bab032aee74ca Mon Sep 17 00:00:00 2001 From: Baptiste Mispelon Date: Wed, 11 Dec 2024 14:38:33 +0100 Subject: [PATCH 6/7] =?UTF-8?q?Alternate=20alternate=20approach=20?= =?UTF-8?q?=F0=9F=99=83?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- dashboard/models.py | 15 --------------- dashboard/views.py | 3 +-- 2 files changed, 1 insertion(+), 17 deletions(-) diff --git a/dashboard/models.py b/dashboard/models.py index b8832073a..1aa11ffa9 100644 --- a/dashboard/models.py +++ b/dashboard/models.py @@ -1,8 +1,6 @@ import ast import calendar import datetime -import operator -from functools import reduce import requests from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation @@ -254,17 +252,6 @@ def link(self): return self.urljoin(self.jenkins_root_url, "job", self.build_name) -class DatumQuerySet(models.QuerySet): - def metrics(self, *metrics): - """ - Return only the data from the given metrics. - """ - if not metrics: - return self.none() - qobjs = [models.Q(content_type=m.content_type, object_id=m.pk) for m in metrics] - return self.filter(reduce(operator.or_, qobjs)) - - class Datum(models.Model): metric = GenericForeignKey() content_type = models.ForeignKey( @@ -274,8 +261,6 @@ class Datum(models.Model): timestamp = models.DateTimeField(default=datetime.datetime.now) measurement = models.BigIntegerField() - objects = DatumQuerySet.as_manager() - class Meta: ordering = ["-timestamp"] get_latest_by = "timestamp" diff --git a/dashboard/views.py b/dashboard/views.py index a2a449119..ab859f882 100644 --- a/dashboard/views.py +++ b/dashboard/views.py @@ -25,8 +25,7 @@ def index(request): metrics = sorted(metrics, key=operator.attrgetter("display_position")) metric_latest_querysets = [ - Datum.objects.metrics(metric).order_by("-timestamp")[0:1] - for metric in metrics + metric.data.order_by("-timestamp")[0:1] for metric in metrics ] data_latest = Datum.objects.none().union(*metric_latest_querysets) latest_by_metric = { From 2288df6adf8fa22b47fc48f0ae9adf0760e5aae0 Mon Sep 17 00:00:00 2001 From: Baptiste Mispelon Date: Wed, 11 Dec 2024 22:55:00 +0100 Subject: [PATCH 7/7] Alternate alternate alternate approach (last one I swear) --- dashboard/models.py | 40 ++++++++++++++++-- dashboard/templates/dashboard/index.html | 24 ++++++----- dashboard/tests.py | 52 ++++++++++++++++++++++-- dashboard/views.py | 37 ++++++++--------- 4 files changed, 114 insertions(+), 39 deletions(-) diff --git a/dashboard/models.py b/dashboard/models.py index 1aa11ffa9..11fc65430 100644 --- a/dashboard/models.py +++ b/dashboard/models.py @@ -6,6 +6,7 @@ from django.contrib.contenttypes.fields import GenericForeignKey, GenericRelation from django.contrib.contenttypes.models import ContentType from django.db import connections, models +from django.db.models.functions import JSONObject from django.utils.translation import gettext_lazy as _ from django_hosts.resolvers import reverse @@ -33,6 +34,35 @@ def __str__(self): return self.name +class MetricQuerySet(models.QuerySet): + def with_latest(self): + """ + Annotate the queryset with a `latest` JSON object containing two keys: + * `measurement` (int): the value of the most recent datum for that metric + * `timestamp` (str): the timestamp of the most recent datum + """ + data = Datum.objects.filter( + content_type=self.model.content_type(), + object_id=models.OuterRef("pk"), + ) + jsonobj = JSONObject( + measurement=models.F("measurement"), + timestamp=models.F("timestamp"), + ) + latest = models.Subquery(data.values_list(jsonobj).order_by("-timestamp")[:1]) + + return self.annotate(latest=latest) + + def for_dashboard(self): + """ + Return a queryset optimized for being displayed on the dashboard index + page. + """ + return ( + self.filter(show_on_dashboard=True).select_related("category").with_latest() + ) + + class Metric(models.Model): name = models.CharField(max_length=300) slug = models.SlugField() @@ -49,6 +79,8 @@ class Metric(models.Model): unit = models.CharField(max_length=100) unit_plural = models.CharField(max_length=100) + objects = MetricQuerySet.as_manager() + class Meta: abstract = True @@ -116,13 +148,13 @@ def _gather_data_periodic(self, since, period): AND object_id = %s AND timestamp >= %s GROUP BY 1;""", - [period, OFFSET, self.content_type.id, self.id, since], + [period, OFFSET, self.content_type().id, self.id, since], ) return [(calendar.timegm(t.timetuple()), float(m)) for (t, m) in c.fetchall()] - @property - def content_type(self): - return ContentType.objects.get_for_model(self) + @classmethod + def content_type(cls): + return ContentType.objects.get_for_model(cls) class TracTicketMetric(Metric): diff --git a/dashboard/templates/dashboard/index.html b/dashboard/templates/dashboard/index.html index e20ccb1d3..7aed3f1c1 100644 --- a/dashboard/templates/dashboard/index.html +++ b/dashboard/templates/dashboard/index.html @@ -3,23 +3,25 @@ {% block content %}
- {% for report in data %} - {% ifchanged report.metric.category %} - {% if report.metric.category %}

{{ report.metric.category }}

{% endif %} + {% for metric in data %} + {% ifchanged metric.category %} + {% if metric.category %}

{{ metric.category }}

{% endif %} {% endifchanged %} -
-

{{ report.metric.name }}

+ {% endfor %} -

- {% blocktranslate with timestamp=data.0.latest.timestamp|timesince %}Updated {{ timestamp }} ago.{% endblocktranslate %} -

+ {% if last_updated %} +

+ {% blocktranslate with timestamp=last_updated|timesince %}Updated {{ timestamp }} ago.{% endblocktranslate %} +

+ {% endif %}
{% endblock %} diff --git a/dashboard/tests.py b/dashboard/tests.py index c01ddb24c..102312766 100644 --- a/dashboard/tests.py +++ b/dashboard/tests.py @@ -16,6 +16,7 @@ from .models import ( METRIC_PERIOD_DAILY, METRIC_PERIOD_WEEKLY, + Category, GithubItemCountMetric, GitHubSearchCountMetric, Metric, @@ -33,12 +34,10 @@ def setUp(self): def test_index(self): for MC in Metric.__subclasses__(): for metric in MC.objects.filter(show_on_dashboard=True): - metric.data.create(measurement=44) metric.data.create(measurement=42) request = self.factory.get(reverse("dashboard-index", host="dashboard")) - with self.assertNumQueries(7): - response = index(request) + response = index(request) self.assertContains(response, "Development dashboard") self.assertEqual(response.content.count(b'