Context Navigation

Back to Ticket #14030

Ticket #14030: 14030.patch

File 14030.patch, 19.8 KB (added by Nate Bragg, 13 years ago)

django/db/models/aggregates.py

From c7a74c08def758c62997ba037eccfb8f73ba3efc Mon Sep 17 00:00:00 2001
From: Nate Bragg <jonathan.bragg@alum.rpi.edu>
Date: Thu, 19 Jan 2012 21:01:32 -0500
Subject: [PATCH] An attempt at rebasing out the changes required for
 supporting F expressions in aggregation from the more
 complex patch supporting conditional aggregation for
 #11305.

Additional changes needed to make F expressions usable without
being passed in inside an aggregation function.

Also added some doc, and some tests.
---
 django/db/models/aggregates.py        |    2 +
 django/db/models/sql/aggregates.py    |   20 ++++++-
 django/db/models/sql/compiler.py      |   39 +++++++++-----
 django/db/models/sql/expressions.py   |    3 +
 django/db/models/sql/query.py         |   93 +++++++++++++++++++-------------
 django/db/models/sql/where.py         |   10 ++++
 django/test/testcases.py              |   12 ++++
 docs/ref/models/querysets.txt         |   23 ++++++++
 tests/modeltests/aggregation/tests.py |   31 +++++++++++
 9 files changed, 177 insertions(+), 56 deletions(-)

diff --git a/django/db/models/aggregates.py b/django/db/models/aggregates.py
index a2349cf..61848fe 100644

                class Aggregate(object):
         self.extra = extra
     def _default_alias(self):
+        if hasattr(self.lookup, 'evaluate'):
+             raise ValueError('When aggregating over an expression, you need to give an alias.')
         return '%s__%s' % (self.lookup, self.name.lower())
     default_alias = property(_default_alias)

django/db/models/sql/aggregates.py

diff --git a/django/db/models/sql/aggregates.py b/django/db/models/sql/aggregates.py
index 207bc0c..7e131b9 100644

-              a
 """
 Classes to represent the default SQL aggregate functions
 """
+from django.db.models.sql.expressions import SQLEvaluator
 class AggregateField(object):
     """An internal field mockup used to identify aggregates in the
-…
+               class Aggregate(object):
     is_ordinal = False
     is_computed = False
     sql_template = '%(function)s(%(field)s)'
+    sql_function = ''
     def __init__(self, col, source=None, is_summary=False, **extra):
         """Instantiate an SQL aggregate
-…
+               class Aggregate(object):
                 tmp = computed_aggregate_field
             else:
                 tmp = tmp.source
+        # We don't know the real source of this aggregate, and the
+        # aggregate doesn't define ordinal or computed either. So
+        # we default to computed for these cases.
+        if tmp is None:
+            tmp = computed_aggregate_field
         self.field = tmp
     def relabel_aliases(self, change_map):
         if isinstance(self.col, (list, tuple)):
             self.col = (change_map.get(self.col[0], self.col[0]), self.col[1])
+        else:
+            self.col.relabel_aliases(change_map)
     def as_sql(self, qn, connection):
         "Return the aggregate, rendered as SQL."
+        col_params = []
         if hasattr(self.col, 'as_sql'):
+            field_name = self.col.as_sql(qn, connection)
+            if isinstance(self.col, SQLEvaluator):
+                field_name, col_params = self.col.as_sql(qn, connection)
+            else:
+                field_name = self.col.as_sql(qn, connection)
         elif isinstance(self.col, (list, tuple)):
             field_name = '.'.join([qn(c) for c in self.col])
         else:
-…
+               class Aggregate(object):
+        }
         params.update(self.extra)
         return self.sql_template % params
+        return (self.sql_template % params, col_params)
 class Avg(Aggregate):

django/db/models/sql/compiler.py

diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
index 72948f9..bf3cb25 100644

                class SQLCompiler(object):
         # as the pre_sql_setup will modify query state in a way that forbids
         # another run of it.
         self.refcounts_before = self.query.alias_refcount.copy()
         out_cols = self.get_columns(with_col_aliases)
+        out_cols, c_params = self.get_columns(with_col_aliases)
         ordering, ordering_group_by = self.get_ordering()
         distinct_fields = self.get_distinct()
-…
+               class SQLCompiler(object):
         params = []
         for val in self.query.extra_select.itervalues():
             params.extend(val[1])
+        # Extra-select comes before aggregation in the select list
+        params.extend(c_params)
         result = ['SELECT']
-…
+               class SQLCompiler(object):
         qn = self.quote_name_unless_alias
         qn2 = self.connection.ops.quote_name
         result = ['(%s) AS %s' % (col[0], qn2(alias)) for alias, col in self.query.extra_select.iteritems()]
+        query_params = []
         aliases = set(self.query.extra_select.keys())
         if with_aliases:
             col_aliases = aliases.copy()
-…
+               class SQLCompiler(object):
             aliases.update(new_aliases)
         max_name_length = self.connection.ops.max_name_length()
+        result.extend([
+            '%s%s' % (
+                aggregate.as_sql(qn, self.connection),
+                alias is not None
+                    and ' AS %s' % qn(truncate_name(alias, max_name_length))
+                    or ''
+        for alias, aggregate in self.query.aggregate_select.items():
+            sql, params = aggregate.as_sql(qn, self.connection)
+            result.append(
+                '%s%s' % (
+                    sql,
+                    alias is not None
+                       and ' AS %s' % qn(truncate_name(alias, max_name_length))
+                       or ''
+                )
+            )
+            for alias, aggregate in self.query.aggregate_select.items()
+        ])
+            query_params.extend(params)
         for table, col in self.query.related_select_cols:
             r = '%s.%s' % (qn(table), qn(col))
-…
+               class SQLCompiler(object):
                 col_aliases.add(col)
         self._select_aliases = aliases
         return result
+        return result, query_params
     def get_default_columns(self, with_aliases=False, col_aliases=None,
             start_alias=None, opts=None, as_pairs=False, local_only=False):
-…
+               class SQLAggregateCompiler(SQLCompiler):
         """
         if qn is None:
             qn = self.quote_name_unless_alias
+        buf = []
+        a_params = []
+        for aggregate in self.query.aggregate_select.values():
+            sql, query_params = aggregate.as_sql(qn, self.connection)
+            buf.append(sql)
+            a_params.extend(query_params)
+        aggregate_sql = ', '.join(buf)
         sql = ('SELECT %s FROM (%s) subquery' % (
+            ', '.join([
+                aggregate.as_sql(qn, self.connection)
+                for aggregate in self.query.aggregate_select.values()
+            ]),
+            aggregate_sql,
             self.query.subquery)
+        )
         params = self.query.sub_params
+        params = tuple(a_params) + (self.query.sub_params)
         return (sql, params)
 class SQLDateCompiler(SQLCompiler):

django/db/models/sql/expressions.py

diff --git a/django/db/models/sql/expressions.py b/django/db/models/sql/expressions.py
index 1bbf742..f9c23a9 100644

                class SQLEvaluator(object):
         for child in node.children:
             if hasattr(child, 'evaluate'):
                 sql, params = child.evaluate(self, qn, connection)
+                if isinstance(sql, tuple):
+                    expression_params.extend(sql[1])
+                    sql = sql[0]
             else:
                 sql, params = '%s', (child,)

django/db/models/sql/query.py

diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
index ed2bc06..2c0e973 100644

                from django.utils.encoding import force_unicode
 from django.utils.tree import Node
 from django.db import connections, DEFAULT_DB_ALIAS
 from django.db.models import signals
+from django.db.models.aggregates import Aggregate
 from django.db.models.expressions import ExpressionNode
 from django.db.models.fields import FieldDoesNotExist
 from django.db.models.query_utils import InvalidQuery
-…
+               class Query(object):
         Adds a single aggregate expression to the Query
         """
         opts = model._meta
+        field_list = aggregate.lookup.split(LOOKUP_SEP)
+        if len(field_list) == 1 and aggregate.lookup in self.aggregates:
+            # Aggregate is over an annotation
+            field_name = field_list[0]
+            col = field_name
+            source = self.aggregates[field_name]
+            if not is_summary:
+                raise FieldError("Cannot compute %s('%s'): '%s' is an aggregate" % (
+                    aggregate.name, field_name, field_name))
+        elif ((len(field_list) > 1) or
+            (field_list[0] not in [i.name for i in opts.fields]) or
+            self.group_by is None or
+            not is_summary):
+            # If:
+            #   - the field descriptor has more than one part (foo__bar), or
+            #   - the field descriptor is referencing an m2m/m2o field, or
+            #   - this is a reference to a model field (possibly inherited), or
+            #   - this is an annotation over a model field
+            # then we need to explore the joins that are required.
+            field, source, opts, join_list, last, _ = self.setup_joins(
+                field_list, opts, self.get_initial_alias(), False)
+            # Process the join chain to see if it can be trimmed
+            col, _, join_list = self.trim_joins(source, join_list, last, False)
+            # If the aggregate references a model or field that requires a join,
+            # those joins must be LEFT OUTER - empty join rows must be returned
+            # in order for zeros to be returned for those aggregates.
+            for column_alias in join_list:
+                self.promote_alias(column_alias, unconditional=True)
+            col = (join_list[-1], col)
+        if hasattr(aggregate, 'evaluate'):
+            # If aggregate is a query expression, make it an aggregate
+            # This is a 'cheat' to make an empty aggregate - i.e.,
+            # one that has no attached function.  This is because
+            # no computation needs to be done outside that which the
+            # F expression represents
+            aggregate = Aggregate(aggregate)
+            aggregate.name = 'Aggregate'
+        if hasattr(aggregate.lookup, 'evaluate'):
+            # If lookup is a query expression, evaluate it
+            col = SQLEvaluator(aggregate.lookup, self)
+            # TODO: find out the real source of this field. If any field has
+            # is_computed, then source can be set to is_computed.
+            source = None
         else:
+            # The simplest cases. No joins required -
+            # just reference the provided column alias.
+            field_name = field_list[0]
+            source = opts.get_field(field_name)
+            col = field_name
+            field_list = aggregate.lookup.split(LOOKUP_SEP)
+            join_list = []
+            if len(field_list) == 1 and aggregate.lookup in self.aggregates:
+                # Aggregate is over an annotation
+                field_name = field_list[0]
+                col = field_name
+                source = self.aggregates[field_name]
+                if not is_summary:
+                    raise FieldError("Cannot compute %s('%s'): '%s' is an aggregate" % (
+                        aggregate.name, field_name, field_name))
+            elif ((len(field_list) > 1) or
+                (field_list[0] not in [i.name for i in opts.fields]) or
+                self.group_by is None or
+                not is_summary):
+                # If:
+                #   - the field descriptor has more than one part (foo__bar), or
+                #   - the field descriptor is referencing an m2m/m2o field, or
+                #   - this is a reference to a model field (possibly inherited), or
+                #   - this is an annotation over a model field
+                # then we need to explore the joins that are required.
+                field, source, opts, join_list, last, _ = self.setup_joins(
+                    field_list, opts, self.get_initial_alias(), False)
+                # Process the join chain to see if it can be trimmed
+                col, _, join_list = self.trim_joins(source, join_list, last, False)
+                # If the aggregate references a model or field that requires a join,
+                # those joins must be LEFT OUTER - empty join rows must be returned
+                # in order for zeros to be returned for those aggregates.
+                for column_alias in join_list:
+                    self.promote_alias(column_alias, unconditional=True)
+                col = (join_list[-1], col)
+            else:
+                # The simplest cases. No joins required -
+                # just reference the provided column alias.
+                field_name = field_list[0]
+                source = opts.get_field(field_name)
+                col = field_name
         # Add the aggregate to the query
         aggregate.add_to_query(self, alias, col=col, source=source, is_summary=is_summary)

django/db/models/sql/where.py

diff --git a/django/db/models/sql/where.py b/django/db/models/sql/where.py
index 1455ba6..8b530bd 100644

                class WhereNode(tree.Node):
         it.
         """
         lvalue, lookup_type, value_annot, params_or_value = child
+        additional_params = []
         if hasattr(lvalue, 'process'):
             try:
                 lvalue, params = lvalue.process(lookup_type, params_or_value, connection)
-…
+               class WhereNode(tree.Node):
         else:
             # A smart object with an as_sql() method.
             field_sql = lvalue.as_sql(qn, connection)
+            if isinstance(field_sql, tuple):
+                # It returned also params
+                additional_params.extend(field_sql[1])
+                field_sql = field_sql[0]
         if value_annot is datetime.datetime:
             cast_sql = connection.ops.datetime_cast_sql()
-…
+               class WhereNode(tree.Node):
         if hasattr(params, 'as_sql'):
             extra, params = params.as_sql(qn, connection)
+            if isinstance(extra, tuple):
+                params = params + tuple(extra[1])
+                extra = extra[0]
             cast_sql = ''
         else:
             extra = ''
-…
+               class WhereNode(tree.Node):
             lookup_type = 'isnull'
             value_annot = True
+        additional_params.extend(params)
+        params = additional_params
         if lookup_type in connection.operators:
             format = "%s %%s %%s" % (connection.ops.lookup_cast(lookup_type),)
             return (format % (field_sql,

django/test/testcases.py

diff --git a/django/test/testcases.py b/django/test/testcases.py
index 53ea02a..ba4f496 100644

                class TransactionTestCase(SimpleTestCase):
             return self.assertEqual(set(map(transform, qs)), set(values))
         return self.assertEqual(map(transform, qs), values)
+    def assertQuerysetAlmostEqual(self, qs, values, transform=repr, ordered=True, places=7):
+        """
+        Assert that a queryset matches ``values`` after rounding.
+
+        Each item of ``qs`` is passed through ``transform`` and then rounded
+        to ``places`` decimal places; each item of ``values`` is rounded the
+        same way. The two rounded sequences are compared with assertEqual,
+        or as sets when ``ordered`` is False.
+        """
+        # This could have been done by iterating zip(map(transform, qs), values)
+        # and checking each pair with assertAlmostEqual, which rounds the
+        # difference of each pair. Rounding both sides first gives much nicer
+        # error messages and allows an unordered comparison, at the cost of
+        # about half a digit of accuracy.
+        # NOTE(review): the default transform=repr produces strings, which
+        # round() does not accept, so callers appear to need to supply a
+        # numeric transform -- confirm the intended default.
+        round_to = lambda v: round(v,places)
+        tqs = map(round_to, map(transform, qs) )
+        tvs = map(round_to, values)
+        if not ordered:
+            # Unordered mode: compare the rounded results as sets.
+            return self.assertEqual(set(tqs), set(tvs))
+        return self.assertEqual(tqs, tvs)
     def assertNumQueries(self, num, func=None, *args, **kwargs):
         using = kwargs.pop("using", DEFAULT_DB_ALIAS)
         conn = connections[using]

docs/ref/models/querysets.txt

diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
index 7633555..d175f44 100644

                control the name of the annotation::
     >>> q[0].number_of_entries
+In addition to aggregation functions, :ref:`F() objects <query-expressions>`
+can be used to perform a specific mathematical operation::
+    # The 1.0 is to force float conversion
+    >>> q = Entry.objects.annotate(cpb_ratio=F('n_comments')*1.0/F('n_pingbacks'))
+    # The ratio of comments to pingbacks for the first blog entry
+    >>> q[0].cpb_ratio
+.0625
 For an in-depth discussion of aggregation, see :doc:`the topic guide on
 Aggregation </topics/db/aggregation>`.
-…
+               control the name of the aggregation value that is returned::
     >>> q = Blog.objects.aggregate(number_of_entries=Count('entry'))
     {'number_of_entries': 16}
+Inside aggregation functions, :ref:`F() objects <query-expressions>`
+can be used to perform a specific mathematical operation::
+    # The 1.0 is to force float conversion
+    >>> q = Entry.objects.aggregate(avg_cpb_ratio=Avg(F('n_comments')*1.0/F('n_pingbacks')))
+    {'avg_cpb_ratio': 0.125}
 For an in-depth discussion of aggregation, see :doc:`the topic guide on
 Aggregation </topics/db/aggregation>`.
-…
+               Django provides the following aggregation functions in the
 aggregate functions, see
 :doc:`the topic guide on aggregation </topics/db/aggregation>`.
+Note that in addition to taking a named field, aggregation
+functions can take :ref:`F() objects <query-expressions>`.
+.. admonition:: Default aliases
+    When using ``F()`` objects, note that there is no default alias.
 Avg
 ~~~

tests/modeltests/aggregation/tests.py

diff --git a/tests/modeltests/aggregation/tests.py b/tests/modeltests/aggregation/tests.py
index a35dbb3..a5d3a4e 100644

                import datetime
 from decimal import Decimal
 from django.db.models import Avg, Sum, Count, Max, Min
+from django.db.models import F
 from django.test import TestCase, Approximate
 from .models import Author, Publisher, Book, Store
-…
+               class BaseAggregateTestCase(TestCase):
         self.assertEqual(len(vals), 1)
         self.assertAlmostEqual(vals["amazon_mean"], 4.08, places=2)
+    def test_aggregate_f_expression(self):
+        # An F() arithmetic expression can be passed to an aggregate function:
+        # average price/pages over all books. The 1.0 factor is there to force
+        # float conversion (see the matching docs example in this patch).
+        vals = Book.objects.all().aggregate(price_per_page=Avg(F('price')*1.0/F('pages')))
+        # Exactly one aggregate was requested, so exactly one key comes back.
+        self.assertEqual(len(vals), 1)
+        self.assertAlmostEqual(vals["price_per_page"], 0.0745110754864109, places=2)
+    def test_annotate_f_expression(self):
+        # Case 1: a bare F() expression (not wrapped in an aggregate function)
+        # used directly as an annotation on each Book.
+        # NOTE(review): neither queryset below has an explicit order_by(), yet
+        # assertQuerysetAlmostEqual is called with its default ordered=True;
+        # this presumably relies on the models' default ordering -- confirm.
+        self.assertQuerysetAlmostEqual(
+            Book.objects.all().annotate(price_per_page=F('price')*1.0/F('pages')), [
.0671140939597315,
.0437310606060606,
.0989666666666667,
.0848285714285714,
.0731448763250883,
.0792811839323467,
+            ],
+            lambda b: b.price_per_page,
+            places=4
+        )
+        # Case 2: an aggregate (Avg) over an F() expression that spans the
+        # book relation, annotated onto each Publisher.
+        self.assertQuerysetAlmostEqual(
+            Publisher.objects.all().annotate(price_per_page=Avg(F('book__price')*1.0/F('book__pages'))), [
.0830403803131991,
.0437310606060606,
.0789867238768299,
.0792811839323467,
+            ],
+            lambda p: p.price_per_page,
+            places=4
+        )
     def test_annotate_basic(self):
         self.assertQuerysetEqual(
             Book.objects.annotate().order_by('pk'), [

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #14030: 14030.patch

django/db/models/aggregates.py

django/db/models/sql/aggregates.py

django/db/models/sql/compiler.py

django/db/models/sql/expressions.py

django/db/models/sql/query.py

django/db/models/sql/where.py

django/test/testcases.py

docs/ref/models/querysets.txt

tests/modeltests/aggregation/tests.py

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us