Context Navigation

Back to Ticket #6422

Ticket #6422: distinct_on.diff

File distinct_on.diff, 12.8 KB (added by Jeffrey Gelens, 13 years ago)

AUTHORS

diff --git a/AUTHORS b/AUTHORS

-              a
     ye7cakf02@sneakemail.com
     ymasuda@ethercube.com
     Jesse Young <adunar@gmail.com>
     Mykola Zamkovoi <nickzam@gmail.com>
     zegor
     Gasper Zejn <zejn@kiberpipa.org>
     Jarek Zgoda <jarek.zgoda@gmail.com>
     Cheng Zhang
+    Jeffrey Gelens <jeffrey@gelens.org>
 A big THANK YOU goes to:
     Rob Curley and Ralph Gage for letting us open-source Django.
     Frank Wiles for making excellent arguments for open-sourcing, and for
     his sage sysadmin advice.

django/db/backends/__init__.py

diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py

-              a
     # Features that need to be confirmed at runtime
     # Cache whether the confirmation has been performed.
     _confirmed = False
     supports_transactions = None
     supports_stddev = None
     can_introspect_foreign_keys = None
+    # Support for the DISTINCT ON clause
+    can_distinct_on_fields = False
     def __init__(self, connection):
         self.connection = connection
     def confirm(self):
         "Perform manual checks of any database features that might vary between installs"
         self._confirmed = True
         self.supports_transactions = self._supports_transactions()
         self.supports_stddev = self._supports_stddev()
 …
     def fulltext_search_sql(self, field_name):
         """
         Returns the SQL WHERE clause to use in order to perform a full-text
         search of the given field_name. Note that the resulting string should
         contain a '%s' placeholder for the value being searched against.
         """
         raise NotImplementedError('Full-text search is not implemented for this database backend')
+    def distinct(self, db_table, fields):
+        """
+        Returns an SQL DISTINCT clause which removes duplicate rows from the
+        result set. If any fields are given, only the given fields are being
+        checked for duplicates.
+        """
+        if fields:
+            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
+        else:
+            return 'DISTINCT'
     def last_executed_query(self, cursor, sql, params):
         """
         Returns a string of the query last executed by the given cursor, with
         placeholders replaced with actual values.
         `sql` is the raw query containing placeholders, and `params` is the
         sequence of parameters. These are used by default, but this method
         exists for database backends to provide a better implementation

django/db/backends/postgresql_psycopg2/base.py

diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py

-              a
 class DatabaseFeatures(BaseDatabaseFeatures):
     needs_datetime_string_cast = False
     can_return_id_from_insert = True
     requires_rollback_on_dirty_transaction = True
     has_real_datatype = True
     can_defer_constraint_checks = True
     has_select_for_update = True
     has_select_for_update_nowait = True
+    can_distinct_on_fields = True
 class DatabaseWrapper(BaseDatabaseWrapper):
     vendor = 'postgresql'
     operators = {
         'exact': '= %s',
         'iexact': '= UPPER(%s)',
         'contains': 'LIKE %s',

django/db/backends/postgresql_psycopg2/operations.py

diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py

-              a
         macro in src/include/pg_config_manual.h .
         This implementation simply returns 63, but can easily be overridden by a
         custom database backend that inherits most of its behavior from this one.
         """
         return 63
+    def distinct(self, db_table, fields):
+        if fields:
+            table_name = self.quote_name(db_table)
+            fields = [table_name + "." + self.quote_name(field) for field in fields]
+            return 'DISTINCT ON (%s)' % ', '.join(fields)
+        else:
+            return 'DISTINCT'
     def last_executed_query(self, cursor, sql, params):
         # http://initd.org/psycopg/docs/cursor.html#cursor.query
         # The query attribute is a Psycopg extension to the DB API 2.0.
         return cursor.query
     def return_insert_id(self):
         return "RETURNING %s", ()

django/db/models/query.py

diff --git a/django/db/models/query.py b/django/db/models/query.py

-              a
         """
         assert self.query.can_filter(), \
                 "Cannot reorder a query once a slice has been taken."
         obj = self._clone()
         obj.query.clear_ordering()
         obj.query.add_ordering(*field_names)
         return obj
-    def distinct(self, true_or_false=True):
+    def distinct(self, *field_names):
         """
         Returns a new QuerySet instance that will select only distinct results.
         """
         obj = self._clone()
-        obj.query.distinct = true_or_false
+        obj.query.add_distinct_fields(field_names)
+        obj.query.distinct = True
         return obj
     def extra(self, select=None, where=None, params=None, tables=None,
               order_by=None, select_params=None):
         """
         Adds extra SQL fragments to the query.
         """
         assert self.query.can_filter(), \
 …
         return self
     def order_by(self, *field_names):
         """
         Always returns EmptyQuerySet.
         """
         return self
-    def distinct(self, true_or_false=True):
+    def distinct(self, fields=None):
         """
         Always returns EmptyQuerySet.
         """
         return self
     def extra(self, select=None, where=None, params=None, tables=None,
               order_by=None, select_params=None):
         """

django/db/models/sql/compiler.py

diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py

-              a
         where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
         having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
         params = []
         for val in self.query.extra_select.itervalues():
             params.extend(val[1])
         result = ['SELECT']
         if self.query.distinct:
-            result.append('DISTINCT')
+            distinct_sql = self.connection.ops.distinct(
+                self.query.model._meta.db_table, self.query.distinct_fields)
+            result.append(distinct_sql)
         result.append(', '.join(out_cols + self.query.ordering_aliases))
         result.append('FROM')
         result.extend(from_)
         params.extend(f_params)
         if where:
             result.append('WHERE %s' % where)

django/db/models/sql/query.py

diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py

-              a
         self.tables = []    # Aliases in the order they are created.
         self.where = where()
         self.where_class = where
         self.group_by = None
         self.having = where()
         self.order_by = []
         self.low_mark, self.high_mark = 0, None  # Used for offset/limit
         self.distinct = False
+        self.distinct_fields = None
         self.select_for_update = False
         self.select_for_update_nowait = False
         self.select_related = False
         self.related_select_cols = []
         # SQL aggregate-related attributes
         self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
         self.aggregate_select_mask = None
 …
         if self.group_by is None:
             obj.group_by = None
         else:
             obj.group_by = self.group_by[:]
         obj.having = copy.deepcopy(self.having, memo=memo)
         obj.order_by = self.order_by[:]
         obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
         obj.distinct = self.distinct
+        obj.distinct_fields = self.distinct_fields
         obj.select_for_update = self.select_for_update
         obj.select_for_update_nowait = self.select_for_update_nowait
         obj.select_related = self.select_related
         obj.related_select_cols = []
         obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
         if self.aggregate_select_mask is None:
             obj.aggregate_select_mask = None
         else:
 …
             in zip(query.aggregate_select.items(), result)
         ])
     def get_count(self, using):
         """
         Performs a COUNT() query using the current filter constraints.
         """
         obj = self.clone()
-        if len(self.select) > 1 or self.aggregate_select:
+        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
             # If a select clause exists, then the query has already started to
             # specify the columns that are to be returned.
             # In this case, we need to use a subquery to evaluate the count.
             from django.db.models.sql.subqueries import AggregateQuery
             subquery = obj
             subquery.clear_ordering(True)
             subquery.clear_limits()
 …
         """
         Clears the list of fields to select (but not extra_select columns).
         Some queryset types completely replace any existing list of select
         columns.
         """
         self.select = []
         self.select_fields = []
+    def add_distinct_fields(self, field_names):
+        self.distinct_fields = []
+        options = self.get_meta()
+        for name in field_names:
+            field, source, opts, join_list, last, _ = self.setup_joins(
+                name.split(LOOKUP_SEP), options, self.get_initial_alias(), False)
+            self.distinct_fields.append(field.column)
     def add_fields(self, field_names, allow_m2m=True):
         """
         Adds the given (model) fields to the select set. The field names are
         added in the order specified.
         """
         alias = self.get_initial_alias()
         opts = self.get_meta()

docs/ref/models/querysets.txt

diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt

-              a
     introspection:
     .. attribute:: ordered
         ``True`` if the ``QuerySet`` is ordered -- i.e. has an order_by()
         clause or a default ordering on the model. ``False`` otherwise.
     .. attribute:: db
         The database that will be used if this query is executed now.
     .. note::
         The ``query`` parameter to :class:`QuerySet` exists so that specialized
         query subclasses such as
         :class:`~django.contrib.gis.db.models.GeoQuerySet` can reconstruct
         internal query state. The value of the parameter is an opaque
 …
 ``order_by()``). If no such ordering is defined for a given
 ``QuerySet``, calling ``reverse()`` on it has no real effect (the
 ordering was undefined prior to calling ``reverse()``, and will remain
 undefined afterward).
 distinct
 ~~~~~~~~
-.. method:: distinct()
+.. method:: distinct(*fields)
 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
 eliminates duplicate rows from the query results.
 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
 is rarely a problem, because simple queries such as ``Blog.objects.all()``
 don't introduce the possibility of duplicate result rows. However, if your
 query spans multiple tables, it's possible to get duplicate results when a
 ``QuerySet`` is evaluated. That's when you'd use ``distinct()``.
+.. versionadded:: 1.4
+   ``distinct()`` takes optional positional arguments ``*fields``, which specify
+   field names to which the ``DISTINCT`` should be limited. This translates to
+   a ``SELECT DISTINCT ON`` SQL query. Note that this ``DISTINCT ON`` query is
+   only available in PostgreSQL.
+.. note::
+    When optional ``*fields`` are given, you will have to add an :meth:`order_by`
+    call with the same field names as the leftmost arguments.
 .. note::
     Any fields used in an :meth:`order_by` call are included in the SQL
     ``SELECT`` columns. This can sometimes lead to unexpected results when
     used in conjunction with ``distinct()``. If you order by fields from a
     related model, those fields will be added to the selected columns and they
     may make otherwise duplicate rows appear to be distinct. Since the extra
     columns don't appear in the returned results (they are only there to
     support ordering), it sometimes looks like non-distinct results are being

Download in other formats:

Original Format

Issues

Context Navigation

Ticket #6422: distinct_on.diff

AUTHORS

django/db/backends/__init__.py

django/db/backends/postgresql_psycopg2/base.py

django/db/backends/postgresql_psycopg2/operations.py

django/db/models/query.py

django/db/models/sql/compiler.py

django/db/models/sql/query.py

docs/ref/models/querysets.txt

Download in other formats:

Django Links

Learn More

Get Involved

Get Help

Follow Us

Support Us