Ticket #6422: distinct_on.9.diff

File distinct_on.9.diff, 19.5 KB (added by Jeffrey Gelens, 13 years ago)

Added docstring

  • AUTHORS

    diff --git a/AUTHORS b/AUTHORS
    a b  
    198198    Vincent Foley <vfoleybourgon@yahoo.ca>
    199199    Alcides Fonseca
    200200    Rudolph Froger <rfroger@estrate.nl>
    201201    Jorge Gajon <gajon@gajon.org>
    202202    gandalf@owca.info
    203203    Marc Garcia <marc.garcia@accopensys.com>
    204204    Andy Gayton <andy-django@thecablelounge.com>
    205205    geber@datacollect.com
     206    Jeffrey Gelens <jeffrey@gelens.org>
    206207    Baishampayan Ghose
    207208    Joshua Ginsberg <jag@flowtheory.net>
    208209    Dimitris Glezos <dimitris@glezos.com>
    209210    glin@seznam.cz
    210211    martin.glueck@gmail.com
    211212    Artyom Gnilov <boobsd@gmail.com>
    212213    Ben Godfrey <http://aftnn.org>
    213214    GomoX <gomo@datafull.com>
  • django/db/backends/__init__.py

    diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
    a b  
    372372
    373373    # Features that need to be confirmed at runtime
    374374    # Cache whether the confirmation has been performed.
    375375    _confirmed = False
    376376    supports_transactions = None
    377377    supports_stddev = None
    378378    can_introspect_foreign_keys = None
    379379
     380    # Support for the DISTINCT ON clause
     381    can_distinct_on_fields = False
     382
    380383    def __init__(self, connection):
    381384        self.connection = connection
    382385
    383386    def confirm(self):
    384387        "Perform manual checks of any database features that might vary between installs"
    385388        self._confirmed = True
    386389        self.supports_transactions = self._supports_transactions()
    387390        self.supports_stddev = self._supports_stddev()
     
    525528    def fulltext_search_sql(self, field_name):
    526529        """
    527530        Returns the SQL WHERE clause to use in order to perform a full-text
    528531        search of the given field_name. Note that the resulting string should
    529532        contain a '%s' placeholder for the value being searched against.
    530533        """
    531534        raise NotImplementedError('Full-text search is not implemented for this database backend')
    532535
     536    def distinct(self, fields):
     537        """
     538        Returns an SQL DISTINCT clause which removes duplicate rows from the
     539        result set. If any fields are given, only the given fields are being
     540        checked for duplicates.
     541        """
     542        if fields:
     543            raise NotImplementedError('DISTINCT ON fields is not supported by this database backend')
     544        else:
     545            return 'DISTINCT'
     546
    533547    def last_executed_query(self, cursor, sql, params):
    534548        """
    535549        Returns a string of the query last executed by the given cursor, with
    536550        placeholders replaced with actual values.
    537551
    538552        `sql` is the raw query containing placeholders, and `params` is the
    539553        sequence of parameters. These are used by default, but this method
    540554        exists for database backends to provide a better implementation
  • django/db/backends/postgresql_psycopg2/base.py

    diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
    a b  
    7777    can_return_id_from_insert = True
    7878    requires_rollback_on_dirty_transaction = True
    7979    has_real_datatype = True
    8080    can_defer_constraint_checks = True
    8181    has_select_for_update = True
    8282    has_select_for_update_nowait = True
    8383    has_bulk_insert = True
    8484    supports_tablespaces = True
     85    can_distinct_on_fields = True
    8586
    8687class DatabaseWrapper(BaseDatabaseWrapper):
    8788    vendor = 'postgresql'
    8889    operators = {
    8990        'exact': '= %s',
    9091        'iexact': '= UPPER(%s)',
    9192        'contains': 'LIKE %s',
    9293        'icontains': 'LIKE UPPER(%s)',
  • django/db/backends/postgresql_psycopg2/operations.py

    diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
    a b  
    174174        macro in src/include/pg_config_manual.h .
    175175
    176176        This implementation simply returns 63, but can easily be overridden by a
    177177        custom database backend that inherits most of its behavior from this one.
    178178        """
    179179
    180180        return 63
    181181
     182    def distinct(self, fields):
     183        if fields:
     184            fields_sql = []
     185
     186            for field in fields:
     187                fields_sql.append(
     188                    self.quote_name(field.model._meta.db_table) + "." + \
     189                    self.quote_name(field.column)
     190                )
     191
     192            return 'DISTINCT ON (%s)' % ', '.join(fields_sql)
     193        else:
     194            return 'DISTINCT'
     195
    182196    def last_executed_query(self, cursor, sql, params):
    183197        # http://initd.org/psycopg/docs/cursor.html#cursor.query
    184198        # The query attribute is a Psycopg extension to the DB API 2.0.
    185199        return cursor.query
    186200
    187201    def return_insert_id(self):
    188202        return "RETURNING %s", ()
    189203
  • django/db/models/query.py

    diff --git a/django/db/models/query.py b/django/db/models/query.py
    a b  
    733733        """
    734734        assert self.query.can_filter(), \
    735735                "Cannot reorder a query once a slice has been taken."
    736736        obj = self._clone()
    737737        obj.query.clear_ordering()
    738738        obj.query.add_ordering(*field_names)
    739739        return obj
    740740
    741     def distinct(self, true_or_false=True):
     741    def distinct(self, *field_names):
    742742        """
    743743        Returns a new QuerySet instance that will select only distinct results.
    744744        """
    745745        obj = self._clone()
    746         obj.query.distinct = true_or_false
     746        obj.query.add_distinct_fields(field_names)
     747        obj.query.distinct = True
     748
    747749        return obj
    748750
    749751    def extra(self, select=None, where=None, params=None, tables=None,
    750752              order_by=None, select_params=None):
    751753        """
    752754        Adds extra SQL fragments to the query.
    753755        """
    754756        assert self.query.can_filter(), \
     
    11611163        return self
    11621164
    11631165    def order_by(self, *field_names):
    11641166        """
    11651167        Always returns EmptyQuerySet.
    11661168        """
    11671169        return self
    11681170
    1169     def distinct(self, true_or_false=True):
     1171    def distinct(self, fields=None):
    11701172        """
    11711173        Always returns EmptyQuerySet.
    11721174        """
    11731175        return self
    11741176
    11751177    def extra(self, select=None, where=None, params=None, tables=None,
    11761178              order_by=None, select_params=None):
    11771179        """
  • django/db/models/sql/compiler.py

    diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
    a b  
    7171
    7272        where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection)
    7373        having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection)
    7474        params = []
    7575        for val in self.query.extra_select.itervalues():
    7676            params.extend(val[1])
    7777
    7878        result = ['SELECT']
     79
    7980        if self.query.distinct:
    80             result.append('DISTINCT')
     81            result.append(self.connection.ops.distinct(self.query.distinct_fields))
     82
    8183        result.append(', '.join(out_cols + self.query.ordering_aliases))
    8284
    8385        result.append('FROM')
    8486        result.extend(from_)
    8587        params.extend(f_params)
    8688
    8789        if where:
    8890            result.append('WHERE %s' % where)
  • django/db/models/sql/query.py

    diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
    a b  
    122122        self.tables = []    # Aliases in the order they are created.
    123123        self.where = where()
    124124        self.where_class = where
    125125        self.group_by = None
    126126        self.having = where()
    127127        self.order_by = []
    128128        self.low_mark, self.high_mark = 0, None  # Used for offset/limit
    129129        self.distinct = False
     130        self.distinct_fields = None
    130131        self.select_for_update = False
    131132        self.select_for_update_nowait = False
    132133        self.select_related = False
    133134        self.related_select_cols = []
    134135
    135136        # SQL aggregate-related attributes
    136137        self.aggregates = SortedDict() # Maps alias -> SQL aggregate function
    137138        self.aggregate_select_mask = None
     
    260261        if self.group_by is None:
    261262            obj.group_by = None
    262263        else:
    263264            obj.group_by = self.group_by[:]
    264265        obj.having = copy.deepcopy(self.having, memo=memo)
    265266        obj.order_by = self.order_by[:]
    266267        obj.low_mark, obj.high_mark = self.low_mark, self.high_mark
    267268        obj.distinct = self.distinct
     269        obj.distinct_fields = self.distinct_fields
    268270        obj.select_for_update = self.select_for_update
    269271        obj.select_for_update_nowait = self.select_for_update_nowait
    270272        obj.select_related = self.select_related
    271273        obj.related_select_cols = []
    272274        obj.aggregates = copy.deepcopy(self.aggregates, memo=memo)
    273275        if self.aggregate_select_mask is None:
    274276            obj.aggregate_select_mask = None
    275277        else:
     
    388390            in zip(query.aggregate_select.items(), result)
    389391        ])
    390392
    391393    def get_count(self, using):
    392394        """
    393395        Performs a COUNT() query using the current filter constraints.
    394396        """
    395397        obj = self.clone()
    396         if len(self.select) > 1 or self.aggregate_select:
     398        if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields):
    397399            # If a select clause exists, then the query has already started to
    398400            # specify the columns that are to be returned.
    399401            # In this case, we need to use a subquery to evaluate the count.
    400402            from django.db.models.sql.subqueries import AggregateQuery
    401403            subquery = obj
    402404            subquery.clear_ordering(True)
    403405            subquery.clear_limits()
    404406
     
    15911593        """
    15921594        Clears the list of fields to select (but not extra_select columns).
    15931595        Some queryset types completely replace any existing list of select
    15941596        columns.
    15951597        """
    15961598        self.select = []
    15971599        self.select_fields = []
    15981600
     1601    def add_distinct_fields(self, field_names):
     1602        """
     1603        Adds and resolves the given fields to the query's "distinct on" clause.
     1604        """
     1605        self.distinct_fields = []
     1606        options = self.get_meta()
     1607
     1608        for name in field_names:
     1609            field, source, opts, join_list, last, _ = self.setup_joins(
     1610                name.split(LOOKUP_SEP), options, self.get_initial_alias(), False)
     1611            self.distinct_fields.append(field)
     1612
    15991613    def add_fields(self, field_names, allow_m2m=True):
    16001614        """
    16011615        Adds the given (model) fields to the select set. The field names are
    16021616        added in the order specified.
    16031617        """
    16041618        alias = self.get_initial_alias()
    16051619        opts = self.get_meta()
    16061620
  • docs/ref/models/querysets.txt

    diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
    a b  
    340340a default ordering, or when using :meth:`order_by()`). If no such ordering is
    341341defined for a given ``QuerySet``, calling ``reverse()`` on it has no real
    342342effect (the ordering was undefined prior to calling ``reverse()``, and will
    343343remain undefined afterward).
    344344
    345345distinct
    346346~~~~~~~~
    347347
    348 .. method:: distinct()
     348.. method:: distinct([*fields])
    349349
    350350Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This
    351351eliminates duplicate rows from the query results.
    352352
    353353By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this
    354354is rarely a problem, because simple queries such as ``Blog.objects.all()``
    355355don't introduce the possibility of duplicate result rows. However, if your
    356356query spans multiple tables, it's possible to get duplicate results when a
     
    369369    selected, the columns used in any :meth:`order_by()` (or default model
    370370    ordering) will still be involved and may affect uniqueness of the results.
    371371
    372372    The moral here is that if you are using ``distinct()`` be careful about
    373373    ordering by related models. Similarly, when using ``distinct()`` and
    374374    :meth:`values()` together, be careful when ordering by fields not in the
    375375    :meth:`values()` call.
    376376
     377.. versionadded:: 1.4
     378
     379The ability to pass positional arguments (``*fields``) is new in Django 1.4.
     380They are the names of the fields to which the ``DISTINCT`` should be limited. This
     381translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates
     382duplicate rows by comparing only the given fields, rather than all of the fields in
     383a row.
     384
     385.. note::
     386    The ability to specify field names is only available in PostgreSQL.
     387
     388.. note::
     389    When using the ``DISTINCT ON`` functionality, the columns given to
     390    :meth:`distinct` must match the leading :meth:`order_by` columns. For example, ``SELECT
     391    DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you
     392    don't specify an order, you'll get an arbitrary row for each value.
     393
     394Examples::
     395
     396    >>> Author.objects.distinct()
     397    [...]
     398
     399    >>> Entry.objects.order_by('pub_date').distinct('pub_date')
     400    [...]
     401
     402    >>> Entry.objects.order_by('blog').distinct('blog')
     403    [...]
     404
     405    >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date')
     406    [...]
     407
     408    >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date')
     409    [...]
     410
     411    >>> Entry.objects.order_by('author', 'pub_date').distinct('author')
     412    [...]
     413
    377414values
    378415~~~~~~
    379416
    380417.. method:: values(*fields)
    381418
    382419Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns
    383420dictionaries when used as an iterable, rather than model-instance objects.
    384421
  • tests/regressiontests/queries/models.py

    diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
    a b  
    204204
    205205# An inter-related setup with a model subclass that has a nullable
    206206# path to another model, and a return path from that model.
    207207
    208208class Celebrity(models.Model):
    209209    name = models.CharField("Name", max_length=20)
    210210    greatest_fan = models.ForeignKey("Fan", null=True, unique=True)
    211211
     212    def __unicode__(self):
     213        return self.name
     214
    212215class TvChef(Celebrity):
    213216    pass
    214217
    215218class Fan(models.Model):
    216219    fan_of = models.ForeignKey(Celebrity)
    217220
    218221# Multiple foreign keys
    219222class LeafA(models.Model):
     
    339342
    340343class OneToOneCategory(models.Model):
    341344    new_name = models.CharField(max_length=15)
    342345    category = models.OneToOneField(SimpleCategory)
    343346
    344347    def __unicode__(self):
    345348        return "one2one " + self.new_name
    346349
     350class Staff(models.Model):
     351    name = models.CharField(max_length=50)
     352    organisation = models.CharField(max_length=100)
     353    tags = models.ManyToManyField(Tag, through='StaffTag')
     354
     355    def __unicode__(self):
     356        return self.name
     357
     358class StaffTag(models.Model):
     359    staff = models.ForeignKey(Staff)
     360    tag = models.ForeignKey(Tag)
     361
     362    def __unicode__(self):
     363        return u"%s -> %s" % (self.tag, self.staff)
  • tests/regressiontests/queries/tests.py

    diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
    a b  
    1313from django.utils import unittest
    1414from django.utils.datastructures import SortedDict
    1515
    1616from .models import (Annotation, Article, Author, Celebrity, Child, Cover,
    1717    Detail, DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ,
    1818    ManagedModel, Member, NamedCategory, Note, Number, Plaything, PointerA,
    1919    Ranking, Related, Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten,
    2020    Node, ObjectA, ObjectB, ObjectC, CategoryItem, SimpleCategory,
    21     SpecialCategory, OneToOneCategory)
     21    SpecialCategory, OneToOneCategory, Staff, StaffTag)
    2222
    2323
    2424class BaseQuerysetTest(TestCase):
    2525    def assertValueQuerysetEqual(self, qs, values):
    2626        return self.assertQuerysetEqual(qs, values, transform=lambda x: x)
    2727
    2828
    2929class Queries1Tests(BaseQuerysetTest):
     
    17341734    def setUp(self):
    17351735        generic = NamedCategory.objects.create(name="Generic")
    17361736        t1 = Tag.objects.create(name='t1', category=generic)
    17371737        t2 = Tag.objects.create(name='t2', parent=t1, category=generic)
    17381738        t3 = Tag.objects.create(name='t3', parent=t1)
    17391739        t4 = Tag.objects.create(name='t4', parent=t3)
    17401740        t5 = Tag.objects.create(name='t5', parent=t3)
    17411741
     1742        p1_o1 = Staff.objects.create(name="p1", organisation="o1")
     1743        p2_o1 = Staff.objects.create(name="p2", organisation="o1")
     1744        p3_o1 = Staff.objects.create(name="p3", organisation="o1")
     1745        p1_o2 = Staff.objects.create(name="p1", organisation="o2")
     1746
     1747        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1748        StaffTag.objects.create(staff=p1_o1, tag=t1)
     1749
     1750        celeb1 = Celebrity.objects.create(name="c1")
     1751        celeb2 = Celebrity.objects.create(name="c2")
     1752
     1753        self.fan1 = Fan.objects.create(fan_of=celeb1)
     1754        self.fan2 = Fan.objects.create(fan_of=celeb1)
     1755        self.fan3 = Fan.objects.create(fan_of=celeb2)
     1756
    17421757    # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed
    17431758    # (Python issue 1242657), so these cases return an empty list, rather than
    17441759    # raising an exception. Not a lot we can do about that, unfortunately, due to
    17451760    # the way Python handles list() calls internally. Thus, we skip the tests for
    17461761    # Python 2.6.
    17471762    @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6")
    17481763    def test_infinite_loop(self):
    17491764        # If you're not careful, it's possible to introduce infinite loops via
     
    18051820            Number.objects.filter(num__in=numbers[:2000]).count(),
    18061821            2000
    18071822        )
    18081823        self.assertEqual(
    18091824            Number.objects.filter(num__in=numbers).count(),
    18101825            2500
    18111826        )
    18121827
     1828    @skipUnlessDBFeature('can_distinct_on_fields')
     1829    def test_ticket6422(self):
     1830        """QuerySet.distinct('field', ...) works"""
     1831        # (qset, expected) tuples
     1832        qsets = (
     1833            (
     1834                Staff.objects.distinct().order_by('name'),
     1835                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1836            ),
     1837            (
     1838                Staff.objects.distinct('name').order_by('name'),
     1839                ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1840            ),
     1841            (
     1842                Staff.objects.distinct('organisation').order_by('organisation', 'name'),
     1843                ['<Staff: p1>', '<Staff: p1>'],
     1844            ),
     1845            (
     1846                Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'),
     1847                ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'],
     1848            ),
     1849            (
     1850                Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\
     1851                    distinct('name').order_by('name'),
     1852                ['<Celebrity: c1>', '<Celebrity: c2>'],
     1853            ),
     1854            (
     1855                StaffTag.objects.distinct('staff','tag'),
     1856                ['<StaffTag: t1 -> p1>'],
     1857            ),
     1858            (
     1859                Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
     1860                ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'],
     1861            ),
     1862            (
     1863                StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
     1864                ['<StaffTag: t1 -> p1>'],
     1865            ),
     1866        )
     1867
     1868        for qset, expected in qsets:
     1869            self.assertQuerysetEqual(qset, expected)
     1870            self.assertEqual(qset.count(), len(expected))
     1871
     1872        # and check the fieldlookup
     1873        self.assertRaises(
     1874            FieldError,
     1875            lambda: Staff.objects.distinct('shrubbery')
     1876        )
     1877
     1878
    18131879class UnionTests(unittest.TestCase):
    18141880    """
    18151881    Tests for the union of two querysets. Bug #12252.
    18161882    """
    18171883    def setUp(self):
    18181884        objectas = []
    18191885        objectbs = []
    18201886        objectcs = []
Back to Top