Ticket #6422: distinct_on.9.diff
File distinct_on.9.diff, 19.5 KB (added by , 13 years ago) |
---|
-
AUTHORS
diff --git a/AUTHORS b/AUTHORS
a b 198 198 Vincent Foley <vfoleybourgon@yahoo.ca> 199 199 Alcides Fonseca 200 200 Rudolph Froger <rfroger@estrate.nl> 201 201 Jorge Gajon <gajon@gajon.org> 202 202 gandalf@owca.info 203 203 Marc Garcia <marc.garcia@accopensys.com> 204 204 Andy Gayton <andy-django@thecablelounge.com> 205 205 geber@datacollect.com 206 Jeffrey Gelens <jeffrey@gelens.org> 206 207 Baishampayan Ghose 207 208 Joshua Ginsberg <jag@flowtheory.net> 208 209 Dimitris Glezos <dimitris@glezos.com> 209 210 glin@seznam.cz 210 211 martin.glueck@gmail.com 211 212 Artyom Gnilov <boobsd@gmail.com> 212 213 Ben Godfrey <http://aftnn.org> 213 214 GomoX <gomo@datafull.com> -
django/db/backends/__init__.py
diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
a b 372 372 373 373 # Features that need to be confirmed at runtime 374 374 # Cache whether the confirmation has been performed. 375 375 _confirmed = False 376 376 supports_transactions = None 377 377 supports_stddev = None 378 378 can_introspect_foreign_keys = None 379 379 380 # Support for the DISTINCT ON clause 381 can_distinct_on_fields = False 382 380 383 def __init__(self, connection): 381 384 self.connection = connection 382 385 383 386 def confirm(self): 384 387 "Perform manual checks of any database features that might vary between installs" 385 388 self._confirmed = True 386 389 self.supports_transactions = self._supports_transactions() 387 390 self.supports_stddev = self._supports_stddev() … … 525 528 def fulltext_search_sql(self, field_name): 526 529 """ 527 530 Returns the SQL WHERE clause to use in order to perform a full-text 528 531 search of the given field_name. Note that the resulting string should 529 532 contain a '%s' placeholder for the value being searched against. 530 533 """ 531 534 raise NotImplementedError('Full-text search is not implemented for this database backend') 532 535 536 def distinct(self, fields): 537 """ 538 Returns an SQL DISTINCT clause which removes duplicate rows from the 539 result set. If any fields are given, only the given fields are being 540 checked for duplicates. 541 """ 542 if fields: 543 raise NotImplementedError('DISTINCT ON fields is not supported by this database backend') 544 else: 545 return 'DISTINCT' 546 533 547 def last_executed_query(self, cursor, sql, params): 534 548 """ 535 549 Returns a string of the query last executed by the given cursor, with 536 550 placeholders replaced with actual values. 537 551 538 552 `sql` is the raw query containing placeholders, and `params` is the 539 553 sequence of parameters. These are used by default, but this method 540 554 exists for database backends to provide a better implementation -
django/db/backends/postgresql_psycopg2/base.py
diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
a b 77 77 can_return_id_from_insert = True 78 78 requires_rollback_on_dirty_transaction = True 79 79 has_real_datatype = True 80 80 can_defer_constraint_checks = True 81 81 has_select_for_update = True 82 82 has_select_for_update_nowait = True 83 83 has_bulk_insert = True 84 84 supports_tablespaces = True 85 can_distinct_on_fields = True 85 86 86 87 class DatabaseWrapper(BaseDatabaseWrapper): 87 88 vendor = 'postgresql' 88 89 operators = { 89 90 'exact': '= %s', 90 91 'iexact': '= UPPER(%s)', 91 92 'contains': 'LIKE %s', 92 93 'icontains': 'LIKE UPPER(%s)', -
django/db/backends/postgresql_psycopg2/operations.py
diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
a b 174 174 macro in src/include/pg_config_manual.h . 175 175 176 176 This implementation simply returns 63, but can easily be overridden by a 177 177 custom database backend that inherits most of its behavior from this one. 178 178 """ 179 179 180 180 return 63 181 181 182 def distinct(self, fields): 183 if fields: 184 fields_sql = [] 185 186 for field in fields: 187 fields_sql.append( 188 self.quote_name(field.model._meta.db_table) + "." + \ 189 self.quote_name(field.column) 190 ) 191 192 return 'DISTINCT ON (%s)' % ', '.join(fields_sql) 193 else: 194 return 'DISTINCT' 195 182 196 def last_executed_query(self, cursor, sql, params): 183 197 # http://initd.org/psycopg/docs/cursor.html#cursor.query 184 198 # The query attribute is a Psycopg extension to the DB API 2.0. 185 199 return cursor.query 186 200 187 201 def return_insert_id(self): 188 202 return "RETURNING %s", () 189 203 -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py
a b 733 733 """ 734 734 assert self.query.can_filter(), \ 735 735 "Cannot reorder a query once a slice has been taken." 736 736 obj = self._clone() 737 737 obj.query.clear_ordering() 738 738 obj.query.add_ordering(*field_names) 739 739 return obj 740 740 741 def distinct(self, true_or_false=True):741 def distinct(self, *field_names): 742 742 """ 743 743 Returns a new QuerySet instance that will select only distinct results. 744 744 """ 745 745 obj = self._clone() 746 obj.query.distinct = true_or_false 746 obj.query.add_distinct_fields(field_names) 747 obj.query.distinct = True 748 747 749 return obj 748 750 749 751 def extra(self, select=None, where=None, params=None, tables=None, 750 752 order_by=None, select_params=None): 751 753 """ 752 754 Adds extra SQL fragments to the query. 753 755 """ 754 756 assert self.query.can_filter(), \ … … 1161 1163 return self 1162 1164 1163 1165 def order_by(self, *field_names): 1164 1166 """ 1165 1167 Always returns EmptyQuerySet. 1166 1168 """ 1167 1169 return self 1168 1170 1169 def distinct(self, true_or_false=True):1171 def distinct(self, fields=None): 1170 1172 """ 1171 1173 Always returns EmptyQuerySet. 1172 1174 """ 1173 1175 return self 1174 1176 1175 1177 def extra(self, select=None, where=None, params=None, tables=None, 1176 1178 order_by=None, select_params=None): 1177 1179 """ -
django/db/models/sql/compiler.py
diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
a b 71 71 72 72 where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection) 73 73 having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection) 74 74 params = [] 75 75 for val in self.query.extra_select.itervalues(): 76 76 params.extend(val[1]) 77 77 78 78 result = ['SELECT'] 79 79 80 if self.query.distinct: 80 result.append('DISTINCT') 81 result.append(self.connection.ops.distinct(self.query.distinct_fields)) 82 81 83 result.append(', '.join(out_cols + self.query.ordering_aliases)) 82 84 83 85 result.append('FROM') 84 86 result.extend(from_) 85 87 params.extend(f_params) 86 88 87 89 if where: 88 90 result.append('WHERE %s' % where) -
django/db/models/sql/query.py
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
a b 122 122 self.tables = [] # Aliases in the order they are created. 123 123 self.where = where() 124 124 self.where_class = where 125 125 self.group_by = None 126 126 self.having = where() 127 127 self.order_by = [] 128 128 self.low_mark, self.high_mark = 0, None # Used for offset/limit 129 129 self.distinct = False 130 self.distinct_fields = None 130 131 self.select_for_update = False 131 132 self.select_for_update_nowait = False 132 133 self.select_related = False 133 134 self.related_select_cols = [] 134 135 135 136 # SQL aggregate-related attributes 136 137 self.aggregates = SortedDict() # Maps alias -> SQL aggregate function 137 138 self.aggregate_select_mask = None … … 260 261 if self.group_by is None: 261 262 obj.group_by = None 262 263 else: 263 264 obj.group_by = self.group_by[:] 264 265 obj.having = copy.deepcopy(self.having, memo=memo) 265 266 obj.order_by = self.order_by[:] 266 267 obj.low_mark, obj.high_mark = self.low_mark, self.high_mark 267 268 obj.distinct = self.distinct 269 obj.distinct_fields = self.distinct_fields 268 270 obj.select_for_update = self.select_for_update 269 271 obj.select_for_update_nowait = self.select_for_update_nowait 270 272 obj.select_related = self.select_related 271 273 obj.related_select_cols = [] 272 274 obj.aggregates = copy.deepcopy(self.aggregates, memo=memo) 273 275 if self.aggregate_select_mask is None: 274 276 obj.aggregate_select_mask = None 275 277 else: … … 388 390 in zip(query.aggregate_select.items(), result) 389 391 ]) 390 392 391 393 def get_count(self, using): 392 394 """ 393 395 Performs a COUNT() query using the current filter constraints. 394 396 """ 395 397 obj = self.clone() 396 if len(self.select) > 1 or self.aggregate_select :398 if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields): 397 399 # If a select clause exists, then the query has already started to 398 400 # specify the columns that are to be returned. 399 401 # In this case, we need to use a subquery to evaluate the count. 400 402 from django.db.models.sql.subqueries import AggregateQuery 401 403 subquery = obj 402 404 subquery.clear_ordering(True) 403 405 subquery.clear_limits() 404 406 … … 1591 1593 """ 1592 1594 Clears the list of fields to select (but not extra_select columns). 1593 1595 Some queryset types completely replace any existing list of select 1594 1596 columns. 1595 1597 """ 1596 1598 self.select = [] 1597 1599 self.select_fields = [] 1598 1600 1601 def add_distinct_fields(self, field_names): 1602 """ 1603 Adds and resolves the given fields to the query's "distinct on" clause. 1604 """ 1605 self.distinct_fields = [] 1606 options = self.get_meta() 1607 1608 for name in field_names: 1609 field, source, opts, join_list, last, _ = self.setup_joins( 1610 name.split(LOOKUP_SEP), options, self.get_initial_alias(), False) 1611 self.distinct_fields.append(field) 1612 1599 1613 def add_fields(self, field_names, allow_m2m=True): 1600 1614 """ 1601 1615 Adds the given (model) fields to the select set. The field names are 1602 1616 added in the order specified. 1603 1617 """ 1604 1618 alias = self.get_initial_alias() 1605 1619 opts = self.get_meta() 1606 1620 -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
a b 340 340 a default ordering, or when using :meth:`order_by()`). If no such ordering is 341 341 defined for a given ``QuerySet``, calling ``reverse()`` on it has no real 342 342 effect (the ordering was undefined prior to calling ``reverse()``, and will 343 343 remain undefined afterward). 344 344 345 345 distinct 346 346 ~~~~~~~~ 347 347 348 .. method:: distinct( )348 .. method:: distinct([*fields]) 349 349 350 350 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This 351 351 eliminates duplicate rows from the query results. 352 352 353 353 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this 354 354 is rarely a problem, because simple queries such as ``Blog.objects.all()`` 355 355 don't introduce the possibility of duplicate result rows. However, if your 356 356 query spans multiple tables, it's possible to get duplicate results when a … … 369 369 selected, the columns used in any :meth:`order_by()` (or default model 370 370 ordering) will still be involved and may affect uniqueness of the results. 371 371 372 372 The moral here is that if you are using ``distinct()`` be careful about 373 373 ordering by related models. Similarly, when using ``distinct()`` and 374 374 :meth:`values()` together, be careful when ordering by fields not in the 375 375 :meth:`values()` call. 376 376 377 .. versionadded:: 1.4 378 379 The possibility to pass positional arguments (``*fields``) is new in Django 1.4. 380 They are names of fields to which the ``DISTINCT`` should be limited. This 381 translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates 382 duplicate rows not by comparing all fields in a row, but by comparing only the given 383 fields. 384 385 .. note:: 386 Note that the ability to specify field names is only available in PostgreSQL. 387 388 .. note:: 389 When using the ``DISTINCT ON`` functionality it is required that the columns given 390 to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT 391 DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you 392 don't specify an order, then you'll get some arbitrary row. 393 394 Examples:: 395 396 >>> Author.objects.distinct() 397 [...] 398 399 >>> Entry.objects.order_by('pub_date').distinct('pub_date') 400 [...] 401 402 >>> Entry.objects.order_by('blog').distinct('blog') 403 [...] 404 405 >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date') 406 [...] 407 408 >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date') 409 [...] 410 411 >>> Entry.objects.order_by('author', 'pub_date').distinct('author') 412 [...] 413 377 414 values 378 415 ~~~~~~ 379 416 380 417 .. method:: values(*fields) 381 418 382 419 Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns 383 420 dictionaries when used as an iterable, rather than model-instance objects. 384 421 -
tests/regressiontests/queries/models.py
diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
a b 204 204 205 205 # An inter-related setup with a model subclass that has a nullable 206 206 # path to another model, and a return path from that model. 207 207 208 208 class Celebrity(models.Model): 209 209 name = models.CharField("Name", max_length=20) 210 210 greatest_fan = models.ForeignKey("Fan", null=True, unique=True) 211 211 212 def __unicode__(self): 213 return self.name 214 212 215 class TvChef(Celebrity): 213 216 pass 214 217 215 218 class Fan(models.Model): 216 219 fan_of = models.ForeignKey(Celebrity) 217 220 218 221 # Multiple foreign keys 219 222 class LeafA(models.Model): … … 339 342 340 343 class OneToOneCategory(models.Model): 341 344 new_name = models.CharField(max_length=15) 342 345 category = models.OneToOneField(SimpleCategory) 343 346 344 347 def __unicode__(self): 345 348 return "one2one " + self.new_name 346 349 350 class Staff(models.Model): 351 name = models.CharField(max_length=50) 352 organisation = models.CharField(max_length=100) 353 tags = models.ManyToManyField(Tag, through='StaffTag') 354 355 def __unicode__(self): 356 return self.name 357 358 class StaffTag(models.Model): 359 staff = models.ForeignKey(Staff) 360 tag = models.ForeignKey(Tag) 361 362 def __unicode__(self): 363 return u"%s -> %s" % (self.tag, self.staff) -
tests/regressiontests/queries/tests.py
diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
a b 13 13 from django.utils import unittest 14 14 from django.utils.datastructures import SortedDict 15 15 16 16 from .models import (Annotation, Article, Author, Celebrity, Child, Cover, 17 17 Detail, DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ, 18 18 ManagedModel, Member, NamedCategory, Note, Number, Plaything, PointerA, 19 19 Ranking, Related, Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten, 20 20 Node, ObjectA, ObjectB, ObjectC, CategoryItem, SimpleCategory, 21 SpecialCategory, OneToOneCategory )21 SpecialCategory, OneToOneCategory, Staff, StaffTag) 22 22 23 23 24 24 class BaseQuerysetTest(TestCase): 25 25 def assertValueQuerysetEqual(self, qs, values): 26 26 return self.assertQuerysetEqual(qs, values, transform=lambda x: x) 27 27 28 28 29 29 class Queries1Tests(BaseQuerysetTest): … … 1734 1734 def setUp(self): 1735 1735 generic = NamedCategory.objects.create(name="Generic") 1736 1736 t1 = Tag.objects.create(name='t1', category=generic) 1737 1737 t2 = Tag.objects.create(name='t2', parent=t1, category=generic) 1738 1738 t3 = Tag.objects.create(name='t3', parent=t1) 1739 1739 t4 = Tag.objects.create(name='t4', parent=t3) 1740 1740 t5 = Tag.objects.create(name='t5', parent=t3) 1741 1741 1742 p1_o1 = Staff.objects.create(name="p1", organisation="o1") 1743 p2_o1 = Staff.objects.create(name="p2", organisation="o1") 1744 p3_o1 = Staff.objects.create(name="p3", organisation="o1") 1745 p1_o2 = Staff.objects.create(name="p1", organisation="o2") 1746 1747 StaffTag.objects.create(staff=p1_o1, tag=t1) 1748 StaffTag.objects.create(staff=p1_o1, tag=t1) 1749 1750 celeb1 = Celebrity.objects.create(name="c1") 1751 celeb2 = Celebrity.objects.create(name="c2") 1752 1753 self.fan1 = Fan.objects.create(fan_of=celeb1) 1754 self.fan2 = Fan.objects.create(fan_of=celeb1) 1755 self.fan3 = Fan.objects.create(fan_of=celeb2) 1756 1742 1757 # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed 1743 1758 # (Python issue 1242657), so these cases return an empty list, rather than 1744 1759 # raising an exception. Not a lot we can do about that, unfortunately, due to 1745 1760 # the way Python handles list() calls internally. Thus, we skip the tests for 1746 1761 # Python 2.6. 1747 1762 @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6") 1748 1763 def test_infinite_loop(self): 1749 1764 # If you're not careful, it's possible to introduce infinite loops via … … 1805 1820 Number.objects.filter(num__in=numbers[:2000]).count(), 1806 1821 2000 1807 1822 ) 1808 1823 self.assertEqual( 1809 1824 Number.objects.filter(num__in=numbers).count(), 1810 1825 2500 1811 1826 ) 1812 1827 1828 @skipUnlessDBFeature('can_distinct_on_fields') 1829 def test_ticket6422(self): 1830 """QuerySet.distinct('field', ...) works""" 1831 # (qset, expected) tuples 1832 qsets = ( 1833 ( 1834 Staff.objects.distinct().order_by('name'), 1835 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1836 ), 1837 ( 1838 Staff.objects.distinct('name').order_by('name'), 1839 ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1840 ), 1841 ( 1842 Staff.objects.distinct('organisation').order_by('organisation', 'name'), 1843 ['<Staff: p1>', '<Staff: p1>'], 1844 ), 1845 ( 1846 Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'), 1847 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1848 ), 1849 ( 1850 Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\ 1851 distinct('name').order_by('name'), 1852 ['<Celebrity: c1>', '<Celebrity: c2>'], 1853 ), 1854 ( 1855 StaffTag.objects.distinct('staff','tag'), 1856 ['<StaffTag: t1 -> p1>'], 1857 ), 1858 ( 1859 Tag.objects.order_by('parent__pk', 'pk').distinct('parent'), 1860 ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'], 1861 ), 1862 ( 1863 StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'), 1864 ['<StaffTag: t1 -> p1>'], 1865 ), 1866 ) 1867 1868 for qset, expected in qsets: 1869 self.assertQuerysetEqual(qset, expected) 1870 self.assertEqual(qset.count(), len(expected)) 1871 1872 # and check the fieldlookup 1873 self.assertRaises( 1874 FieldError, 1875 lambda: Staff.objects.distinct('shrubbery') 1876 ) 1877 1878 1813 1879 class UnionTests(unittest.TestCase): 1814 1880 """ 1815 1881 Tests for the union of two querysets. Bug #12252. 1816 1882 """ 1817 1883 def setUp(self): 1818 1884 objectas = [] 1819 1885 objectbs = [] 1820 1886 objectcs = []