Ticket #6422: distinct_on.7.diff
File distinct_on.7.diff, 18.5 KB (added by , 13 years ago) |
---|
-
AUTHORS
diff --git a/AUTHORS b/AUTHORS
a b 544 544 ye7cakf02@sneakemail.com 545 545 ymasuda@ethercube.com 546 546 Jesse Young <adunar@gmail.com> 547 547 Mykola Zamkovoi <nickzam@gmail.com> 548 548 zegor 549 549 Gasper Zejn <zejn@kiberpipa.org> 550 550 Jarek Zgoda <jarek.zgoda@gmail.com> 551 551 Cheng Zhang 552 Jeffrey Gelens <jeffrey@gelens.org> 552 553 553 554 A big THANK YOU goes to: 554 555 555 556 Rob Curley and Ralph Gage for letting us open-source Django. 556 557 557 558 Frank Wiles for making excellent arguments for open-sourcing, and for 558 559 his sage sysadmin advice. 559 560 -
django/db/backends/__init__.py
diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
a b 368 368 369 369 # Features that need to be confirmed at runtime 370 370 # Cache whether the confirmation has been performed. 371 371 _confirmed = False 372 372 supports_transactions = None 373 373 supports_stddev = None 374 374 can_introspect_foreign_keys = None 375 375 376 # Support for the DISTINCT ON clause 377 can_distinct_on_fields = False 378 376 379 def __init__(self, connection): 377 380 self.connection = connection 378 381 379 382 def confirm(self): 380 383 "Perform manual checks of any database features that might vary between installs" 381 384 self._confirmed = True 382 385 self.supports_transactions = self._supports_transactions() 383 386 self.supports_stddev = self._supports_stddev() … … 521 524 def fulltext_search_sql(self, field_name): 522 525 """ 523 526 Returns the SQL WHERE clause to use in order to perform a full-text 524 527 search of the given field_name. Note that the resulting string should 525 528 contain a '%s' placeholder for the value being searched against. 526 529 """ 527 530 raise NotImplementedError('Full-text search is not implemented for this database backend') 528 531 532 def distinct(self, fields): 533 """ 534 Returns an SQL DISTINCT clause which removes duplicate rows from the 535 result set. If any fields are given, only the given fields are being 536 checked for duplicates. 537 """ 538 if fields: 539 raise NotImplementedError('DISTINCT ON fields is not supported by this database backend') 540 else: 541 return 'DISTINCT' 542 529 543 def last_executed_query(self, cursor, sql, params): 530 544 """ 531 545 Returns a string of the query last executed by the given cursor, with 532 546 placeholders replaced with actual values. 533 547 534 548 `sql` is the raw query containing placeholders, and `params` is the 535 549 sequence of parameters. These are used by default, but this method 536 550 exists for database backends to provide a better implementation -
django/db/backends/postgresql_psycopg2/base.py
diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
a b 70 70 needs_datetime_string_cast = False 71 71 can_return_id_from_insert = True 72 72 requires_rollback_on_dirty_transaction = True 73 73 has_real_datatype = True 74 74 can_defer_constraint_checks = True 75 75 has_select_for_update = True 76 76 has_select_for_update_nowait = True 77 77 has_bulk_insert = True 78 can_distinct_on_fields = True 78 79 79 80 80 81 class DatabaseWrapper(BaseDatabaseWrapper): 81 82 vendor = 'postgresql' 82 83 operators = { 83 84 'exact': '= %s', 84 85 'iexact': '= UPPER(%s)', 85 86 'contains': 'LIKE %s', -
django/db/backends/postgresql_psycopg2/operations.py
diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
a b 168 168 macro in src/include/pg_config_manual.h . 169 169 170 170 This implementation simply returns 63, but can easily be overridden by a 171 171 custom database backend that inherits most of its behavior from this one. 172 172 """ 173 173 174 174 return 63 175 175 176 def distinct(self, fields): 177 if fields: 178 fields_sql = [] 179 180 for field in fields: 181 fields_sql.append( 182 self.quote_name(field.model._meta.db_table) + "." + \ 183 self.quote_name(field.column) 184 ) 185 186 return 'DISTINCT ON (%s)' % ', '.join(fields_sql) 187 else: 188 return 'DISTINCT' 189 176 190 def last_executed_query(self, cursor, sql, params): 177 191 # http://initd.org/psycopg/docs/cursor.html#cursor.query 178 192 # The query attribute is a Psycopg extension to the DB API 2.0. 179 193 return cursor.query 180 194 181 195 def return_insert_id(self): 182 196 return "RETURNING %s", () 183 197 -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py
a b 693 693 """ 694 694 assert self.query.can_filter(), \ 695 695 "Cannot reorder a query once a slice has been taken." 696 696 obj = self._clone() 697 697 obj.query.clear_ordering() 698 698 obj.query.add_ordering(*field_names) 699 699 return obj 700 700 701 def distinct(self, true_or_false=True):701 def distinct(self, *field_names): 702 702 """ 703 703 Returns a new QuerySet instance that will select only distinct results. 704 704 """ 705 705 obj = self._clone() 706 obj.query.distinct = true_or_false 706 obj.query.add_distinct_fields(field_names) 707 obj.query.distinct = True 708 707 709 return obj 708 710 709 711 def extra(self, select=None, where=None, params=None, tables=None, 710 712 order_by=None, select_params=None): 711 713 """ 712 714 Adds extra SQL fragments to the query. 713 715 """ 714 716 assert self.query.can_filter(), \ … … 1118 1120 return self 1119 1121 1120 1122 def order_by(self, *field_names): 1121 1123 """ 1122 1124 Always returns EmptyQuerySet. 1123 1125 """ 1124 1126 return self 1125 1127 1126 def distinct(self, true_or_false=True):1128 def distinct(self, *field_names): 1127 1129 """ 1128 1130 Always returns EmptyQuerySet. 1129 1131 """ 1130 1132 return self 1131 1133 1132 1134 def extra(self, select=None, where=None, params=None, tables=None, 1133 1135 order_by=None, select_params=None): 1134 1136 """ -
django/db/models/sql/compiler.py
diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
a b 71 71 72 72 where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection) 73 73 having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection) 74 74 params = [] 75 75 for val in self.query.extra_select.itervalues(): 76 76 params.extend(val[1]) 77 77 78 78 result = ['SELECT'] 79 79 80 if self.query.distinct: 80 result.append('DISTINCT') 81 result.append(self.connection.ops.distinct(self.query.distinct_fields)) 82 81 83 result.append(', '.join(out_cols + self.query.ordering_aliases)) 82 84 83 85 result.append('FROM') 84 86 result.extend(from_) 85 87 params.extend(f_params) 86 88 87 89 if where: 88 90 result.append('WHERE %s' % where) -
django/db/models/sql/query.py
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
a b 121 121 self.tables = [] # Aliases in the order they are created. 122 122 self.where = where() 123 123 self.where_class = where 124 124 self.group_by = None 125 125 self.having = where() 126 126 self.order_by = [] 127 127 self.low_mark, self.high_mark = 0, None # Used for offset/limit 128 128 self.distinct = False 129 self.distinct_fields = None 129 130 self.select_for_update = False 130 131 self.select_for_update_nowait = False 131 132 self.select_related = False 132 133 self.related_select_cols = [] 133 134 134 135 # SQL aggregate-related attributes 135 136 self.aggregates = SortedDict() # Maps alias -> SQL aggregate function 136 137 self.aggregate_select_mask = None … … 259 260 if self.group_by is None: 260 261 obj.group_by = None 261 262 else: 262 263 obj.group_by = self.group_by[:] 263 264 obj.having = copy.deepcopy(self.having, memo=memo) 264 265 obj.order_by = self.order_by[:] 265 266 obj.low_mark, obj.high_mark = self.low_mark, self.high_mark 266 267 obj.distinct = self.distinct 268 obj.distinct_fields = self.distinct_fields 267 269 obj.select_for_update = self.select_for_update 268 270 obj.select_for_update_nowait = self.select_for_update_nowait 269 271 obj.select_related = self.select_related 270 272 obj.related_select_cols = [] 271 273 obj.aggregates = copy.deepcopy(self.aggregates, memo=memo) 272 274 if self.aggregate_select_mask is None: 273 275 obj.aggregate_select_mask = None 274 276 else: … … 387 389 in zip(query.aggregate_select.items(), result) 388 390 ]) 389 391 390 392 def get_count(self, using): 391 393 """ 392 394 Performs a COUNT() query using the current filter constraints. 393 395 """ 394 396 obj = self.clone() 395 if len(self.select) > 1 or self.aggregate_select :397 if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields): 396 398 # If a select clause exists, then the query has already started to 397 399 # specify the columns that are to be returned. 
398 400 # In this case, we need to use a subquery to evaluate the count. 399 401 from django.db.models.sql.subqueries import AggregateQuery 400 402 subquery = obj 401 403 subquery.clear_ordering(True) 402 404 subquery.clear_limits() 403 405 … … 1590 1592 """ 1591 1593 Clears the list of fields to select (but not extra_select columns). 1592 1594 Some queryset types completely replace any existing list of select 1593 1595 columns. 1594 1596 """ 1595 1597 self.select = [] 1596 1598 self.select_fields = [] 1597 1599 1600 def add_distinct_fields(self, field_names): 1601 self.distinct_fields = [] 1602 options = self.get_meta() 1603 1604 for name in field_names: 1605 field, source, opts, join_list, last, _ = self.setup_joins( 1606 name.split(LOOKUP_SEP), options, self.get_initial_alias(), False) 1607 self.distinct_fields.append(field) 1608 1598 1609 def add_fields(self, field_names, allow_m2m=True): 1599 1610 """ 1600 1611 Adds the given (model) fields to the select set. The field names are 1601 1612 added in the order specified. 1602 1613 """ 1603 1614 alias = self.get_initial_alias() 1604 1615 opts = self.get_meta() 1605 1616 -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
a b 340 340 a default ordering, or when using :meth:`order_by()`). If no such ordering is 341 341 defined for a given ``QuerySet``, calling ``reverse()`` on it has no real 342 342 effect (the ordering was undefined prior to calling ``reverse()``, and will 343 343 remain undefined afterward). 344 344 345 345 distinct 346 346 ~~~~~~~~ 347 347 348 .. method:: distinct( )348 .. method:: distinct(*fields) 349 349 350 350 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This 351 351 eliminates duplicate rows from the query results. 352 352 353 353 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this 354 354 is rarely a problem, because simple queries such as ``Blog.objects.all()`` 355 355 don't introduce the possibility of duplicate result rows. However, if your 356 356 query spans multiple tables, it's possible to get duplicate results when a … … 369 369 selected, the columns used in any :meth:`order_by()` (or default model 370 370 ordering) will still be involved and may affect uniqueness of the results. 371 371 372 372 The moral here is that if you are using ``distinct()`` be careful about 373 373 ordering by related models. Similarly, when using ``distinct()`` and 374 374 :meth:`values()` together, be careful when ordering by fields not in the 375 375 :meth:`values()` call. 376 376 377 .. versionadded:: 1.4 378 ``distinct()`` accepts optional positional arguments ``*fields``, naming the 379 fields to which the ``DISTINCT`` should be limited. This translates to a 380 ``SELECT DISTINCT ON`` SQL query, which is only available on PostgreSQL; other 381 database backends raise ``NotImplementedError``. 382 383 .. note:: 384 When optional ``*fields`` are given, any :meth:`order_by()` call (or default 385 model ordering) must start with the same field names, in the same order. 386 377 387 values 378 388 ~~~~~~ 379 389 380 390 ..
method:: values(*fields) 381 391 382 392 Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns 383 393 dictionaries when used as an iterable, rather than model-instance objects. 384 394 -
tests/regressiontests/queries/models.py
diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
a b 203 203 204 204 # An inter-related setup with a model subclass that has a nullable 205 205 # path to another model, and a return path from that model. 206 206 207 207 class Celebrity(models.Model): 208 208 name = models.CharField("Name", max_length=20) 209 209 greatest_fan = models.ForeignKey("Fan", null=True, unique=True) 210 210 211 def __unicode__(self): 212 return self.name 213 211 214 class TvChef(Celebrity): 212 215 pass 213 216 214 217 class Fan(models.Model): 215 218 fan_of = models.ForeignKey(Celebrity) 216 219 217 220 # Multiple foreign keys 218 221 class LeafA(models.Model): … … 337 340 return "category item: " + str(self.category) 338 341 339 342 class OneToOneCategory(models.Model): 340 343 new_name = models.CharField(max_length=15) 341 344 category = models.OneToOneField(SimpleCategory) 342 345 343 346 def __unicode__(self): 344 347 return "one2one " + self.new_name 345 348 349 class Staff(models.Model): 350 name = models.CharField(max_length=50) 351 organisation = models.CharField(max_length=100) 352 tags = models.ManyToManyField(Tag, through='StaffTag') 353 354 def __unicode__(self): 355 return self.name 356 357 class StaffTag(models.Model): 358 staff = models.ForeignKey(Staff) 359 tag = models.ForeignKey(Tag) 360 361 def __unicode__(self): 362 return u"%s -> %s" % (self.tag, self.staff) -
tests/regressiontests/queries/tests.py
diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
a b 10 10 from django.test import TestCase, skipUnlessDBFeature 11 11 from django.utils import unittest 12 12 from django.utils.datastructures import SortedDict 13 13 14 14 from models import (Annotation, Article, Author, Celebrity, Child, Cover, Detail, 15 15 DumbCategory, ExtraInfo, Fan, Item, LeafA, LoopX, LoopZ, ManagedModel, 16 16 Member, NamedCategory, Note, Number, Plaything, PointerA, Ranking, Related, 17 17 Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten, Node, ObjectA, ObjectB, 18 ObjectC, CategoryItem, SimpleCategory, SpecialCategory, OneToOneCategory) 18 ObjectC, CategoryItem, SimpleCategory, SpecialCategory, OneToOneCategory, 19 Staff, StaffTag) 19 20 20 21 21 22 class BaseQuerysetTest(TestCase): 22 23 def assertValueQuerysetEqual(self, qs, values): 23 24 return self.assertQuerysetEqual(qs, values, transform=lambda x: x) 24 25 25 26 26 27 class Queries1Tests(BaseQuerysetTest): … … 1731 1732 def setUp(self): 1732 1733 generic = NamedCategory.objects.create(name="Generic") 1733 1734 t1 = Tag.objects.create(name='t1', category=generic) 1734 1735 t2 = Tag.objects.create(name='t2', parent=t1, category=generic) 1735 1736 t3 = Tag.objects.create(name='t3', parent=t1) 1736 1737 t4 = Tag.objects.create(name='t4', parent=t3) 1737 1738 t5 = Tag.objects.create(name='t5', parent=t3) 1738 1739 1740 p1_o1 = Staff.objects.create(name="p1", organisation="o1") 1741 p2_o1 = Staff.objects.create(name="p2", organisation="o1") 1742 p3_o1 = Staff.objects.create(name="p3", organisation="o1") 1743 p1_o2 = Staff.objects.create(name="p1", organisation="o2") 1744 1745 StaffTag.objects.create(staff=p1_o1, tag=t1) 1746 StaffTag.objects.create(staff=p1_o1, tag=t1) 1747 1748 celeb1 = Celebrity.objects.create(name="c1") 1749 celeb2 = Celebrity.objects.create(name="c2") 1750 1751 self.fan1 = Fan.objects.create(fan_of=celeb1) 1752 self.fan2 = Fan.objects.create(fan_of=celeb1) 1753 self.fan3 = Fan.objects.create(fan_of=celeb2) 1754 1739 1755 # In Python 2.6 beta releases, 
exceptions raised in __len__ are swallowed 1740 1756 # (Python issue 1242657), so these cases return an empty list, rather than 1741 1757 # raising an exception. Not a lot we can do about that, unfortunately, due to 1742 1758 # the way Python handles list() calls internally. Thus, we skip the tests for 1743 1759 # Python 2.6. 1744 1760 @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6") 1745 1761 def test_infinite_loop(self): 1746 1762 # If you're not careful, it's possible to introduce infinite loops via … … 1802 1818 Number.objects.filter(num__in=numbers[:2000]).count(), 1803 1819 2000 1804 1820 ) 1805 1821 self.assertEqual( 1806 1822 Number.objects.filter(num__in=numbers).count(), 1807 1823 2500 1808 1824 ) 1809 1825 1826 @skipUnlessDBFeature('can_distinct_on_fields') 1827 def test_ticket6422(self): 1828 # (qset, expected) tuples 1829 qsets = ( 1830 ( 1831 Staff.objects.distinct().order_by('name'), 1832 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1833 ), 1834 ( 1835 Staff.objects.distinct('name').order_by('name'), 1836 ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1837 ), 1838 ( 1839 Staff.objects.distinct('organisation').order_by('organisation', 'name'), 1840 ['<Staff: p1>', '<Staff: p1>'], 1841 ), 1842 ( 1843 Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'), 1844 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1845 ), 1846 ( 1847 Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\ 1848 distinct('name').order_by('name'), 1849 ['<Celebrity: c1>', '<Celebrity: c2>'], 1850 ), 1851 ( 1852 StaffTag.objects.distinct('staff','tag'), 1853 ['<StaffTag: t1 -> p1>'], 1854 ), 1855 ( 1856 Tag.objects.order_by('parent__pk').distinct('parent'), 1857 ['<Tag: t3>', '<Tag: t5>', '<Tag: t1>'], 1858 ), 1859 ( 1860 StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'), 1861 ['<StaffTag: t1 -> p1>'], 1862 ), 1863 ) 1864 1865 for qset, expected in 
qsets: 1866 self.assertQuerysetEqual(qset, expected) 1867 self.assertEqual(qset.count(), len(expected)) 1868 1869 # and check the fieldlookup 1870 self.assertRaises( 1871 FieldError, 1872 lambda: Staff.objects.distinct('shrubbery') 1873 ) 1874 1875 1810 1876 class UnionTests(unittest.TestCase): 1811 1877 """ 1812 1878 Tests for the union of two querysets. Bug #12252. 1813 1879 """ 1814 1880 def setUp(self): 1815 1881 objectas = [] 1816 1882 objectbs = [] 1817 1883 objectcs = []