Ticket #6422: distinct_on.11.diff
File distinct_on.11.diff, 23.5 KB (added by , 13 years ago) |
---|
-
AUTHORS
diff --git a/AUTHORS b/AUTHORS index cec5db1..06ba219 100644
a b answer newbie questions, and generally made Django that much better: 203 203 Marc Garcia <marc.garcia@accopensys.com> 204 204 Andy Gayton <andy-django@thecablelounge.com> 205 205 geber@datacollect.com 206 Jeffrey Gelens <jeffrey@gelens.org> 206 207 Baishampayan Ghose 207 208 Joshua Ginsberg <jag@flowtheory.net> 208 209 Dimitris Glezos <dimitris@glezos.com> -
django/db/backends/__init__.py
diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py index f2bde84..126b5fc 100644
a b class BaseDatabaseFeatures(object): 379 379 supports_stddev = None 380 380 can_introspect_foreign_keys = None 381 381 382 # Support for the DISTINCT ON clause 383 can_distinct_on_fields = False 384 382 385 def __init__(self, connection): 383 386 self.connection = connection 384 387 … … class BaseDatabaseOperations(object): 532 535 """ 533 536 raise NotImplementedError('Full-text search is not implemented for this database backend') 534 537 538 def distinct(self, fields): 539 """ 540 Returns an SQL DISTINCT clause which removes duplicate rows from the 541 result set. If any fields are given, only the given fields are being 542 checked for duplicates. 543 """ 544 if fields: 545 raise NotImplementedError('DISTINCT ON fields is not supported by this database backend') 546 else: 547 return 'DISTINCT' 548 535 549 def last_executed_query(self, cursor, sql, params): 536 550 """ 537 551 Returns a string of the query last executed by the given cursor, with -
django/db/backends/postgresql_psycopg2/base.py
diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py index c816237..cff3fa5 100644
a b class DatabaseFeatures(BaseDatabaseFeatures): 82 82 has_select_for_update_nowait = True 83 83 has_bulk_insert = True 84 84 supports_tablespaces = True 85 can_distinct_on_fields = True 85 86 86 87 class DatabaseWrapper(BaseDatabaseWrapper): 87 88 vendor = 'postgresql' -
django/db/backends/postgresql_psycopg2/operations.py
diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py index acfeeaf..fe01566 100644
a b class DatabaseOperations(BaseDatabaseOperations): 179 179 180 180 return 63 181 181 182 def distinct(self, fields): 183 if fields: 184 return 'DISTINCT ON (%s)' % ', '.join(fields) 185 else: 186 return 'DISTINCT' 187 182 188 def last_executed_query(self, cursor, sql, params): 183 189 # http://initd.org/psycopg/docs/cursor.html#cursor.query 184 190 # The query attribute is a Psycopg extension to the DB API 2.0. -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py index c752049..43b3954 100644
a b class QuerySet(object): 751 751 obj.query.add_ordering(*field_names) 752 752 return obj 753 753 754 def distinct(self, true_or_false=True):754 def distinct(self, *field_names): 755 755 """ 756 756 Returns a new QuerySet instance that will select only distinct results. 757 757 """ 758 assert self.query.can_filter(), \ 759 "Cannot create distinct fields once a slice has been taken." 758 760 obj = self._clone() 759 obj.query. distinct = true_or_false761 obj.query.add_distinct_fields(*field_names) 760 762 return obj 761 763 762 764 def extra(self, select=None, where=None, params=None, tables=None, … … class EmptyQuerySet(QuerySet): 1179 1181 """ 1180 1182 return self 1181 1183 1182 def distinct(self, true_or_false=True):1184 def distinct(self, fields=None): 1183 1185 """ 1184 1186 Always returns EmptyQuerySet. 1185 1187 """ -
django/db/models/sql/compiler.py
diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py index cebd77f..62a38ec 100644
a b class SQLCompiler(object): 23 23 Does any necessary class setup immediately prior to producing SQL. This 24 24 is for things that can't necessarily be done in __init__ because we 25 25 might not have all the pieces in place at that time. 26 # TODO: after the query has been executed, the altered state should be 27 # cleaned. We are not using a clone() of the query here. 26 28 """ 27 29 if not self.query.tables: 28 30 self.query.join((None, self.query.model._meta.db_table, None, None)) … … class SQLCompiler(object): 60 62 return '', () 61 63 62 64 self.pre_sql_setup() 65 # After executing the query, we must get rid of any joins the query 66 # setup created. So, take note of alias counts before the query ran. 67 # However we do not want to get rid of stuff done in pre_sql_setup(), 68 # as the pre_sql_setup will modify query state in a way that forbids 69 # another run of it. 70 self.refcounts_before = self.query.alias_refcount.copy() 63 71 out_cols = self.get_columns(with_col_aliases) 64 72 ordering, ordering_group_by = self.get_ordering() 65 73 66 # This must come after 'select' and 'ordering' -- see docstring of 67 # get_from_clause() for details. 74 distinct_fields = self.get_distinct() 75 76 # This must come after 'select', 'ordering' and 'distinct' -- see 77 # docstring of get_from_clause() for details. 68 78 from_, f_params = self.get_from_clause() 69 79 70 80 qn = self.quote_name_unless_alias … … class SQLCompiler(object): 76 86 params.extend(val[1]) 77 87 78 88 result = ['SELECT'] 89 79 90 if self.query.distinct: 80 result.append('DISTINCT') 91 result.append(self.connection.ops.distinct(distinct_fields)) 92 81 93 result.append(', '.join(out_cols + self.query.ordering_aliases)) 82 94 83 95 result.append('FROM') … … class SQLCompiler(object): 129 141 raise DatabaseError('NOWAIT is not supported on this database backend.') 130 142 result.append(self.connection.ops.for_update_sql(nowait=nowait)) 131 143 144 # Finally do cleanup - get rid of the joins we created above. 145 self.query.reset_refcounts(self.refcounts_before) 146 132 147 return ' '.join(result), tuple(params) 133 148 134 149 def as_nested_sql(self): … … class SQLCompiler(object): 292 307 col_aliases.add(field.column) 293 308 return result, aliases 294 309 310 def get_distinct(self): 311 """ 312 Returns a quoted list of fields to use in DISTINCT ON part of the query. 313 314 Note that this method can alter the tables in the query, and thus this 315 must be called before get_from_clause(). 316 """ 317 qn = self.quote_name_unless_alias 318 qn2 = self.connection.ops.quote_name 319 result = [] 320 options = self.query.model._meta 321 322 for name in self.query.distinct_fields: 323 # We do pretty much the same join creation & promotion & trimming as in 324 # get_ordering 325 field, target, opts, joins, last, extra = self.query.setup_joins( 326 name.split(LOOKUP_SEP), options, self.query.get_initial_alias(), False) 327 alias = joins[-1] 328 col = target.column 329 if not field.rel: 330 # To avoid inadvertent trimming of a necessary alias, use the 331 # refcount to show that we are referencing a non-relation field on 332 # the model. 333 self.query.ref_alias(alias) 334 # Must use left outer joins for nullable fields and their relations. 335 self.query.promote_alias_chain(joins, 336 self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER) 337 if alias: 338 # We have to do the same "final join" optimisation as in 339 # add_filter, since the final column might not otherwise be part of 340 # the select set (so we can't order on it). 341 while 1: 342 join = self.query.alias_map[alias] 343 if col != join[RHS_JOIN_COL]: 344 break 345 self.query.unref_alias(alias) 346 alias = join[LHS_ALIAS] 347 col = join[LHS_JOIN_COL] 348 result.append("%s.%s" % (qn(alias), qn2(col))) 349 return result 350 351 295 352 def get_ordering(self): 296 353 """ 297 354 Returns a tuple containing a list representing the SQL elements in the … … class SQLCompiler(object): 438 495 from-clause via a "select". 439 496 440 497 This should only be called after any SQL construction methods that 441 might change the tables we need. This means the select columns and442 ordering must be done first.498 might change the tables we need. This means the select columns, 499 ordering and distinct must be done first. 443 500 """ 444 501 result = [] 445 502 qn = self.quote_name_unless_alias … … class SQLAggregateCompiler(SQLCompiler): 984 1041 """ 985 1042 if qn is None: 986 1043 qn = self.quote_name_unless_alias 1044 987 1045 sql = ('SELECT %s FROM (%s) subquery' % ( 988 1046 ', '.join([ 989 1047 aggregate.as_sql(qn, self.connection) -
django/db/models/sql/query.py
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py index 4afe288..362f6fd 100644
a b class Query(object): 127 127 self.order_by = [] 128 128 self.low_mark, self.high_mark = 0, None # Used for offset/limit 129 129 self.distinct = False 130 self.distinct_fields = [] 130 131 self.select_for_update = False 131 132 self.select_for_update_nowait = False 132 133 self.select_related = False … … class Query(object): 265 266 obj.order_by = self.order_by[:] 266 267 obj.low_mark, obj.high_mark = self.low_mark, self.high_mark 267 268 obj.distinct = self.distinct 269 obj.distinct_fields = self.distinct_fields[:] 268 270 obj.select_for_update = self.select_for_update 269 271 obj.select_for_update_nowait = self.select_for_update_nowait 270 272 obj.select_related = self.select_related … … class Query(object): 298 300 else: 299 301 obj.used_aliases = set() 300 302 obj.filter_is_sticky = False 303 301 304 obj.__dict__.update(kwargs) 302 305 if hasattr(obj, '_setup_query'): 303 306 obj._setup_query() … … class Query(object): 393 396 Performs a COUNT() query using the current filter constraints. 394 397 """ 395 398 obj = self.clone() 396 if len(self.select) > 1 or self.aggregate_select :399 if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields): 397 400 # If a select clause exists, then the query has already started to 398 401 # specify the columns that are to be returned. 399 402 # In this case, we need to use a subquery to evaluate the count. … … class Query(object): 452 455 "Cannot combine queries once a slice has been taken." 453 456 assert self.distinct == rhs.distinct, \ 454 457 "Cannot combine a unique query with a non-unique query." 458 assert self.distinct_fields == rhs.distinct_fields, \ 459 "Cannot combine queries with different distinct fields." 455 460 456 461 self.remove_inherited_models() 457 462 # Work out how to relabel the rhs aliases, if necessary. … … class Query(object): 674 679 """ Increases the reference count for this alias. """ 675 680 self.alias_refcount[alias] += 1 676 681 677 def unref_alias(self, alias ):682 def unref_alias(self, alias, amount=1): 678 683 """ Decreases the reference count for this alias. """ 679 self.alias_refcount[alias] -= 1684 self.alias_refcount[alias] -= amount 680 685 681 686 def promote_alias(self, alias, unconditional=False): 682 687 """ … … class Query(object): 705 710 if self.promote_alias(alias, must_promote): 706 711 must_promote = True 707 712 713 def reset_refcounts(self, to_counts): 714 """ 715 This method will reset reference counts for aliases so that they match 716 that given in to_counts. 717 """ 718 for alias, cur_refcount in self.alias_refcount.copy().items(): 719 unref_amount = cur_refcount - to_counts.get(alias, 0) 720 self.unref_alias(alias, unref_amount) 721 708 722 def promote_unused_aliases(self, initial_refcounts, used_aliases): 709 723 """ 710 724 Given a "before" copy of the alias_refcounts dictionary (as … … class Query(object): 832 846 def count_active_tables(self): 833 847 """ 834 848 Returns the number of tables in this query with a non-zero reference 835 count. 849 count. Note that after execution, the reference counts are zeroed, so 850 tables added in compiler will not be seen by this method. 836 851 """ 837 852 return len([1 for count in self.alias_refcount.itervalues() if count]) 838 853 … … class Query(object): 1596 1611 self.select = [] 1597 1612 self.select_fields = [] 1598 1613 1614 def add_distinct_fields(self, *field_names): 1615 """ 1616 Adds and resolves the given fields to the query's "distinct on" clause. 1617 """ 1618 self.distinct_fields = field_names 1619 self.distinct = True 1620 1621 1599 1622 def add_fields(self, field_names, allow_m2m=True): 1600 1623 """ 1601 1624 Adds the given (model) fields to the select set. The field names are -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt index 6f2cad3..b7bc647 100644
a b remain undefined afterward). 345 345 distinct 346 346 ~~~~~~~~ 347 347 348 .. method:: distinct( )348 .. method:: distinct([*fields]) 349 349 350 350 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This 351 351 eliminates duplicate rows from the query results. … … query spans multiple tables, it's possible to get duplicate results when a 374 374 :meth:`values()` together, be careful when ordering by fields not in the 375 375 :meth:`values()` call. 376 376 377 .. versionadded:: 1.4 378 379 The possibility to pass positional arguments (``*fields``) is new in Django 1.4. 380 They are names of fields to which the ``DISTINCT`` should be limited. This 381 translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates 382 duplicate rows not by comparing all fields in a row, but by comparing only the given 383 fields. 384 385 .. note:: 386 Note that the ability to specify field names is only available in PostgreSQL. 387 388 .. note:: 389 When using the ``DISTINCT ON`` functionality it is required that the columns given 390 to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT 391 DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you 392 don't specify an order, then you'll get some arbitrary row. 393 394 Examples:: 395 396 >>> Author.objects.distinct() 397 [...] 398 399 >>> Entry.objects.order_by('pub_date').distinct('pub_date') 400 [...] 401 402 >>> Entry.objects.order_by('blog').distinct('blog') 403 [...] 404 405 >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date') 406 [...] 407 408 >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date') 409 [...] 410 411 >>> Entry.objects.order_by('author', 'pub_date').distinct('author') 412 [...] 413 377 414 values 378 415 ~~~~~~ 379 416 -
tests/regressiontests/queries/models.py
diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py index e69ce48..6ad9986 100644
a b class Celebrity(models.Model): 209 209 name = models.CharField("Name", max_length=20) 210 210 greatest_fan = models.ForeignKey("Fan", null=True, unique=True) 211 211 212 def __unicode__(self): 213 return self.name 214 212 215 class TvChef(Celebrity): 213 216 pass 214 217 … … class OneToOneCategory(models.Model): 344 347 def __unicode__(self): 345 348 return "one2one " + self.new_name 346 349 350 class Staff(models.Model): 351 id = models.IntegerField(primary_key=True) 352 name = models.CharField(max_length=50) 353 organisation = models.CharField(max_length=100) 354 tags = models.ManyToManyField(Tag, through='StaffTag') 355 coworkers = models.ManyToManyField('self') 356 357 def __unicode__(self): 358 return self.name 359 360 class StaffTag(models.Model): 361 staff = models.ForeignKey(Staff) 362 tag = models.ForeignKey(Tag) 363 364 def __unicode__(self): 365 return u"%s -> %s" % (self.tag, self.staff) -
tests/regressiontests/queries/tests.py
diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py index 6a54125..fca99be 100644
a b from .models import (Annotation, Article, Author, Celebrity, Child, Cover, 18 18 ManagedModel, Member, NamedCategory, Note, Number, Plaything, PointerA, 19 19 Ranking, Related, Report, ReservedName, Tag, TvChef, Valid, X, Food, Eaten, 20 20 Node, ObjectA, ObjectB, ObjectC, CategoryItem, SimpleCategory, 21 SpecialCategory, OneToOneCategory )21 SpecialCategory, OneToOneCategory, Staff, StaffTag) 22 22 23 23 24 24 class BaseQuerysetTest(TestCase): … … class Queries1Tests(BaseQuerysetTest): 234 234 ['<Item: four>', '<Item: one>'] 235 235 ) 236 236 237 # FIXME: This is difficult to fix and very much an edge case, so punt for238 # now. This is related to the order_by() tests for ticket #2253, but the239 # old bug exhibited itself here (q2 was pulling too many tables into the240 # combined query with the new ordering, but only because we have evaluated241 # q2 already).242 @unittest.expectedFailure243 237 def test_order_by_tables(self): 244 238 q1 = Item.objects.order_by('name') 245 239 q2 = Item.objects.filter(id=self.i1.id) 246 240 list(q2) 247 241 self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1) 248 242 243 def test_order_by_join_unref(self): 244 """ 245 This test is related to the above one, testing that there aren't 246 old JOINs in the query. 247 """ 248 qs = Celebrity.objects.order_by('greatest_fan__fan_of') 249 self.assertIn('OUTER JOIN', str(qs.query)) 250 qs = qs.order_by('id') 251 self.assertNotIn('OUTER JOIN', str(qs.query)) 252 249 253 def test_tickets_4088_4306(self): 250 254 self.assertQuerysetEqual( 251 255 Report.objects.filter(creator=1001), … … class ConditionalTests(BaseQuerysetTest): 1739 1743 t4 = Tag.objects.create(name='t4', parent=t3) 1740 1744 t5 = Tag.objects.create(name='t5', parent=t3) 1741 1745 1746 p1_o1 = Staff.objects.create(id=1, name="p1", organisation="o1") 1747 p2_o1 = Staff.objects.create(id=2, name="p2", organisation="o1") 1748 p3_o1 = Staff.objects.create(id=3, name="p3", organisation="o1") 1749 p1_o2 = Staff.objects.create(id=4, name="p1", organisation="o2") 1750 p1_o1.coworkers.add(p2_o1, p3_o1) 1751 StaffTag.objects.create(staff=p1_o1, tag=t1) 1752 StaffTag.objects.create(staff=p1_o1, tag=t1) 1753 1754 celeb1 = Celebrity.objects.create(name="c1") 1755 celeb2 = Celebrity.objects.create(name="c2") 1756 1757 self.fan1 = Fan.objects.create(fan_of=celeb1) 1758 self.fan2 = Fan.objects.create(fan_of=celeb1) 1759 self.fan3 = Fan.objects.create(fan_of=celeb2) 1760 1742 1761 # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed 1743 1762 # (Python issue 1242657), so these cases return an empty list, rather than 1744 1763 # raising an exception. Not a lot we can do about that, unfortunately, due to … … class ConditionalTests(BaseQuerysetTest): 1810 1829 2500 1811 1830 ) 1812 1831 1832 @skipUnlessDBFeature('can_distinct_on_fields') 1833 def test_ticket6422(self): 1834 """QuerySet.distinct('field', ...) works""" 1835 # (qset, expected) tuples 1836 qsets = ( 1837 ( 1838 Staff.objects.distinct().order_by('name'), 1839 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1840 ), 1841 ( 1842 Staff.objects.distinct('name').order_by('name'), 1843 ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1844 ), 1845 ( 1846 Staff.objects.distinct('organisation').order_by('organisation', 'name'), 1847 ['<Staff: p1>', '<Staff: p1>'], 1848 ), 1849 ( 1850 Staff.objects.distinct('name', 'organisation').order_by('name', 'organisation'), 1851 ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>'], 1852 ), 1853 ( 1854 Celebrity.objects.filter(fan__in=[self.fan1, self.fan2, self.fan3]).\ 1855 distinct('name').order_by('name'), 1856 ['<Celebrity: c1>', '<Celebrity: c2>'], 1857 ), 1858 # Does combining querysets work? 1859 ( 1860 (Celebrity.objects.filter(fan__in=[self.fan1, self.fan2]).\ 1861 distinct('name').order_by('name') 1862 |Celebrity.objects.filter(fan__in=[self.fan3]).\ 1863 distinct('name').order_by('name')), 1864 ['<Celebrity: c1>', '<Celebrity: c2>'], 1865 ), 1866 ( 1867 StaffTag.objects.distinct('staff','tag'), 1868 ['<StaffTag: t1 -> p1>'], 1869 ), 1870 ( 1871 Tag.objects.order_by('parent__pk', 'pk').distinct('parent'), 1872 ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>'], 1873 ), 1874 ( 1875 StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'), 1876 ['<StaffTag: t1 -> p1>'], 1877 ), 1878 # Fetch the alphabetically first coworker for each worker 1879 ( 1880 (Staff.objects.distinct('id').order_by('id', 'coworkers__name'). 1881 values_list('id', 'coworkers__name')), 1882 ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"] 1883 ), 1884 ) 1885 for qset, expected in qsets: 1886 self.assertQuerysetEqual(qset, expected) 1887 self.assertEqual(qset.count(), len(expected)) 1888 1889 # Combining queries with different distinct_fields is not allowed. 1890 base_qs = Celebrity.objects.all() 1891 self.assertRaisesMessage( 1892 AssertionError, 1893 "Cannot combine queries with different distinct fields.", 1894 lambda: (base_qs.distinct('id') & base_qs.distinct('name')) 1895 ) 1896 1897 # Test join unreffing 1898 c1 = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of') 1899 self.assertIn('OUTER JOIN', str(c1.query)) 1900 c2 = c1.distinct('pk') 1901 self.assertNotIn('OUTER JOIN', str(c2.query)) 1902 1813 1903 class UnionTests(unittest.TestCase): 1814 1904 """ 1815 1905 Tests for the union of two querysets. Bug #12252. -
tests/regressiontests/select_related_regress/tests.py
diff --git a/tests/regressiontests/select_related_regress/tests.py b/tests/regressiontests/select_related_regress/tests.py index 4818b95..4cd4f78 100644
a b class SelectRelatedRegressTests(TestCase): 40 40 self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], 41 41 [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) 42 42 43 # This final query should only joinseven tables (port, device and building44 # twice each, plus connection once). 45 self.assertEqual( connections.query.count_active_tables(), 7)43 # This final query should only have seven tables (port, device and building 44 # twice each, plus connection once). Thus, 6 joins plus the FROM table. 45 self.assertEqual(str(connections.query).count(" JOIN "), 6) 46 46 47 47 48 48 def test_regression_8106(self):