Ticket #6422: distinct_on.13.diff
File distinct_on.13.diff, 42.5 KB (added by , 13 years ago) |
---|
-
AUTHORS
diff --git a/AUTHORS b/AUTHORS
a b 198 198 Vincent Foley <vfoleybourgon@yahoo.ca> 199 199 Alcides Fonseca 200 200 Rudolph Froger <rfroger@estrate.nl> 201 201 Jorge Gajon <gajon@gajon.org> 202 202 gandalf@owca.info 203 203 Marc Garcia <marc.garcia@accopensys.com> 204 204 Andy Gayton <andy-django@thecablelounge.com> 205 205 geber@datacollect.com 206 Jeffrey Gelens <jeffrey@gelens.org> 206 207 Baishampayan Ghose 207 208 Joshua Ginsberg <jag@flowtheory.net> 208 209 Dimitris Glezos <dimitris@glezos.com> 209 210 glin@seznam.cz 210 211 martin.glueck@gmail.com 211 212 Artyom Gnilov <boobsd@gmail.com> 212 213 Ben Godfrey <http://aftnn.org> 213 214 GomoX <gomo@datafull.com> -
django/db/backends/__init__.py
diff --git a/django/db/backends/__init__.py b/django/db/backends/__init__.py
a b 374 374 375 375 # Features that need to be confirmed at runtime 376 376 # Cache whether the confirmation has been performed. 377 377 _confirmed = False 378 378 supports_transactions = None 379 379 supports_stddev = None 380 380 can_introspect_foreign_keys = None 381 381 382 # Support for the DISTINCT ON clause 383 can_distinct_on_fields = False 384 382 385 def __init__(self, connection): 383 386 self.connection = connection 384 387 385 388 def confirm(self): 386 389 "Perform manual checks of any database features that might vary between installs" 387 390 self._confirmed = True 388 391 self.supports_transactions = self._supports_transactions() 389 392 self.supports_stddev = self._supports_stddev() … … 527 530 def fulltext_search_sql(self, field_name): 528 531 """ 529 532 Returns the SQL WHERE clause to use in order to perform a full-text 530 533 search of the given field_name. Note that the resulting string should 531 534 contain a '%s' placeholder for the value being searched against. 532 535 """ 533 536 raise NotImplementedError('Full-text search is not implemented for this database backend') 534 537 538 def distinct(self, fields): 539 """ 540 Returns an SQL DISTINCT clause which removes duplicate rows from the 541 result set. If any fields are given, only the given fields are being 542 checked for duplicates. 543 """ 544 if fields: 545 raise NotImplementedError('DISTINCT ON fields is not supported by this database backend') 546 else: 547 return 'DISTINCT' 548 535 549 def last_executed_query(self, cursor, sql, params): 536 550 """ 537 551 Returns a string of the query last executed by the given cursor, with 538 552 placeholders replaced with actual values. 539 553 540 554 `sql` is the raw query containing placeholders, and `params` is the 541 555 sequence of parameters. These are used by default, but this method 542 556 exists for database backends to provide a better implementation -
django/db/backends/postgresql_psycopg2/base.py
diff --git a/django/db/backends/postgresql_psycopg2/base.py b/django/db/backends/postgresql_psycopg2/base.py
a b 77 77 can_return_id_from_insert = True 78 78 requires_rollback_on_dirty_transaction = True 79 79 has_real_datatype = True 80 80 can_defer_constraint_checks = True 81 81 has_select_for_update = True 82 82 has_select_for_update_nowait = True 83 83 has_bulk_insert = True 84 84 supports_tablespaces = True 85 can_distinct_on_fields = True 85 86 86 87 class DatabaseWrapper(BaseDatabaseWrapper): 87 88 vendor = 'postgresql' 88 89 operators = { 89 90 'exact': '= %s', 90 91 'iexact': '= UPPER(%s)', 91 92 'contains': 'LIKE %s', 92 93 'icontains': 'LIKE UPPER(%s)', -
django/db/backends/postgresql_psycopg2/operations.py
diff --git a/django/db/backends/postgresql_psycopg2/operations.py b/django/db/backends/postgresql_psycopg2/operations.py
a b 174 174 macro in src/include/pg_config_manual.h . 175 175 176 176 This implementation simply returns 63, but can easily be overridden by a 177 177 custom database backend that inherits most of its behavior from this one. 178 178 """ 179 179 180 180 return 63 181 181 182 def distinct(self, fields): 183 if fields: 184 return 'DISTINCT ON (%s)' % ', '.join(fields) 185 else: 186 return 'DISTINCT' 187 182 188 def last_executed_query(self, cursor, sql, params): 183 189 # http://initd.org/psycopg/docs/cursor.html#cursor.query 184 190 # The query attribute is a Psycopg extension to the DB API 2.0. 185 191 return cursor.query 186 192 187 193 def return_insert_id(self): 188 194 return "RETURNING %s", () 189 195 -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py
a b 318 318 def aggregate(self, *args, **kwargs): 319 319 """ 320 320 Returns a dictionary containing the calculations (aggregation) 321 321 over the current queryset 322 322 323 323 If args is present the expression is passed as a kwarg using 324 324 the Aggregate object's default alias. 325 325 """ 326 if self.query.distinct_fields: 327 raise NotImplementedError("aggregate() + distinct(fields) not implemented.") 326 328 for arg in args: 327 329 kwargs[arg.default_alias] = arg 328 330 329 331 query = self.query.clone() 330 332 331 333 for (alias, aggregate_expr) in kwargs.items(): 332 334 query.add_aggregate(aggregate_expr, self.model, alias, 333 335 is_summary=True) … … 746 748 """ 747 749 assert self.query.can_filter(), \ 748 750 "Cannot reorder a query once a slice has been taken." 749 751 obj = self._clone() 750 752 obj.query.clear_ordering() 751 753 obj.query.add_ordering(*field_names) 752 754 return obj 753 755 754 def distinct(self, true_or_false=True): 756 def distinct(self, *field_names): 755 757 """ 756 758 Returns a new QuerySet instance that will select only distinct results. 757 759 """ 760 assert self.query.can_filter(), \ 761 "Cannot create distinct fields once a slice has been taken." 758 762 obj = self._clone() 759 obj.query.distinct = true_or_false 763 obj.query.add_distinct_fields(*field_names) 760 764 return obj 761 765 762 766 def extra(self, select=None, where=None, params=None, tables=None, 763 767 order_by=None, select_params=None): 764 768 """ 765 769 Adds extra SQL fragments to the query. 766 770 """ 767 771 assert self.query.can_filter(), \ … … 1174 1178 return self 1175 1179 1176 1180 def order_by(self, *field_names): 1177 1181 """ 1178 1182 Always returns EmptyQuerySet. 1179 1183 """ 1180 1184 return self 1181 1185 1182 def distinct(self, true_or_false=True): 1186 def distinct(self, fields=None): 1183 1187 """ 1184 1188 Always returns EmptyQuerySet. 
1185 1189 """ 1186 1190 return self 1187 1191 1188 1192 def extra(self, select=None, where=None, params=None, tables=None, 1189 1193 order_by=None, select_params=None): 1190 1194 """ -
django/db/models/sql/compiler.py
diff --git a/django/db/models/sql/compiler.py b/django/db/models/sql/compiler.py
a b 18 18 self.using = using 19 19 self.quote_cache = {} 20 20 21 21 def pre_sql_setup(self): 22 22 """ 23 23 Does any necessary class setup immediately prior to producing SQL. This 24 24 is for things that can't necessarily be done in __init__ because we 25 25 might not have all the pieces in place at that time. 26 # TODO: after the query has been executed, the altered state should be 27 # cleaned. We are not using a clone() of the query here. 26 28 """ 27 29 if not self.query.tables: 28 30 self.query.join((None, self.query.model._meta.db_table, None, None)) 29 31 if (not self.query.select and self.query.default_cols and not 30 32 self.query.included_inherited_models): 31 33 self.query.setup_inherited_models() 32 34 if self.query.select_related and not self.query.related_select_cols: 33 35 self.fill_related_selections() … … 55 57 56 58 If 'with_limits' is False, any limit/offset information is not included 57 59 in the query. 58 60 """ 59 61 if with_limits and self.query.low_mark == self.query.high_mark: 60 62 return '', () 61 63 62 64 self.pre_sql_setup() 65 # After executing the query, we must get rid of any joins the query 66 # setup created. So, take note of alias counts before the query ran. 67 # However we do not want to get rid of stuff done in pre_sql_setup(), 68 # as the pre_sql_setup will modify query state in a way that forbids 69 # another run of it. 70 self.refcounts_before = self.query.alias_refcount.copy() 63 71 out_cols = self.get_columns(with_col_aliases) 64 72 ordering, ordering_group_by = self.get_ordering() 65 73 66 # This must come after 'select' and 'ordering' -- see docstring of 67 # get_from_clause() for details. 74 distinct_fields = self.get_distinct() 75 76 # This must come after 'select', 'ordering' and 'distinct' -- see 77 # docstring of get_from_clause() for details. 
68 78 from_, f_params = self.get_from_clause() 69 79 70 80 qn = self.quote_name_unless_alias 71 81 72 82 where, w_params = self.query.where.as_sql(qn=qn, connection=self.connection) 73 83 having, h_params = self.query.having.as_sql(qn=qn, connection=self.connection) 74 84 params = [] 75 85 for val in self.query.extra_select.itervalues(): 76 86 params.extend(val[1]) 77 87 78 88 result = ['SELECT'] 89 79 90 if self.query.distinct: 80 result.append('DISTINCT') 91 result.append(self.connection.ops.distinct(distinct_fields)) 92 81 93 result.append(', '.join(out_cols + self.query.ordering_aliases)) 82 94 83 95 result.append('FROM') 84 96 result.extend(from_) 85 97 params.extend(f_params) 86 98 87 99 if where: 88 100 result.append('WHERE %s' % where) 89 101 params.extend(w_params) 90 102 91 103 grouping, gb_params = self.get_grouping() 92 104 if grouping: 105 if distinct_fields: 106 raise NotImplementedError( 107 "annotate() + distinct(fields) not implemented.") 93 108 if ordering: 94 109 # If the backend can't group by PK (i.e., any database 95 110 # other than MySQL), then any fields mentioned in the 96 111 # ordering clause needs to be in the group by clause. 97 112 if not self.connection.features.allows_group_by_pk: 98 113 for col, col_params in ordering_group_by: 99 114 if col not in grouping: 100 115 grouping.append(str(col)) … … 124 139 if self.query.select_for_update and self.connection.features.has_select_for_update: 125 140 # If we've been asked for a NOWAIT query but the backend does not support it, 126 141 # raise a DatabaseError otherwise we could get an unexpected deadlock. 127 142 nowait = self.query.select_for_update_nowait 128 143 if nowait and not self.connection.features.has_select_for_update_nowait: 129 144 raise DatabaseError('NOWAIT is not supported on this database backend.') 130 145 result.append(self.connection.ops.for_update_sql(nowait=nowait)) 131 146 147 # Finally do cleanup - get rid of the joins we created above. 
148 self.query.reset_refcounts(self.refcounts_before) 149 132 150 return ' '.join(result), tuple(params) 133 151 134 152 def as_nested_sql(self): 135 153 """ 136 154 Perform the same functionality as the as_sql() method, returning an 137 155 SQL string and parameters. However, the alias prefixes are bumped 138 156 beforehand (in a copy -- the current query isn't changed), and any 139 157 ordering is removed if the query is unsliced. … … 287 305 else: 288 306 r = '%s.%s' % (qn(alias), qn2(field.column)) 289 307 result.append(r) 290 308 aliases.add(r) 291 309 if with_aliases: 292 310 col_aliases.add(field.column) 293 311 return result, aliases 294 312 313 def get_distinct(self): 314 """ 315 Returns a quoted list of fields to use in DISTINCT ON part of the query. 316 317 Note that this method can alter the tables in the query, and thus this 318 must be called before get_from_clause(). 319 """ 320 qn = self.quote_name_unless_alias 321 qn2 = self.connection.ops.quote_name 322 result = [] 323 opts = self.query.model._meta 324 325 for name in self.query.distinct_fields: 326 parts = name.split(LOOKUP_SEP) 327 field, col, alias, _, _ = self._setup_joins(parts, opts, None) 328 col, alias = self._final_join_removal(col, alias) 329 result.append("%s.%s" % (qn(alias), qn2(col))) 330 return result 331 332 295 333 def get_ordering(self): 296 334 """ 297 335 Returns a tuple containing a list representing the SQL elements in the 298 336 "order by" clause, and the list of SQL elements that need to be added 299 337 to the GROUP BY clause as a result of the ordering. 300 338 301 339 Also sets the ordering_aliases attribute on this instance to a list of 302 340 extra aliases needed in the select. … … 379 417 already_seen=None): 380 418 """ 381 419 Returns the table alias (the name might be ambiguous, the alias will 382 420 not be) and column name for ordering by the given 'name' parameter. 383 421 The 'name' is of the form 'field1__field2__...__fieldN'. 
384 422 """ 385 423 name, order = get_order_dir(name, default_order) 386 424 pieces = name.split(LOOKUP_SEP) 387 if not alias: 388 alias = self.query.get_initial_alias() 389 field, target, opts, joins, last, extra = self.query.setup_joins(pieces, 390 opts, alias, False) 391 alias = joins[-1] 392 col = target.column 393 if not field.rel: 394 # To avoid inadvertent trimming of a necessary alias, use the 395 # refcount to show that we are referencing a non-relation field on 396 # the model. 397 self.query.ref_alias(alias) 398 399 # Must use left outer joins for nullable fields and their relations. 400 self.query.promote_alias_chain(joins, 401 self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER) 425 field, col, alias, joins, opts = self._setup_joins(pieces, opts, alias) 402 426 403 427 # If we get to this point and the field is a relation to another model, 404 428 # append the default ordering for that model. 405 429 if field.rel and len(joins) > 1 and opts.ordering: 406 430 # Firstly, avoid infinite loops. 407 431 if not already_seen: 408 432 already_seen = set() 409 433 join_tuple = tuple([self.query.alias_map[j][TABLE_NAME] for j in joins]) … … 411 435 raise FieldError('Infinite loop caused by ordering.') 412 436 already_seen.add(join_tuple) 413 437 414 438 results = [] 415 439 for item in opts.ordering: 416 440 results.extend(self.find_ordering_name(item, opts, alias, 417 441 order, already_seen)) 418 442 return results 443 col, alias = self._final_join_removal(col, alias) 444 return [(alias, col, order)] 419 445 446 def _setup_joins(self, pieces, opts, alias): 447 """ 448 A helper method for get_ordering and get_distinct. This method will 449 call query.setup_joins, handle refcounts and then promote the joins. 450 451 Note that get_ordering and get_distinct must produce same target 452 columns on same input, as the prefixes of get_ordering and get_distinct 453 must match. Executing SQL where this is not true is an error. 
454 """ 455 if not alias: 456 alias = self.query.get_initial_alias() 457 field, target, opts, joins, _, _ = self.query.setup_joins(pieces, 458 opts, alias, False) 459 alias = joins[-1] 460 col = target.column 461 if not field.rel: 462 # To avoid inadvertent trimming of a necessary alias, use the 463 # refcount to show that we are referencing a non-relation field on 464 # the model. 465 self.query.ref_alias(alias) 466 467 # Must use left outer joins for nullable fields and their relations. 468 # Ordering or distinct must not affect the returned set, and INNER 469 # JOINS for nullable fields could do this. 470 self.query.promote_alias_chain(joins, 471 self.query.alias_map[joins[0]][JOIN_TYPE] == self.query.LOUTER) 472 return field, col, alias, joins, opts 473 474 def _final_join_removal(self, col, alias): 475 """ 476 A helper method for get_distinct and get_ordering. This method will 477 trim extra not-needed joins from the tail of the join chain. 478 479 This is very similar to what is done in trim_joins, but we will 480 trim LEFT JOINS here. It would be a good idea to consolidate this 481 method and query.trim_joins(). 482 """ 420 483 if alias: 421 # We have to do the same "final join" optimisation as in 422 # add_filter, since the final column might not otherwise be part of 423 # the select set (so we can't order on it). 424 484 while 1: 425 485 join = self.query.alias_map[alias] 426 486 if col != join[RHS_JOIN_COL]: 427 487 break 428 488 self.query.unref_alias(alias) 429 489 alias = join[LHS_ALIAS] 430 490 col = join[LHS_JOIN_COL] 431 return [(alias, col, order)] 491 return col, alias 432 492 433 493 def get_from_clause(self): 434 494 """ 435 495 Returns a list of strings that are joined together to go after the 436 496 "FROM" part of the query, as well as a list any extra parameters that 437 497 need to be included. Sub-classes, can override this to create a 438 498 from-clause via a "select". 
439 499 440 500 This should only be called after any SQL construction methods that 441 might change the tables we need. This means the select columns and 442 ordering must be done first. 501 might change the tables we need. This means the select columns, 502 ordering and distinct must be done first. 443 503 """ 444 504 result = [] 445 505 qn = self.quote_name_unless_alias 446 506 qn2 = self.connection.ops.quote_name 447 507 first = True 448 508 for alias in self.query.tables: 449 509 if not self.query.alias_refcount[alias]: 450 510 continue … … 979 1039 class SQLAggregateCompiler(SQLCompiler): 980 1040 def as_sql(self, qn=None): 981 1041 """ 982 1042 Creates the SQL for this query. Returns the SQL string and list of 983 1043 parameters. 984 1044 """ 985 1045 if qn is None: 986 1046 qn = self.quote_name_unless_alias 1047 987 1048 sql = ('SELECT %s FROM (%s) subquery' % ( 988 1049 ', '.join([ 989 1050 aggregate.as_sql(qn, self.connection) 990 1051 for aggregate in self.query.aggregate_select.values() 991 1052 ]), 992 1053 self.query.subquery) 993 1054 ) 994 1055 params = self.query.sub_params -
django/db/models/sql/query.py
diff --git a/django/db/models/sql/query.py b/django/db/models/sql/query.py
a b 122 122 self.tables = [] # Aliases in the order they are created. 123 123 self.where = where() 124 124 self.where_class = where 125 125 self.group_by = None 126 126 self.having = where() 127 127 self.order_by = [] 128 128 self.low_mark, self.high_mark = 0, None # Used for offset/limit 129 129 self.distinct = False 130 self.distinct_fields = [] 130 131 self.select_for_update = False 131 132 self.select_for_update_nowait = False 132 133 self.select_related = False 133 134 self.related_select_cols = [] 134 135 135 136 # SQL aggregate-related attributes 136 137 self.aggregates = SortedDict() # Maps alias -> SQL aggregate function 137 138 self.aggregate_select_mask = None … … 260 261 if self.group_by is None: 261 262 obj.group_by = None 262 263 else: 263 264 obj.group_by = self.group_by[:] 264 265 obj.having = copy.deepcopy(self.having, memo=memo) 265 266 obj.order_by = self.order_by[:] 266 267 obj.low_mark, obj.high_mark = self.low_mark, self.high_mark 267 268 obj.distinct = self.distinct 269 obj.distinct_fields = self.distinct_fields[:] 268 270 obj.select_for_update = self.select_for_update 269 271 obj.select_for_update_nowait = self.select_for_update_nowait 270 272 obj.select_related = self.select_related 271 273 obj.related_select_cols = [] 272 274 obj.aggregates = copy.deepcopy(self.aggregates, memo=memo) 273 275 if self.aggregate_select_mask is None: 274 276 obj.aggregate_select_mask = None 275 277 else: … … 293 295 obj.extra_tables = self.extra_tables 294 296 obj.extra_order_by = self.extra_order_by 295 297 obj.deferred_loading = copy.deepcopy(self.deferred_loading, memo=memo) 296 298 if self.filter_is_sticky and self.used_aliases: 297 299 obj.used_aliases = self.used_aliases.copy() 298 300 else: 299 301 obj.used_aliases = set() 300 302 obj.filter_is_sticky = False 303 301 304 obj.__dict__.update(kwargs) 302 305 if hasattr(obj, '_setup_query'): 303 306 obj._setup_query() 304 307 return obj 305 308 306 309 def convert_values(self, value, field, connection): 
307 310 """Convert the database-returned value into a type that is consistent 308 311 across database backends. … … 388 391 in zip(query.aggregate_select.items(), result) 389 392 ]) 390 393 391 394 def get_count(self, using): 392 395 """ 393 396 Performs a COUNT() query using the current filter constraints. 394 397 """ 395 398 obj = self.clone() 396 if len(self.select) > 1 or self.aggregate_select: 399 if len(self.select) > 1 or self.aggregate_select or (self.distinct and self.distinct_fields): 397 400 # If a select clause exists, then the query has already started to 398 401 # specify the columns that are to be returned. 399 402 # In this case, we need to use a subquery to evaluate the count. 400 403 from django.db.models.sql.subqueries import AggregateQuery 401 404 subquery = obj 402 405 subquery.clear_ordering(True) 403 406 subquery.clear_limits() 404 407 … … 447 450 'rhs' query. 448 451 """ 449 452 assert self.model == rhs.model, \ 450 453 "Cannot combine queries on two different base models." 451 454 assert self.can_filter(), \ 452 455 "Cannot combine queries once a slice has been taken." 453 456 assert self.distinct == rhs.distinct, \ 454 457 "Cannot combine a unique query with a non-unique query." 458 assert self.distinct_fields == rhs.distinct_fields, \ 459 "Cannot combine queries with different distinct fields." 455 460 456 461 self.remove_inherited_models() 457 462 # Work out how to relabel the rhs aliases, if necessary. 458 463 change_map = {} 459 464 used = set() 460 465 conjunction = (connector == AND) 461 466 first = True 462 467 for alias in rhs.tables: … … 669 674 self.alias_refcount[alias] = 1 670 675 self.tables.append(alias) 671 676 return alias, True 672 677 673 678 def ref_alias(self, alias): 674 679 """ Increases the reference count for this alias. """ 675 680 self.alias_refcount[alias] += 1 676 681 677 def unref_alias(self, alias): 682 def unref_alias(self, alias, amount=1): 678 683 """ Decreases the reference count for this alias. 
""" 679 self.alias_refcount[alias] -= 1 684 self.alias_refcount[alias] -= amount 680 685 681 686 def promote_alias(self, alias, unconditional=False): 682 687 """ 683 688 Promotes the join type of an alias to an outer join if it's possible 684 689 for the join to contain NULL values on the left. If 'unconditional' is 685 690 False, the join is only promoted if it is nullable, otherwise it is 686 691 always promoted. 687 692 … … 700 705 Walks along a chain of aliases, promoting the first nullable join and 701 706 any joins following that. If 'must_promote' is True, all the aliases in 702 707 the chain are promoted. 703 708 """ 704 709 for alias in chain: 705 710 if self.promote_alias(alias, must_promote): 706 711 must_promote = True 707 712 713 def reset_refcounts(self, to_counts): 714 """ 715 This method will reset reference counts for aliases so that they match 716 that given in to_counts. 717 """ 718 for alias, cur_refcount in self.alias_refcount.copy().items(): 719 unref_amount = cur_refcount - to_counts.get(alias, 0) 720 self.unref_alias(alias, unref_amount) 721 708 722 def promote_unused_aliases(self, initial_refcounts, used_aliases): 709 723 """ 710 724 Given a "before" copy of the alias_refcounts dictionary (as 711 725 'initial_refcounts') and a collection of aliases that may have been 712 726 changed or created, works out which aliases have been created since 713 727 then and which ones haven't been used and promotes all of those 714 728 aliases, plus any children of theirs in the alias tree, to outer joins. 715 729 """ … … 827 841 self.ref_alias(alias) 828 842 else: 829 843 alias = self.join((None, self.model._meta.db_table, None, None)) 830 844 return alias 831 845 832 846 def count_active_tables(self): 833 847 """ 834 848 Returns the number of tables in this query with a non-zero reference 835 count. 849 count. Note that after execution, the reference counts are zeroed, so 850 tables added in compiler will not be seen by this method. 
836 851 """ 837 852 return len([1 for count in self.alias_refcount.itervalues() if count]) 838 853 839 854 def join(self, connection, always_create=False, exclusions=(), 840 855 promote=False, outer_if_first=False, nullable=False, reuse=None): 841 856 """ 842 857 Returns an alias for the join in 'connection', either reusing an 843 858 existing alias for that join or creating a new one. 'connection' is a … … 1591 1606 """ 1592 1607 Clears the list of fields to select (but not extra_select columns). 1593 1608 Some queryset types completely replace any existing list of select 1594 1609 columns. 1595 1610 """ 1596 1611 self.select = [] 1597 1612 self.select_fields = [] 1598 1613 1614 def add_distinct_fields(self, *field_names): 1615 """ 1616 Adds and resolves the given fields to the query's "distinct on" clause. 1617 """ 1618 self.distinct_fields = field_names 1619 self.distinct = True 1620 1621 1599 1622 def add_fields(self, field_names, allow_m2m=True): 1600 1623 """ 1601 1624 Adds the given (model) fields to the select set. The field names are 1602 1625 added in the order specified. 1603 1626 """ 1604 1627 alias = self.get_initial_alias() 1605 1628 opts = self.get_meta() 1606 1629 -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
a b 340 340 a default ordering, or when using :meth:`order_by()`). If no such ordering is 341 341 defined for a given ``QuerySet``, calling ``reverse()`` on it has no real 342 342 effect (the ordering was undefined prior to calling ``reverse()``, and will 343 343 remain undefined afterward). 344 344 345 345 distinct 346 346 ~~~~~~~~ 347 347 348 .. method:: distinct() 348 .. method:: distinct([*fields]) 349 349 350 350 Returns a new ``QuerySet`` that uses ``SELECT DISTINCT`` in its SQL query. This 351 351 eliminates duplicate rows from the query results. 352 352 353 353 By default, a ``QuerySet`` will not eliminate duplicate rows. In practice, this 354 354 is rarely a problem, because simple queries such as ``Blog.objects.all()`` 355 355 don't introduce the possibility of duplicate result rows. However, if your 356 356 query spans multiple tables, it's possible to get duplicate results when a … … 369 369 selected, the columns used in any :meth:`order_by()` (or default model 370 370 ordering) will still be involved and may affect uniqueness of the results. 371 371 372 372 The moral here is that if you are using ``distinct()`` be careful about 373 373 ordering by related models. Similarly, when using ``distinct()`` and 374 374 :meth:`values()` together, be careful when ordering by fields not in the 375 375 :meth:`values()` call. 376 376 377 .. versionadded:: 1.4 378 379 The possibility to pass positional arguments (``*fields``) is new in Django 1.4. 380 They are names of fields to which the ``DISTINCT`` should be limited. This 381 translates to a ``SELECT DISTINCT ON`` SQL query. A ``DISTINCT ON`` query eliminates 382 duplicate rows not by comparing all fields in a row, but by comparing only the given 383 fields. 384 385 .. note:: 386 Note that the ability to specify field names is only available in PostgreSQL. 387 388 .. 
note:: 389 When using the ``DISTINCT ON`` functionality it is required that the columns given 390 to :meth:`distinct` match the first :meth:`order_by` columns. For example ``SELECT 391 DISTINCT ON (a)`` gives you the first row for each value in column ``a``. If you 392 don't specify an order, then you'll get some arbitrary row. 393 394 Examples:: 395 396 >>> Author.objects.distinct() 397 [...] 398 399 >>> Entry.objects.order_by('pub_date').distinct('pub_date') 400 [...] 401 402 >>> Entry.objects.order_by('blog').distinct('blog') 403 [...] 404 405 >>> Entry.objects.order_by('author', 'pub_date').distinct('author', 'pub_date') 406 [...] 407 408 >>> Entry.objects.order_by('blog__name', 'mod_date').distinct('blog__name', 'mod_date') 409 [...] 410 411 >>> Entry.objects.order_by('author', 'pub_date').distinct('author') 412 [...] 413 377 414 values 378 415 ~~~~~~ 379 416 380 417 .. method:: values(*fields) 381 418 382 419 Returns a ``ValuesQuerySet`` — a ``QuerySet`` subclass that returns 383 420 dictionaries when used as an iterable, rather than model-instance objects. 384 421 -
new file tests/modeltests/distinct_on_fields/models.py
diff --git a/tests/modeltests/distinct_on_fields/__init__.py b/tests/modeltests/distinct_on_fields/__init__.py new file mode 100644 diff --git a/tests/modeltests/distinct_on_fields/models.py b/tests/modeltests/distinct_on_fields/models.py new file mode 100644
- + 1 from django.db import models 2 3 class Tag(models.Model): 4 name = models.CharField(max_length=10) 5 parent = models.ForeignKey('self', blank=True, null=True, 6 related_name='children') 7 8 class Meta: 9 ordering = ['name'] 10 11 def __unicode__(self): 12 return self.name 13 14 class Celebrity(models.Model): 15 name = models.CharField("Name", max_length=20) 16 greatest_fan = models.ForeignKey("Fan", null=True, unique=True) 17 18 def __unicode__(self): 19 return self.name 20 21 class Fan(models.Model): 22 fan_of = models.ForeignKey(Celebrity) 23 24 class Staff(models.Model): 25 id = models.IntegerField(primary_key=True) 26 name = models.CharField(max_length=50) 27 organisation = models.CharField(max_length=100) 28 tags = models.ManyToManyField(Tag, through='StaffTag') 29 coworkers = models.ManyToManyField('self') 30 31 def __unicode__(self): 32 return self.name 33 34 class StaffTag(models.Model): 35 staff = models.ForeignKey(Staff) 36 tag = models.ForeignKey(Tag) 37 38 def __unicode__(self): 39 return u"%s -> %s" % (self.tag, self.staff) -
new file tests/modeltests/distinct_on_fields/tests.py
diff --git a/tests/modeltests/distinct_on_fields/tests.py b/tests/modeltests/distinct_on_fields/tests.py new file mode 100644
from __future__ import absolute_import

from django.db.models import Max
from django.test import TestCase, skipUnlessDBFeature

from .models import Tag, Celebrity, Fan, Staff, StaffTag


class DistinctOnTests(TestCase):
    """Exercise ``QuerySet.distinct(*fields)``, i.e. the DISTINCT ON clause.

    Both tests are skipped on backends whose ``can_distinct_on_fields``
    feature flag is false.
    """

    def setUp(self):
        # Tag tree: t1 is the root, t2 and t3 are children of t1, and t4 and
        # t5 are children of t3. Creation order is kept as t1..t5.
        root = Tag.objects.create(name='t1')
        Tag.objects.create(name='t2', parent=root)
        branch = Tag.objects.create(name='t3', parent=root)
        Tag.objects.create(name='t4', parent=branch)
        Tag.objects.create(name='t5', parent=branch)

        # Three staff members in organisation "o1" and a namesake of the
        # first one in "o2". Primary keys are set explicitly because the
        # values_list() expectation below spells them out.
        new_staff = Staff.objects.create
        first = new_staff(id=1, name="p1", organisation="o1")
        second = new_staff(id=2, name="p2", organisation="o1")
        third = new_staff(id=3, name="p3", organisation="o1")
        new_staff(id=4, name="p1", organisation="o2")
        first.coworkers.add(second, third)

        # Two identical (staff, tag) rows, so that DISTINCT ON has a
        # duplicate to collapse.
        for _ in range(2):
            StaffTag.objects.create(staff=first, tag=root)

        # The first celebrity gets two fans, the second gets one.
        star_one = Celebrity.objects.create(name="c1")
        star_two = Celebrity.objects.create(name="c2")
        self.fan1 = Fan.objects.create(fan_of=star_one)
        self.fan2 = Fan.objects.create(fan_of=star_one)
        self.fan3 = Fan.objects.create(fan_of=star_two)

    @skipUnlessDBFeature('can_distinct_on_fields')
    def test_basic_distinct_on(self):
        """distinct() with field names returns the expected rows."""
        staff = Staff.objects
        celebs = Celebrity.objects
        every_fan = [self.fan1, self.fan2, self.fan3]

        # Querysets that are built from two halves and OR-ed together, to
        # check that combining querysets keeps working.
        left = celebs.filter(fan__in=[self.fan1, self.fan2])
        right = celebs.filter(fan__in=[self.fan3])
        combined = (left.distinct('name').order_by('name') |
                    right.distinct('name').order_by('name'))

        # Each entry pairs a queryset with the reprs it must produce.
        cases = [
            (staff.distinct().order_by('name'),
             ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>']),
            (staff.distinct('name').order_by('name'),
             ['<Staff: p1>', '<Staff: p2>', '<Staff: p3>']),
            (staff.distinct('organisation').order_by('organisation', 'name'),
             ['<Staff: p1>', '<Staff: p1>']),
            (staff.distinct('name', 'organisation').order_by('name', 'organisation'),
             ['<Staff: p1>', '<Staff: p1>', '<Staff: p2>', '<Staff: p3>']),
            (celebs.filter(fan__in=every_fan).distinct('name').order_by('name'),
             ['<Celebrity: c1>', '<Celebrity: c2>']),
            (combined,
             ['<Celebrity: c1>', '<Celebrity: c2>']),
            (StaffTag.objects.distinct('staff', 'tag'),
             ['<StaffTag: t1 -> p1>']),
            (Tag.objects.order_by('parent__pk', 'pk').distinct('parent'),
             ['<Tag: t2>', '<Tag: t4>', '<Tag: t1>']),
            (StaffTag.objects.select_related('staff').distinct('staff__name').order_by('staff__name'),
             ['<StaffTag: t1 -> p1>']),
            # The alphabetically first coworker of every staff member.
            (staff.distinct('id').order_by('id', 'coworkers__name').values_list('id', 'coworkers__name'),
             ["(1, u'p2')", "(2, u'p1')", "(3, u'p1')", "(4, None)"]),
        ]
        for queryset, reprs in cases:
            self.assertQuerysetEqual(queryset, reprs)
            # count() has to agree with the number of rows iterated above.
            self.assertEqual(queryset.count(), len(reprs))

        # AND-ing two querysets whose distinct fields differ must fail.
        everyone = Celebrity.objects.all()

        def combine_mismatched():
            return everyone.distinct('id') & everyone.distinct('name')

        self.assertRaisesMessage(
            AssertionError,
            "Cannot combine queries with different distinct fields.",
            combine_mismatched
        )

        # Join unreffing: replacing the distinct fields should also drop
        # the join that only the previous fields needed.
        joined = Celebrity.objects.distinct('greatest_fan__id', 'greatest_fan__fan_of')
        self.assertIn('OUTER JOIN', str(joined.query))
        unjoined = joined.distinct('pk')
        self.assertNotIn('OUTER JOIN', str(unjoined.query))

    @skipUnlessDBFeature('can_distinct_on_fields')
    def test_distinct_not_implemented_checks(self):
        """distinct(fields) combined with annotate/aggregate is rejected."""
        # Annotation and DISTINCT ON may not be mixed, in either order.
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.annotate(Max('id')).distinct('id')[0]
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.distinct('id').annotate(Max('id'))[0]

        # The check only runs when the query executes, so clearing the
        # fields with a plain distinct() beforehand makes the query legal.
        Celebrity.objects.distinct('id').annotate(Max('id')).distinct()[0]

        # Aggregation over a DISTINCT ON queryset is rejected as well.
        with self.assertRaises(NotImplementedError):
            Celebrity.objects.distinct('id').aggregate(Max('id'))
tests/regressiontests/queries/models.py
diff --git a/tests/regressiontests/queries/models.py b/tests/regressiontests/queries/models.py
a b 204 204 205 205 # An inter-related setup with a model subclass that has a nullable 206 206 # path to another model, and a return path from that model. 207 207 208 208 class Celebrity(models.Model): 209 209 name = models.CharField("Name", max_length=20) 210 210 greatest_fan = models.ForeignKey("Fan", null=True, unique=True) 211 211 212 def __unicode__(self): 213 return self.name 214 212 215 class TvChef(Celebrity): 213 216 pass 214 217 215 218 class Fan(models.Model): 216 219 fan_of = models.ForeignKey(Celebrity) 217 220 218 221 # Multiple foreign keys 219 222 class LeafA(models.Model): … … 338 341 return "category item: " + str(self.category) 339 342 340 343 class OneToOneCategory(models.Model): 341 344 new_name = models.CharField(max_length=15) 342 345 category = models.OneToOneField(SimpleCategory) 343 346 344 347 def __unicode__(self): 345 348 return "one2one " + self.new_name 346 -
tests/regressiontests/queries/tests.py
diff --git a/tests/regressiontests/queries/tests.py b/tests/regressiontests/queries/tests.py
a b 229 229 q1 = Item.objects.filter(tags=self.t1) 230 230 q2 = Item.objects.filter(note=self.n3, tags=self.t2) 231 231 q3 = Item.objects.filter(creator=self.a4) 232 232 self.assertQuerysetEqual( 233 233 ((q1 & q2) | q3).order_by('name'), 234 234 ['<Item: four>', '<Item: one>'] 235 235 ) 236 236 237 # FIXME: This is difficult to fix and very much an edge case, so punt for238 # now. This is related to the order_by() tests for ticket #2253, but the239 # old bug exhibited itself here (q2 was pulling too many tables into the240 # combined query with the new ordering, but only because we have evaluated241 # q2 already).242 @unittest.expectedFailure243 237 def test_order_by_tables(self): 244 238 q1 = Item.objects.order_by('name') 245 239 q2 = Item.objects.filter(id=self.i1.id) 246 240 list(q2) 247 241 self.assertEqual(len((q1 & q2).order_by('name').query.tables), 1) 248 242 243 def test_order_by_join_unref(self): 244 """ 245 This test is related to the above one, testing that there aren't 246 old JOINs in the query. 
247 """ 248 qs = Celebrity.objects.order_by('greatest_fan__fan_of') 249 self.assertIn('OUTER JOIN', str(qs.query)) 250 qs = qs.order_by('id') 251 self.assertNotIn('OUTER JOIN', str(qs.query)) 252 249 253 def test_tickets_4088_4306(self): 250 254 self.assertQuerysetEqual( 251 255 Report.objects.filter(creator=1001), 252 256 ['<Report: r1>'] 253 257 ) 254 258 self.assertQuerysetEqual( 255 259 Report.objects.filter(creator__num=1001), 256 260 ['<Report: r1>'] … … 1723 1727 1724 1728 self.assertEqual( 1725 1729 list(Node.objects.filter(node=node2)), 1726 1730 [node1] 1727 1731 ) 1728 1732 1729 1733 1730 1734 class ConditionalTests(BaseQuerysetTest): 1731 """Tests whose execution depend on d fferent environment conditions like1735 """Tests whose execution depend on different environment conditions like 1732 1736 Python version or DB backend features""" 1733 1737 1734 1738 def setUp(self): 1735 1739 generic = NamedCategory.objects.create(name="Generic") 1736 1740 t1 = Tag.objects.create(name='t1', category=generic) 1737 1741 t2 = Tag.objects.create(name='t2', parent=t1, category=generic) 1738 1742 t3 = Tag.objects.create(name='t3', parent=t1) 1739 1743 t4 = Tag.objects.create(name='t4', parent=t3) 1740 1744 t5 = Tag.objects.create(name='t5', parent=t3) 1741 1745 1746 1742 1747 # In Python 2.6 beta releases, exceptions raised in __len__ are swallowed 1743 1748 # (Python issue 1242657), so these cases return an empty list, rather than 1744 1749 # raising an exception. Not a lot we can do about that, unfortunately, due to 1745 1750 # the way Python handles list() calls internally. Thus, we skip the tests for 1746 1751 # Python 2.6. 
1747 1752 @unittest.skipIf(sys.version_info[:2] == (2, 6), "Python version is 2.6") 1748 1753 def test_infinite_loop(self): 1749 1754 # If you're not careful, it's possible to introduce infinite loops via … … 1786 1791 1787 1792 # Sqlite 3 does not support passing in more than 1000 parameters except by 1788 1793 # changing a parameter at compilation time. 1789 1794 @skipUnlessDBFeature('supports_1000_query_parameters') 1790 1795 def test_ticket14244(self): 1791 1796 # Test that the "in" lookup works with lists of 1000 items or more. 1792 1797 Number.objects.all().delete() 1793 1798 numbers = range(2500) 1799 # Speed up object creation by using bulk_create. However keep the 1800 # batches somewhat small - otherwise we might hit the parameter limit 1801 # in bulk_create. 1802 batch = [] 1794 1803 for num in numbers: 1795 _ = Number.objects.create(num=num) 1804 batch.append(Number(num=num)) 1805 if len(batch) == 100: 1806 Number.objects.bulk_create(batch) 1807 batch = [] 1808 if batch: 1809 Number.objects.bulk_create(batch) 1796 1810 self.assertEqual( 1797 1811 Number.objects.filter(num__in=numbers[:1000]).count(), 1798 1812 1000 1799 1813 ) 1800 1814 self.assertEqual( 1801 1815 Number.objects.filter(num__in=numbers[:1001]).count(), 1802 1816 1001 1803 1817 ) … … 1805 1819 Number.objects.filter(num__in=numbers[:2000]).count(), 1806 1820 2000 1807 1821 ) 1808 1822 self.assertEqual( 1809 1823 Number.objects.filter(num__in=numbers).count(), 1810 1824 2500 1811 1825 ) 1812 1826 1827 1813 1828 class UnionTests(unittest.TestCase): 1814 1829 """ 1815 1830 Tests for the union of two querysets. Bug #12252. 1816 1831 """ 1817 1832 def setUp(self): 1818 1833 objectas = [] 1819 1834 objectbs = [] 1820 1835 objectcs = [] -
tests/regressiontests/select_related_regress/tests.py
diff --git a/tests/regressiontests/select_related_regress/tests.py b/tests/regressiontests/select_related_regress/tests.py
a b 35 35 connections=Connection.objects.filter(start__device__building=b, end__device__building=b).order_by('id') 36 36 self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], 37 37 [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) 38 38 39 39 connections=Connection.objects.filter(start__device__building=b, end__device__building=b).select_related().order_by('id') 40 40 self.assertEqual([(c.id, unicode(c.start), unicode(c.end)) for c in connections], 41 41 [(c1.id, u'router/4', u'switch/7'), (c2.id, u'switch/7', u'server/1')]) 42 42 43 # This final query should only joinseven tables (port, device and building44 # twice each, plus connection once). 45 self.assertEqual( connections.query.count_active_tables(), 7)43 # This final query should only have seven tables (port, device and building 44 # twice each, plus connection once). Thus, 6 joins plus the FROM table. 45 self.assertEqual(str(connections.query).count(" JOIN "), 6) 46 46 47 47 48 48 def test_regression_8106(self): 49 49 """ 50 50 Regression test for bug #8106. 51 51 52 52 Same sort of problem as the previous test, but this time there are 53 53 more extra tables to pull in as part of the select_related() and some