Ticket #16937: prefetch_2.diff
File prefetch_2.diff, 18.6 KB (added by , 13 years ago) |
---|
-
django/db/models/fields/related.py
diff --git a/django/db/models/fields/related.py b/django/db/models/fields/related.py
a b 435 435 db = self._db or router.db_for_read(self.model, instance=self.instance) 436 436 return super(RelatedManager, self).get_query_set().using(db).filter(**(self.core_filters)) 437 437 438 def get_prefetch_query_set(self, instances): 439 """ 440 Return a queryset that does the bulk lookup needed 441 by prefetch_related functionality. 442 """ 443 if not instances: 444 return self.model.objects.none() 445 446 db = self._db or router.db_for_read(self.model, instance=instances[0]) 447 query = {'%s__%s__in' % (rel_field.name, attname): 448 [getattr(obj, attname) for obj in instances]} 449 return super(RelatedManager, self).get_query_set().using(db).filter(**query) 450 451 def select_matching_instances(self, obj, related_objects): 452 field_val = getattr(obj, attname) 453 other_attname = rel_field.get_attname() 454 return [rel_obj for rel_obj in related_objects 455 if getattr(rel_obj, other_attname) == field_val] 456 457 def all(self): 458 try: 459 return self.instance._prefetched_objects_cache[rel_field.related_query_name()] 460 except (AttributeError, KeyError): 461 return super(RelatedManager, self).all() 462 438 463 def add(self, *objs): 439 464 for obj in objs: 440 465 if not isinstance(obj, self.model): … … 482 507 """Creates a manager that subclasses 'superclass' (which is a Manager) 483 508 and adds behavior for many-to-many related objects.""" 484 509 class ManyRelatedManager(superclass): 485 def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,510 def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None, 486 511 source_field_name=None, target_field_name=None, reverse=False, 487 through=None ):512 through=None, prefetch_cache_name=None): 488 513 super(ManyRelatedManager, self).__init__() 489 514 self.model = model 490 self.core_filters = core_filters 515 self.query_field_name = query_field_name 516 self.core_filters = {'%s__pk' % query_field_name: instance._get_pk_val()} 491 517 self.instance = 
instance 492 518 self.symmetrical = symmetrical 493 519 self.source_field_name = source_field_name 494 520 self.target_field_name = target_field_name 495 521 self.reverse = reverse 496 522 self.through = through 523 self.prefetch_cache_name = prefetch_cache_name 497 524 self._pk_val = self.instance.pk 498 525 if self._pk_val is None: 499 526 raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__) … … 502 529 db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) 503 530 return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**(self.core_filters)) 504 531 532 def get_prefetch_query_set(self, instances): 533 if not instances: 534 return self.model.objects.none() 535 536 from django.db import connections 537 538 db = self._db or router.db_for_read(self.model, instance=instances[0]) 539 query = {'%s__pk__in' % self.query_field_name: 540 [obj._get_pk_val() for obj in instances]} 541 qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) 542 543 # M2M: need to annotate the query in order to get the PK of the 544 # primary model that the secondary model was actually related to. 545 546 # We know that there will already be a join on the join table, so we 547 # can just add the select. 
548 join_table = self.through._meta.db_table 549 pk_col = "%s_id" % self.source_field_name 550 connection = connections[db] 551 qn = connection.ops.quote_name 552 qs = qs.extra(select={'_prefetch_related_pk': 553 '%s.%s' % (qn(join_table), qn(pk_col))}) 554 return qs 555 556 def select_matching_instances(self, obj, related_objects): 557 pk_val = obj._get_pk_val() 558 return [rel_obj for rel_obj in related_objects 559 if rel_obj._prefetch_related_pk == pk_val] 560 561 def all(self): 562 try: 563 return self.instance._prefetched_objects_cache[self.prefetch_cache_name] 564 except (AttributeError, KeyError): 565 return super(ManyRelatedManager, self).all() 566 505 567 # If the ManyToMany relation has an intermediary model, 506 568 # the add and remove methods do not exist. 507 569 if rel.through._meta.auto_created: … … 683 745 684 746 manager = self.related_manager_cls( 685 747 model=rel_model, 686 core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()}, 748 query_field_name=self.related.field.name, 749 prefetch_cache_name=self.related.field.related_query_name(), 687 750 instance=instance, 688 751 symmetrical=False, 689 752 source_field_name=self.related.field.m2m_reverse_field_name(), … … 739 802 740 803 manager = self.related_manager_cls( 741 804 model=self.field.rel.to, 742 core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()}, 805 query_field_name=self.field.related_query_name(), 806 prefetch_cache_name=self.field.name, 743 807 instance=instance, 744 808 symmetrical=self.field.rel.symmetrical, 745 809 source_field_name=self.field.m2m_field_name(), -
django/db/models/manager.py
diff --git a/django/db/models/manager.py b/django/db/models/manager.py
a b 172 172 def select_related(self, *args, **kwargs): 173 173 return self.get_query_set().select_related(*args, **kwargs) 174 174 175 def prefetch_related(self, *args, **kwargs): 176 return self.get_query_set().prefetch_related(*args, **kwargs) 177 175 178 def values(self, *args, **kwargs): 176 179 return self.get_query_set().values(*args, **kwargs) 177 180 -
django/db/models/query.py
diff --git a/django/db/models/query.py b/django/db/models/query.py
a b 36 36 self._iter = None 37 37 self._sticky_filter = False 38 38 self._for_write = False 39 self._prefetch_related = set() 40 self._prefetch_done = False 39 41 40 42 ######################## 41 43 # PYTHON MAGIC METHODS # … … 81 83 self._result_cache = list(self.iterator()) 82 84 elif self._iter: 83 85 self._result_cache.extend(self._iter) 86 if self._prefetch_related and not self._prefetch_done: 87 self._prefetch_related_objects() 84 88 return len(self._result_cache) 85 89 86 90 def __iter__(self): 91 if self._prefetch_related: 92 # We need all the results in order to be able to do the prefetch 93 # in one go. To minimize code duplication, we use the __len__ 94 # code path which also forces this, and also does the prefetch 95 len(self) 96 87 97 if self._result_cache is None: 88 98 self._iter = self.iterator() 89 99 self._result_cache = [] … … 106 116 self._fill_cache() 107 117 108 118 def __nonzero__(self): 119 if self._prefetch_related: 120 # We need all the results in order to be able to do the prefetch 121 # in one go. To minimize code duplication, we use the __len__ 122 # code path which also forces this, and also does the prefetch 123 len(self) 124 109 125 if self._result_cache is not None: 110 126 return bool(self._result_cache) 111 127 try: … … 526 542 return self.query.has_results(using=self.db) 527 543 return bool(self._result_cache) 528 544 545 def _prefetch_related_objects(self): 546 # This method can only be called once the result cache has been filled. 
547 prefetch_related_objects(self._result_cache, self._prefetch_related) 548 self._prefetch_done = True 549 529 550 ################################################## 530 551 # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # 531 552 ################################################## … … 649 670 obj.query.max_depth = depth 650 671 return obj 651 672 673 def prefetch_related(self, *fields): 674 """ 675 Returns a new QuerySet instance that will prefetch Many-To-One 676 and Many-To-Many related objects when the QuerySet is evaluated. 677 678 The fields specified must be attributes that return a RelatedManager of 679 some kind when used on instances of the evaluated QuerySet. 680 681 These RelatedManagers will be modified so that their 'all()' method will 682 return a QuerySet whose cache is already filled with objects that were 683 looked up in a single batch, rather than one query per object in the 684 current QuerySet. 685 686 When prefetch_related() is called more than once, the list of fields to 687 prefetch is added to. Call prefetch_related() with no arguments to 688 clears the list. 
689 """ 690 if fields: 691 new_fields = self._prefetch_related.union(set(fields)) 692 else: 693 new_fields = set() 694 return self._clone(_prefetch_related=new_fields) 695 652 696 def dup_select_related(self, other): 653 697 """ 654 698 Copies the related selection status from the QuerySet 'other' to the … … 798 842 query.filter_is_sticky = True 799 843 c = klass(model=self.model, query=query, using=self._db) 800 844 c._for_write = self._for_write 845 c._prefetch_related = self._prefetch_related 801 846 c.__dict__.update(kwargs) 802 847 if setup and hasattr(c, '_setup_query'): 803 848 c._setup_query() … … 1484 1529 query = sql.InsertQuery(model) 1485 1530 query.insert_values(fields, objs, raw=raw) 1486 1531 return query.get_compiler(using=using).execute_sql(return_id) 1532 1533 1534 def prefetch_related_objects(result_cache, fields): 1535 """ 1536 Populates prefetched objects caches for a list of results 1537 from a QuerySet 1538 """ 1539 obj = None 1540 for obj in result_cache: 1541 if not hasattr(obj, '_prefetched_objects_cache'): 1542 try: 1543 obj._prefetched_objects_cache = {} 1544 except AttributeError: 1545 # Must be in a QuerySet subclass that is not getting Model 1546 # instances, either in Django or 3rd party. prefetch_related 1547 # doesn't make sense, so quit now. 1548 return 1549 1550 if obj is None: 1551 return # nothing to do 1552 1553 for attname in fields: 1554 # We get one related manager, from the first object, and re-use this for 1555 # all operations. We use only APIs that ignore the relmanager.instance 1556 # attribute. 1557 relmanager = getattr(obj, attname) 1558 1559 all_related_objects = list(relmanager.get_prefetch_query_set(result_cache)) 1560 for obj in result_cache: 1561 qs = getattr(obj, attname).all() 1562 qs._result_cache = relmanager.select_matching_instances(obj, all_related_objects) 1563 obj._prefetched_objects_cache[attname] = qs -
docs/ref/models/querysets.txt
diff --git a/docs/ref/models/querysets.txt b/docs/ref/models/querysets.txt
a b 690 690 A :class:`~django.db.models.OneToOneField` is not traversed in the reverse 691 691 direction if you are performing a depth-based ``select_related()`` call. 692 692 693 prefetch_related 694 ~~~~~~~~~~~~~~~~ 695 696 .. method:: prefetch_related(*fields) 697 698 .. versionadded:: 1.4 699 700 Returns a ``QuerySet`` that will automatically retrieve, in a single batch, 701 related many-to-many and many-to-one objects for the specified fields. 702 703 This is similar to ``select_related`` for the 'many related objects' case, with 704 the following important differences: 705 706 * ``prefetch_related`` causes a separate query to be issued for each set of 707 related objects that you request. This query is done as soon as the QuerySet 708 begins to be evaluated. 709 710 This is in contrast to ``select_related``, which modifies the original query 711 with joins in order to get the related objects in the same query as the main 712 objects. 713 714 * It is 'single depth' only, and doesn't support join syntax. 715 716 The fields that must be supplied to this method can be any attributes on the 717 model instances which represent related queries that return multiple 718 objects. This includes attributes representing the 'many' side of ``ForeignKey`` 719 relationships and ``ManyToManyField`` attributes. 720 721 For example, suppose you have these models:: 722 723 class Topping(models.Model): 724 name = models.CharField(max_length=30) 725 726 class Pizza(models.Model): 727 name = models.CharField(max_length=50) 728 toppings = models.ManyToManyField(Topping) 729 730 def __unicode__(self): 731 return u"%s (%s)" % (self.name, u", ".join([topping.name 732 for topping in self.toppings.all()])) 733 734 and run this code:: 735 736 >>> Pizza.objects.all() 737 [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"... 738 739 The problem with this code is that it will run a query on the Toppings table for 740 **every** item in the Pizza ``QuerySet``. 
Using ``prefetch_related``, this can 741 be reduced to two: 742 743 >>> pizzas = Pizza.objects.all().prefetch_related('toppings') 744 745 All the relevant toppings will be fetched in a single query, and used to make 746 ``QuerySets`` that have a pre-filled cache of the relevant results. These 747 ``QuerySets`` are then used in the ``self.toppings.all()`` calls. 748 749 Please note that use of ``prefetch_related`` will mean that the additional 750 queries run will **always** be executed - even if you never use the related 751 objects - and it always fully populates the result cache on the primary 752 ``QuerySet`` (which can sometimes be avoided in other cases). 753 754 Remember that, as always with QuerySets, any subsequent chained methods will 755 ignore previously cached results, and retrieve data in a fresh database 756 query. So, if you write the following: 757 758 >>> pizzas = Pizza.objects.prefetch_related('toppings') 759 >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas] 760 761 ...then the fact that ``pizza.toppings.all()`` has been prefetched will not help 762 you - in fact it hurts performance, since you have done a database query that 763 you haven't used. So use this feature with caution! 764 765 Chaining ``prefetch_related`` calls will accumulate the fields that should have 766 this behaviour applied. To clear any ``prefetch_related`` behaviour, call the 767 method with no arguments. 768 769 693 770 extra 694 771 ~~~~~ 695 772 -
new file tests/modeltests/prefetch_related/models.py
diff --git a/tests/modeltests/prefetch_related/__init__.py b/tests/modeltests/prefetch_related/__init__.py new file mode 100644 diff --git a/tests/modeltests/prefetch_related/models.py b/tests/modeltests/prefetch_related/models.py new file mode 100644
from django.db import models


class Author(models.Model):
    """An author with a name and a required ``first_book``."""
    name = models.CharField(max_length=50)
    # 'Book' is referenced by name because it is declared further down.
    # The reverse accessor on Book is ``first_time_authors``.
    first_book = models.ForeignKey(
        'Book',
        related_name='first_time_authors',
    )

    def __unicode__(self):
        return self.name


class Book(models.Model):
    """A titled book linked to any number of authors."""
    title = models.CharField(max_length=255)

    # The reverse accessor on Author is ``books``.
    authors = models.ManyToManyField(
        Author,
        related_name='books',
    )

    def __unicode__(self):
        return self.title
new file tests/modeltests/prefetch_related/tests.py
diff --git a/tests/modeltests/prefetch_related/tests.py b/tests/modeltests/prefetch_related/tests.py new file mode 100644
from django.test import TestCase

from models import Author, Book


class PrefetchRelatedTests(TestCase):
    """
    Tests for ``QuerySet.prefetch_related()`` over many-to-many relations
    (forward and reverse) and the reverse side of a ForeignKey.

    Wherever a test expects two queries, that is one query for the primary
    objects plus one batch query for the related objects.
    """

    def setUp(self):
        self.book1 = Book.objects.create(title="Poems")
        self.book2 = Book.objects.create(title="Jane Eyre")
        self.book3 = Book.objects.create(title="Wuthering Heights")

        # Every author's first book is book1, so book1 is the only book with
        # any ``first_time_authors``.
        self.author1 = Author.objects.create(name="Charlotte",
                                             first_book=self.book1)
        self.author2 = Author.objects.create(name="Anne",
                                             first_book=self.book1)
        self.author3 = Author.objects.create(name="Emily",
                                             first_book=self.book1)

        self.book1.authors.add(self.author1)
        self.book1.authors.add(self.author2)
        self.book1.authors.add(self.author3)
        self.book2.authors.add(self.author1)
        self.book3.authors.add(self.author3)

    def test_m2m_forward(self):
        """Prefetching a forward M2M gives the same results in two queries."""
        with self.assertNumQueries(2):
            lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')]

        normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
        self.assertEqual(lists, normal_lists)

    def test_m2m_reverse(self):
        """Prefetching a reverse M2M gives the same results in two queries."""
        with self.assertNumQueries(2):
            lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')]

        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
        self.assertEqual(lists, normal_lists)

    def test_foreignkey_reverse(self):
        """Prefetching a reverse ForeignKey gives the same results in two queries."""
        with self.assertNumQueries(2):
            lists = [list(b.first_time_authors.all())
                     for b in Book.objects.prefetch_related('first_time_authors')]

        # Check the prefetched results themselves, as the M2M tests do.
        normal_lists = [list(b.first_time_authors.all())
                        for b in Book.objects.all()]
        self.assertEqual(lists, normal_lists)

        # An ordinary, non-prefetched relation lookup must still work.
        self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"])

    def test_survives_clone(self):
        """The prefetch list is carried over when the QuerySet is cloned."""
        with self.assertNumQueries(2):
            # exclude() clones the QuerySet after prefetch_related().
            books = Book.objects.prefetch_related('first_time_authors').exclude(id=1000)
            for b in books:
                list(b.first_time_authors.all())

    def test_len(self):
        """len() evaluates and prefetches; later iteration adds no queries."""
        with self.assertNumQueries(2):
            qs = Book.objects.prefetch_related('first_time_authors')
            len(qs)
            for b in qs:
                list(b.first_time_authors.all())

    def test_bool(self):
        """bool() evaluates and prefetches; later iteration adds no queries."""
        with self.assertNumQueries(2):
            qs = Book.objects.prefetch_related('first_time_authors')
            bool(qs)
            for b in qs:
                list(b.first_time_authors.all())

    def test_clear(self):
        """prefetch_related() with no arguments clears the prefetch list."""
        # Without prefetching: one query for the authors plus one per author
        # (three authors) for their books.
        with self.assertNumQueries(4):
            with_prefetch = Author.objects.prefetch_related('books')
            without_prefetch = with_prefetch.prefetch_related()
            for a in without_prefetch:
                list(a.books.all())