Ticket #16937: prefetch.diff
File prefetch.diff, 18.0 KB (added by , 13 years ago) |
---|
-
django/db/models/fields/related.py
diff -r 41991155a39e django/db/models/fields/related.py
a b 421 421 db = self._db or router.db_for_read(rel_model, instance=instance) 422 422 return superclass.get_query_set(self).using(db).filter(**(self.core_filters)) 423 423 424 def all(self): 425 try: 426 return instance._prefetched_objects_cache[rel_field.related_query_name()] 427 except (AttributeError, KeyError): 428 return super(RelatedManager, self).all() 429 424 430 def add(self, *objs): 425 431 for obj in objs: 426 432 if not isinstance(obj, self.model): … … 474 480 through = rel.through 475 481 class ManyRelatedManager(superclass): 476 482 def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None, 477 source_field_name=None, target_field_name=None, reverse=False): 483 source_field_name=None, target_field_name=None, reverse=False, 484 prefetch_cache_name=None): 478 485 super(ManyRelatedManager, self).__init__() 479 486 self.core_filters = core_filters 480 487 self.model = model … … 485 492 self.through = through 486 493 self._pk_val = self.instance.pk 487 494 self.reverse = reverse 495 self.prefetch_cache_name = prefetch_cache_name 488 496 if self._pk_val is None: 489 497 raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__) 490 498 … … 492 500 db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) 493 501 return superclass.get_query_set(self).using(db)._next_is_sticky().filter(**(self.core_filters)) 494 502 503 def all(self): 504 try: 505 return self.instance._prefetched_objects_cache[self.prefetch_cache_name] 506 except (AttributeError, KeyError): 507 return super(ManyRelatedManager, self).all() 508 495 509 # If the ManyToMany relation has an intermediary model, 496 510 # the add and remove methods do not exist. 
497 511 if rel.through._meta.auto_created: … … 669 683 manager = RelatedManager( 670 684 model=rel_model, 671 685 core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()}, 686 prefetch_cache_name=self.related.field.related_query_name(), 672 687 instance=instance, 673 688 symmetrical=False, 674 689 source_field_name=self.related.field.m2m_reverse_field_name(), … … 721 736 manager = RelatedManager( 722 737 model=rel_model, 723 738 core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()}, 739 prefetch_cache_name=self.field.name, 724 740 instance=instance, 725 741 symmetrical=self.field.rel.symmetrical, 726 742 source_field_name=self.field.m2m_field_name(), -
django/db/models/manager.py
diff -r 41991155a39e django/db/models/manager.py
a b 172 172 def select_related(self, *args, **kwargs): 173 173 return self.get_query_set().select_related(*args, **kwargs) 174 174 175 def prefetch_related(self, *args, **kwargs): 176 return self.get_query_set().prefetch_related(*args, **kwargs) 177 175 178 def values(self, *args, **kwargs): 176 179 return self.get_query_set().values(*args, **kwargs) 177 180 -
django/db/models/query.py
diff -r 41991155a39e django/db/models/query.py
a b 36 36 self._iter = None 37 37 self._sticky_filter = False 38 38 self._for_write = False 39 self._prefetch_related = set() 39 40 40 41 ######################## 41 42 # PYTHON MAGIC METHODS # … … 81 82 self._result_cache = list(self.iterator()) 82 83 elif self._iter: 83 84 self._result_cache.extend(self._iter) 85 if self._prefetch_related: 86 self._prefetch_related_objects() 84 87 return len(self._result_cache) 85 88 86 89 def __iter__(self): 90 if self._prefetch_related: 91 # We need all the results in order to be able to do the prefetch 92 # in one go. To minimize code duplication, we use the __len__ 93 # code path which also forces this, and also does the prefetch 94 len(self) 95 87 96 if self._result_cache is None: 88 97 self._iter = self.iterator() 89 98 self._result_cache = [] … … 106 115 self._fill_cache() 107 116 108 117 def __nonzero__(self): 118 if self._prefetch_related: 119 # We need all the results in order to be able to do the prefetch 120 # in one go. To minimize code duplication, we use the __len__ 121 # code path which also forces this, and also does the prefetch 122 len(self) 123 109 124 if self._result_cache is not None: 110 125 return bool(self._result_cache) 111 126 try: … … 526 541 return self.query.has_results(using=self.db) 527 542 return bool(self._result_cache) 528 543 544 def _prefetch_related_objects(self): 545 from django.db import connections 546 # Here we access some internals of how related managers work in order to 547 # fix a common efficiency problem with M2M and M2O lookups. 548 549 # This method can only be called once the result cache has been filled. 
550 obj = None 551 for obj in self._result_cache: 552 if not hasattr(obj, '_prefetched_objects_cache'): 553 obj._prefetched_objects_cache = {} 554 555 if obj is None: 556 return # nothing to do 557 558 559 for attname in self._prefetch_related: 560 field, model, direct, m2m = self.model._meta.get_field_by_name(attname) 561 562 # A new related manager is created every time the attribute is 563 # accessed, so we can alter the following relmanager with safety 564 relmanager = getattr(obj, attname) 565 566 if m2m: 567 # Use a modified RelatedManager to build the query 568 569 # Collect values to filter on 570 pks = [obj._get_pk_val() for obj in self._result_cache] 571 572 if direct: 573 field_name = field.related_query_name() 574 else: 575 field_name = field.field.name 576 relmanager.core_filters = {'%s__pk__in' % field_name: pks} 577 578 # M2M: need to annotate the query in order to get the PK of the 579 # primary model that the secondary model was actually related 580 # to. For ForeignKeys, this information is in the secondary 581 # table already. 582 583 # We know that there will already be a join on the join table, 584 # so we can just add the select. 
585 join_table = relmanager.through._meta.db_table 586 pk_col = "%s_id" % relmanager.source_field_name 587 connection = connections[self.db] 588 qn = connection.ops.quote_name 589 qs = relmanager.extra(select={'_prefetch_related_pk':'%s.%s' % (qn(join_table), qn(pk_col))}) 590 591 # Execute query 592 all_related_objects = list(qs) 593 594 # Now decorate _result_cache with relevant prefetched data 595 for obj in self._result_cache: 596 qs = getattr(obj, attname).all() 597 pk_val = obj._get_pk_val() 598 qs._result_cache = [rel_obj for rel_obj in all_related_objects 599 if rel_obj._prefetch_related_pk == pk_val] 600 obj._prefetched_objects_cache[attname] = qs 601 602 else: 603 # Reverse ForeignKey 604 605 # Use a modified RelatedManager to build the query 606 rel_field = field.field 607 rel_attname = rel_field.rel.get_related_field().name 608 rel_vals = [getattr(obj, rel_attname) for obj in self._result_cache] 609 relmanager.core_filters = {'%s__%s__in' % (rel_field.name, 610 rel_attname): 611 rel_vals} 612 613 # Execute query 614 all_related_objects = list(relmanager.all()) 615 616 # Now decorate _result_cache with relevant prefetched data 617 for obj in self._result_cache: 618 qs = getattr(obj, attname).all() 619 field_val = getattr(obj, rel_attname) 620 other_attname = rel_field.get_attname() 621 qs._result_cache = [rel_obj for rel_obj in all_related_objects 622 if getattr(rel_obj, other_attname) == field_val] 623 obj._prefetched_objects_cache[attname] = qs 624 625 529 626 ################################################## 530 627 # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # 531 628 ################################################## … … 649 746 obj.query.max_depth = depth 650 747 return obj 651 748 749 def prefetch_related(self, *fields): 750 """ 751 Returns a new QuerySet instance that will prefetch Many-To-One 752 and Many-To-Many related objects when the QuerySet is evaluated. 
753 754 The fields specified must be attributes that return a RelatedManager of 755 some kind when used on instances of the evaluated QuerySet. 756 757 These RelatedManagers will be modified so that their 'all()' method will 758 return a QuerySet whose cache is already filled with objects that were 759 looked up in a single batch, rather than one query per object in the 760 current QuerySet. 761 762 When prefetch_related() is called more than once, the list of fields to 763 prefetch is added to. Call prefetch_related() with no arguments to 764 clear the list. 765 """ 766 if fields: 767 new_fields = self._prefetch_related.union(set(fields)) 768 else: 769 new_fields = set() 770 return self._clone(_prefetch_related=new_fields) 771 652 772 def dup_select_related(self, other): 653 773 """ 654 774 Copies the related selection status from the QuerySet 'other' to the … … 798 918 query.filter_is_sticky = True 799 919 c = klass(model=self.model, query=query, using=self._db) 800 920 c._for_write = self._for_write 921 c._prefetch_related = self._prefetch_related 801 922 c.__dict__.update(kwargs) 802 923 if setup and hasattr(c, '_setup_query'): 803 924 c._setup_query() … … 941 1062 c.aggregate_names = self.aggregate_names 942 1063 if setup and hasattr(c, '_setup_query'): 943 1064 c._setup_query() 1065 # prefetch_related doesn't make sense for a ValuesQuerySet 1066 c._prefetch_related = None 1067 944 1068 return c 945 1069 946 1070 def _merge_sanity_check(self, other): … … 1051 1175 c._kind = self._kind 1052 1176 if setup and hasattr(c, '_setup_query'): 1053 1177 c._setup_query() 1178 # prefetch_related doesn't make sense for a DateQuerySet 1179 self._prefetch_related = None 1054 1180 return c 1055 1181 1056 1182 -
docs/ref/models/querysets.txt
diff -r 41991155a39e docs/ref/models/querysets.txt
a b 690 690 A :class:`~django.db.models.OneToOneField` is not traversed in the reverse 691 691 direction if you are performing a depth-based ``select_related()`` call. 692 692 693 prefetch_related 694 ~~~~~~~~~~~~~~~~ 695 696 .. method:: prefetch_related(*fields) 697 698 .. versionadded:: 1.4 699 700 Returns a ``QuerySet`` that will automatically retrieve, in a single batch, 701 related many-to-many and many-to-one objects for the specified fields. 702 703 This is similar to ``select_related`` for the 'many related objects' case, with the 704 following important differences: 705 706 * ``prefetch_related`` issues a separate query to get the related objects. 707 This is done as soon as the QuerySet begins to be evaluated. 708 709 * It is 'single depth' only, and doesn't support join syntax. 710 711 The fields that must be supplied to this method can be any attributes on the 712 model instances which represent related queries that return multiple 713 objects. This includes attributes representing the 'many' side of ``ForeignKey`` 714 relationships and ``ManyToManyField`` attributes. 715 716 For example, suppose you have these models:: 717 718 class Topping(models.Model): 719 name = models.CharField(max_length=30) 720 721 class Pizza(models.Model): 722 name = models.CharField(max_length=50) 723 toppings = models.ManyToManyField(Topping) 724 725 def __unicode__(self): 726 return u"%s (%s)" % (self.name, u", ".join([topping.name 727 for topping in self.toppings.all()])) 728 729 and run this code:: 730 731 >>> Pizza.objects.all() 732 [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"... 733 734 The problem with this code is that it will run a query on the Toppings table for 735 **every** item in the Pizza ``QuerySet``. 
Using ``prefetch_related``, this can 736 be reduced to two queries: 737 738 >>> pizzas = Pizza.objects.all().prefetch_related('toppings') 739 740 All the relevant toppings will be fetched in a single query, and used to make a 741 ``QuerySet`` that has a pre-filled cache of the relevant results. This 742 ``QuerySet`` is then used in the ``self.toppings.all()`` call. 743 744 Please note the following: 745 746 * Use of ``prefetch_related`` will mean that it **always** runs the two 747 queries - even if you never use the related objects - and it always 748 fully populates the result cache on the primary ``QuerySet``. 749 750 * **Only** the 'all()' method of the related manager is affected. If you want to 751 do additional filtering of the related objects using ``filter``, for example, 752 this will create a new ``QuerySet``, ignoring the pre-filled ``QuerySet``, and 753 will do that filtering in the database as always. 754 755 Therefore you should only use ``prefetch_related`` when you are sure that the 756 related objects are needed (or will be needed in the normal case), since you are 757 adding the overhead of issuing an additional database query and creating (many) model 758 instances. Where appropriate you could use this in a manager or default manager. 759 760 Chaining ``prefetch_related`` will accumulate the fields that should have this 761 behaviour applied. To clear any ``prefetch_related`` behaviour, call the method 762 with no arguments. 763 764 693 765 extra 694 766 ~~~~~ 695 767 -
new file tests/modeltests/prefetch_related/models.py
diff -r 41991155a39e tests/modeltests/prefetch_related/models.py
- + 1 from django.db import models 2 3 4 class Author(models.Model): 5 name = models.CharField(max_length=50) 6 first_book = models.ForeignKey('Book', related_name='first_time_authors') 7 8 9 class Book(models.Model): 10 title = models.CharField(max_length=255) 11 12 authors = models.ManyToManyField(Author, related_name='books') 13 -
new file tests/modeltests/prefetch_related/tests.py
diff -r 41991155a39e tests/modeltests/prefetch_related/tests.py
- + 1 from django.test import TestCase 2 3 from models import Author, Book 4 5 6 class PrefetchRelatedTests(TestCase): 7 8 def setUp(self): 9 10 self.book1 = Book.objects.create(title="Poems") 11 self.book2 = Book.objects.create(title="Jane Eyre") 12 self.book3 = Book.objects.create(title="Wuthering Heights") 13 14 self.author1 = Author.objects.create(name="Charlotte", 15 first_book=self.book1) 16 self.author2 = Author.objects.create(name="Anne", 17 first_book=self.book1) 18 self.author3 = Author.objects.create(name="Emily", 19 first_book=self.book1) 20 21 self.book1.authors.add(self.author1) 22 self.book1.authors.add(self.author2) 23 self.book1.authors.add(self.author3) 24 self.book2.authors.add(self.author1) 25 self.book3.authors.add(self.author3) 26 27 def test_m2m_forward(self): 28 with self.assertNumQueries(2): 29 lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')] 30 31 normal_lists = [list(b.authors.all()) for b in Book.objects.all()] 32 self.assertEqual(lists, normal_lists) 33 34 35 def test_m2m_reverse(self): 36 with self.assertNumQueries(2): 37 lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')] 38 39 normal_lists = [list(a.books.all()) for a in Author.objects.all()] 40 self.assertEqual(lists, normal_lists) 41 42 def test_foreignkey_reverse(self): 43 with self.assertNumQueries(2): 44 lists = [list(b.first_time_authors.all()) 45 for b in Book.objects.prefetch_related('first_time_authors')] 46 47 def test_survives_clone(self): 48 with self.assertNumQueries(2): 49 lists = [list(b.first_time_authors.all()) 50 for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)] 51 52 def test_len(self): 53 with self.assertNumQueries(2): 54 qs = Book.objects.prefetch_related('first_time_authors') 55 length = len(qs) 56 lists = [list(b.first_time_authors.all()) 57 for b in qs] 58 59 def test_bool(self): 60 with self.assertNumQueries(2): 61 qs = Book.objects.prefetch_related('first_time_authors') 
62 x = bool(qs) 63 lists = [list(b.first_time_authors.all()) 64 for b in qs] 65 66 def test_clear(self): 67 with self.assertNumQueries(4): 68 with_prefetch = Author.objects.prefetch_related('books') 69 without_prefetch = with_prefetch.prefetch_related() 70 lists = [list(a.books.all()) for a in without_prefetch]