Ticket #16937: prefetch_1.3.1.diff
File prefetch_1.3.1.diff, 23.5 KB (added by , 13 years ago) |
---|
-
django/db/models/fields/related.py
diff -rupN Django-1.3.1/django/db/models/fields/related.py Django-1.3.1_prefetch/django/db/models/fields/related.py
old new class ForeignRelatedObjectsDescriptor(ob 422 422 db = self._db or router.db_for_read(rel_model, instance=instance) 423 423 return superclass.get_query_set(self).using(db).filter(**(self.core_filters)) 424 424 425 def get_prefetch_query_set(self, instances): 426 """ 427 Return a queryset that does the bulk lookup needed 428 by prefetch_related functionality. 429 """ 430 if not instances: 431 return self.model.objects.none() 432 433 db = self._db or router.db_for_read(self.model, instance=instances[0]) 434 query = {'%s__%s__in' % (rel_field.name, attname): 435 [getattr(obj, attname) for obj in instances]} 436 return super(RelatedManager, self).get_query_set().using(db).filter(**query) 437 438 def select_matching_instances(self, obj, related_objects): 439 field_val = getattr(obj, attname) 440 other_attname = rel_field.get_attname() 441 return [rel_obj for rel_obj in related_objects 442 if getattr(rel_obj, other_attname) == field_val] 443 444 def all(self): 445 try: 446 return self.instance._prefetched_objects_cache[rel_field.related_query_name()] 447 except (AttributeError, KeyError): 448 return super(RelatedManager, self).all() 449 425 450 def add(self, *objs): 426 451 for obj in objs: 427 452 if not isinstance(obj, self.model): … … def create_many_related_manager(supercla 476 501 and adds behavior for many-to-many related objects.""" 477 502 through = rel.through 478 503 class ManyRelatedManager(superclass): 479 def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,480 join_table=None, source_field_name=None, target_field_name=None,481 reverse=False):504 def __init__(self, model=None, query_field_name=None, instance=None, symmetrical=None, 505 source_field_name=None, target_field_name=None, reverse=False, 506 join_table=None, prefetch_cache_name=None): 482 507 super(ManyRelatedManager, self).__init__() 483 self.core_filters = core_filters 508 self.query_field_name = query_field_name 509 self.core_filters = {'%s__pk' % 
query_field_name: instance._get_pk_val()} 484 510 self.model = model 485 511 self.symmetrical = symmetrical 486 512 self.instance = instance … … def create_many_related_manager(supercla 489 515 self.through = through 490 516 self._pk_val = self.instance.pk 491 517 self.reverse = reverse 518 self.prefetch_cache_name = prefetch_cache_name 492 519 if self._pk_val is None: 493 520 raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__) 494 521 … … def create_many_related_manager(supercla 496 523 db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) 497 524 return superclass.get_query_set(self).using(db)._next_is_sticky().filter(**(self.core_filters)) 498 525 526 def get_prefetch_query_set(self, instances): 527 if not instances: 528 return self.model.objects.none() 529 530 from django.db import connections 531 532 db = self._db or router.db_for_read(self.model, instance=instances[0]) 533 query = {'%s__pk__in' % self.query_field_name: 534 [obj._get_pk_val() for obj in instances]} 535 qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) 536 537 # M2M: need to annotate the query in order to get the PK of the 538 # primary model that the secondary model was actually related to. 539 540 # We know that there will already be a join on the join table, so we 541 # can just add the select. 
542 join_table = self.through._meta.db_table 543 pk_col = "%s_id" % self.source_field_name 544 connection = connections[db] 545 qn = connection.ops.quote_name 546 qs = qs.extra(select={'_prefetch_related_pk': 547 '%s.%s' % (qn(join_table), qn(pk_col))}) 548 return qs 549 550 def select_matching_instances(self, obj, related_objects): 551 pk_val = obj._get_pk_val() 552 return [rel_obj for rel_obj in related_objects 553 if rel_obj._prefetch_related_pk == pk_val] 554 555 def all(self): 556 try: 557 return self.instance._prefetched_objects_cache[self.prefetch_cache_name] 558 except (AttributeError, KeyError): 559 return super(ManyRelatedManager, self).all() 560 499 561 # If the ManyToMany relation has an intermediary model, 500 562 # the add and remove methods do not exist. 501 563 if rel.through._meta.auto_created: … … class ManyRelatedObjectsDescriptor(objec 671 733 672 734 manager = RelatedManager( 673 735 model=rel_model, 674 core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()}, 736 query_field_name=self.related.field.name, 737 prefetch_cache_name=self.related.field.related_query_name(), 675 738 instance=instance, 676 739 symmetrical=False, 677 740 source_field_name=self.related.field.m2m_reverse_field_name(), … … class ReverseManyRelatedObjectsDescripto 723 786 724 787 manager = RelatedManager( 725 788 model=rel_model, 726 core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()}, 789 query_field_name=self.field.related_query_name(), 790 prefetch_cache_name=self.field.name, 727 791 instance=instance, 728 792 symmetrical=self.field.rel.symmetrical, 729 793 source_field_name=self.field.m2m_field_name(), -
django/db/models/manager.py
diff -rupN Django-1.3.1/django/db/models/manager.py Django-1.3.1_prefetch/django/db/models/manager.py
old new class Manager(object): 167 167 def select_related(self, *args, **kwargs): 168 168 return self.get_query_set().select_related(*args, **kwargs) 169 169 170 def prefetch_related(self, *args, **kwargs): 171 return self.get_query_set().prefetch_related(*args, **kwargs) 172 170 173 def values(self, *args, **kwargs): 171 174 return self.get_query_set().values(*args, **kwargs) 172 175 -
django/db/models/query.py
diff -rupN Django-1.3.1/django/db/models/query.py Django-1.3.1_prefetch/django/db/models/query.py
old new class QuerySet(object): 37 37 self._iter = None 38 38 self._sticky_filter = False 39 39 self._for_write = False 40 self._prefetch_related = set() 41 self._prefetch_done = False 40 42 41 43 ######################## 42 44 # PYTHON MAGIC METHODS # … … class QuerySet(object): 82 84 self._result_cache = list(self.iterator()) 83 85 elif self._iter: 84 86 self._result_cache.extend(self._iter) 87 if self._prefetch_related and not self._prefetch_done: 88 self._prefetch_related_objects() 85 89 return len(self._result_cache) 86 90 87 91 def __iter__(self): 92 if self._prefetch_related: 93 # We need all the results in order to be able to do the prefetch 94 # in one go. To minimize code duplication, we use the __len__ 95 # code path which also forces this, and also does the prefetch 96 len(self) 97 88 98 if self._result_cache is None: 89 99 self._iter = self.iterator() 90 100 self._result_cache = [] … … class QuerySet(object): 107 117 self._fill_cache() 108 118 109 119 def __nonzero__(self): 120 if self._prefetch_related: 121 # We need all the results in order to be able to do the prefetch 122 # in one go. To minimize code duplication, we use the __len__ 123 # code path which also forces this, and also does the prefetch 124 len(self) 125 110 126 if self._result_cache is not None: 111 127 return bool(self._result_cache) 112 128 try: … … class QuerySet(object): 496 512 return self.query.has_results(using=self.db) 497 513 return bool(self._result_cache) 498 514 515 def _prefetch_related_objects(self): 516 # This method can only be called once the result cache has been filled. 
517 prefetch_related_objects(self._result_cache, self._prefetch_related) 518 self._prefetch_done = True 519 499 520 ################################################## 500 521 # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS # 501 522 ################################################## … … class QuerySet(object): 607 628 obj.query.max_depth = depth 608 629 return obj 609 630 631 def prefetch_related(self, *fields): 632 """ 633 Returns a new QuerySet instance that will prefetch Many-To-One 634 and Many-To-Many related objects when the QuerySet is evaluated. 635 636 The fields specified must be attributes that return a RelatedManager of 637 some kind when used on instances of the evaluated QuerySet. 638 639 These RelatedManagers will be modified so that their 'all()' method will 640 return a QuerySet whose cache is already filled with objects that were 641 looked up in a single batch, rather than one query per object in the 642 current QuerySet. 643 644 When prefetch_related() is called more than once, the list of fields to 645 prefetch is added to. Call prefetch_related() with no arguments to 646 clear the list. 
647 """ 648 if fields: 649 new_fields = self._prefetch_related.union(set(fields)) 650 else: 651 new_fields = set() 652 return self._clone(_prefetch_related=new_fields) 653 610 654 def dup_select_related(self, other): 611 655 """ 612 656 Copies the related selection status from the QuerySet 'other' to the … … class QuerySet(object): 756 800 query.filter_is_sticky = True 757 801 c = klass(model=self.model, query=query, using=self._db) 758 802 c._for_write = self._for_write 803 c._prefetch_related = self._prefetch_related 759 804 c.__dict__.update(kwargs) 760 805 if setup and hasattr(c, '_setup_query'): 761 806 c._setup_query() … … def insert_query(model, values, return_i 1434 1479 query = sql.InsertQuery(model) 1435 1480 query.insert_values(values, raw_values) 1436 1481 return query.get_compiler(using=using).execute_sql(return_id) 1482 1483 1484 def prefetch_related_objects(result_cache, fields): 1485 """ 1486 Populates prefetched objects caches for a list of results 1487 from a QuerySet 1488 """ 1489 from django.db.models.sql.constants import LOOKUP_SEP 1490 1491 if len(result_cache) == 0: 1492 return # nothing to do 1493 1494 model = result_cache[0].__class__ 1495 1496 # We need to be able to dynamically add to the list of prefetch_related 1497 # fields that we look up (see below). So we need some book keeping to 1498 # ensure we don't do duplicate work. 1499 done_fields = set() # list of fields like foo__bar__baz 1500 done_lookups = {} # dictionary of things like 'foo__bar': [results] 1501 fields = list(fields) 1502 1503 # We may expand fields, so need a loop that allows for that 1504 i = 0 1505 while i < len(fields): 1506 # 'field' can span several relationships, and so represent multiple 1507 # lookups. 
1508 field = fields[i] 1509 1510 if field in done_fields: 1511 # We've done exactly this already, skip the whole thing 1512 i += 1 1513 continue 1514 done_fields.add(field) 1515 1516 # Top level, the list of objects to decorate is the result cache 1517 # from the primary QuerySet. It won't be for deeper levels. 1518 obj_list = result_cache 1519 1520 attrs = field.split(LOOKUP_SEP) 1521 for level, attr in enumerate(attrs): 1522 # Prepare main instances 1523 if len(obj_list) == 0: 1524 break 1525 1526 good_objects = True 1527 for obj in obj_list: 1528 if not hasattr(obj, '_prefetched_objects_cache'): 1529 try: 1530 obj._prefetched_objects_cache = {} 1531 except AttributeError: 1532 # Must be in a QuerySet subclass that is not returning 1533 # Model instances, either in Django or 3rd 1534 # party. prefetch_related() doesn't make sense, so quit 1535 # now. 1536 good_objects = False 1537 break 1538 if not good_objects: 1539 break 1540 1541 # Descend down tree 1542 try: 1543 rel_obj = getattr(obj_list[0], attr) 1544 except AttributeError: 1545 raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid " 1546 "parameter to prefetch_related()" % 1547 (attr, obj_list[0].__class__.__name__, field)) 1548 1549 can_prefetch = hasattr(rel_obj, 'get_prefetch_query_set') 1550 if level == len(attrs) - 1 and not can_prefetch: 1551 # Last one, this *must* resolve to a related manager. 1552 raise ValueError("'%s' does not resolve to a supported 'many related" 1553 " manager' for model %s - this is an invalid" 1554 " parameter to prefetch_related()." 
1555 % (field, model.__name__)) 1556 1557 if can_prefetch: 1558 # Check we didn't do this already 1559 lookup = LOOKUP_SEP.join(attrs[0:level+1]) 1560 if lookup in done_lookups: 1561 obj_list = done_lookups[lookup] 1562 else: 1563 relmanager = rel_obj 1564 obj_list, additional_prf = _prefetch_one_level(obj_list, relmanager, attr) 1565 for f in additional_prf: 1566 new_prf = LOOKUP_SEP.join([lookup, f]) 1567 fields.append(new_prf) 1568 done_lookups[lookup] = obj_list 1569 else: 1570 # Assume we've got some singly related object. We replace 1571 # the current list of parent objects with that list. 1572 obj_list = [getattr(obj, attr) for obj in obj_list] 1573 1574 i += 1 1575 1576 1577 def _prefetch_one_level(instances, relmanager, attname): 1578 """ 1579 Runs prefetches on all instances using the manager relmanager, 1580 assigning results to queryset against instance.attname. 1581 1582 The prefetched objects are returned, along with any additional 1583 prefetches that must be done due to prefetch_related fields 1584 found from default managers. 1585 """ 1586 mainqs = relmanager.get_prefetch_query_set(instances) 1587 # We have to handle the possibility that the default manager itself added 1588 # prefetch_related fields to the QuerySet we just got back. We don't want to 1589 # trigger the prefetch_related functionality by evaluating the query. 1590 # Rather, we need to merge in the prefetch_related fields. 1591 additional_prf = list(getattr(mainqs, '_prefetch_related', [])) 1592 if additional_prf: 1593 mainqs = mainqs.prefetch_related() 1594 all_related_objects = list(mainqs) 1595 for obj in instances: 1596 qs = getattr(obj, attname).all() 1597 qs._result_cache = relmanager.select_matching_instances(obj, all_related_objects) 1598 # We don't want the individual qs doing prefetch_related now, since we 1599 # have merged this into the current work. 
1600 qs._prefetch_done = True 1601 obj._prefetched_objects_cache[attname] = qs 1602 return all_related_objects, additional_prf -
docs/ref/models/querysets.txt
diff -rupN Django-1.3.1/docs/ref/models/querysets.txt Django-1.3.1_prefetch/docs/ref/models/querysets.txt
old new related object. 689 689 ``OneToOneFields`` will not be traversed in the reverse direction if you 690 690 are performing a depth-based ``select_related``. 691 691 692 prefetch_related 693 ~~~~~~~~~~~~~~~~ 694 695 .. method:: prefetch_related(*fields) 696 697 .. versionadded:: 1.4 698 699 Returns a ``QuerySet`` that will automatically retrieve, in a single batch, 700 related many-to-many and many-to-one objects for the specified fields. 701 702 This is similar to ``select_related`` for the 'many related objects' case, with 703 the following important differences: 704 705 * ``prefetch_related`` causes a separate query to be issued for each set of 706 related objects that you request. This query is done as soon as the QuerySet 707 begins to be evaluated. 708 709 This is in contrast to ``select_related``, which modifies the original query 710 with joins in order to get the related objects in the same query as the main 711 objects. 712 713 * It is 'single depth' only, and doesn't support join syntax. 714 715 The fields that must be supplied to this method can be any attributes on the 716 model instances which represent related queries that return multiple 717 objects. This includes attributes representing the 'many' side of ``ForeignKey`` 718 relationships and ``ManyToManyField`` attributes. 719 720 For example, suppose you have these models:: 721 722 class Topping(models.Model): 723 name = models.CharField(max_length=30) 724 725 class Pizza(models.Model): 726 name = models.CharField(max_length=50) 727 toppings = models.ManyToManyField(Topping) 728 729 def __unicode__(self): 730 return u"%s (%s)" % (self.name, u", ".join([topping.name 731 for topping in self.toppings.all()])) 732 733 and run this code:: 734 735 >>> Pizza.objects.all() 736 [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"... 737 738 The problem with this code is that it will run a query on the Toppings table for 739 **every** item in the Pizza ``QuerySet``. 
Using ``prefetch_related``, this can 740 be reduced to two: 741 742 >>> pizzas = Pizza.objects.all().prefetch_related('toppings') 743 744 All the relevant toppings will be fetched in a single query, and used to make 745 ``QuerySets`` that have a pre-filled cache of the relevant results. These 746 ``QuerySets`` are then used in the ``self.toppings.all()`` calls. 747 748 Please note that use of ``prefetch_related`` will mean that the additional 749 queries run will **always** be executed - even if you never use the related 750 objects - and it always fully populates the result cache on the primary 751 ``QuerySet`` (which can sometimes be avoided in other cases). 752 753 Remember that, as always with QuerySets, any subsequent chained methods will 754 ignore previously cached results, and retrieve data in a fresh database 755 query. So, if you write the following: 756 757 >>> pizzas = Pizza.objects.prefetch_related('toppings') 758 >>> [list(pizza.toppings.filter(spicy=True)) for pizza in pizzas] 759 760 ...then the fact that ``pizza.toppings.all()`` has been prefetched will not help 761 you - in fact it hurts performance, since you have done a database query that 762 you haven't used. So use this feature with caution! 763 764 Chaining ``prefetch_related`` calls will accumulate the fields that should have 765 this behaviour applied. To clear any ``prefetch_related`` behaviour, call the 766 method with no arguments. 767 768 692 769 extra 693 770 ~~~~~ 694 771 -
tests/modeltests/prefetch_related/models.py
diff -rupN Django-1.3.1/tests/modeltests/prefetch_related/models.py Django-1.3.1_prefetch/tests/modeltests/prefetch_related/models.py
old new 1 from django.db import models 2 3 4 class Author(models.Model): 5 name = models.CharField(max_length=50) 6 first_book = models.ForeignKey('Book', related_name='first_time_authors') 7 8 def __unicode__(self): 9 return self.name 10 11 class Book(models.Model): 12 title = models.CharField(max_length=255) 13 14 authors = models.ManyToManyField(Author, related_name='books') 15 16 def __unicode__(self): 17 return self.title -
tests/modeltests/prefetch_related/tests.py
diff -rupN Django-1.3.1/tests/modeltests/prefetch_related/tests.py Django-1.3.1_prefetch/tests/modeltests/prefetch_related/tests.py
old new 1 from django.test import TestCase 2 3 from models import Author, Book 4 5 6 class PrefetchRelatedTests(TestCase): 7 8 def setUp(self): 9 10 self.book1 = Book.objects.create(title="Poems") 11 self.book2 = Book.objects.create(title="Jane Eyre") 12 self.book3 = Book.objects.create(title="Wuthering Heights") 13 14 self.author1 = Author.objects.create(name="Charlotte", 15 first_book=self.book1) 16 self.author2 = Author.objects.create(name="Anne", 17 first_book=self.book1) 18 self.author3 = Author.objects.create(name="Emily", 19 first_book=self.book1) 20 21 self.book1.authors.add(self.author1) 22 self.book1.authors.add(self.author2) 23 self.book1.authors.add(self.author3) 24 self.book2.authors.add(self.author1) 25 self.book3.authors.add(self.author3) 26 27 def test_m2m_forward(self): 28 with self.assertNumQueries(2): 29 lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')] 30 31 normal_lists = [list(b.authors.all()) for b in Book.objects.all()] 32 self.assertEqual(lists, normal_lists) 33 34 35 def test_m2m_reverse(self): 36 with self.assertNumQueries(2): 37 lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')] 38 39 normal_lists = [list(a.books.all()) for a in Author.objects.all()] 40 self.assertEqual(lists, normal_lists) 41 42 def test_foreignkey_reverse(self): 43 with self.assertNumQueries(2): 44 lists = [list(b.first_time_authors.all()) 45 for b in Book.objects.prefetch_related('first_time_authors')] 46 47 self.assertQuerysetEqual(self.book2.authors.all(), [u"<Author: Charlotte>"]) 48 49 def test_survives_clone(self): 50 with self.assertNumQueries(2): 51 lists = [list(b.first_time_authors.all()) 52 for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)] 53 54 def test_len(self): 55 with self.assertNumQueries(2): 56 qs = Book.objects.prefetch_related('first_time_authors') 57 length = len(qs) 58 lists = [list(b.first_time_authors.all()) 59 for b in qs] 60 61 def test_bool(self): 62 
with self.assertNumQueries(2): 63 qs = Book.objects.prefetch_related('first_time_authors') 64 x = bool(qs) 65 lists = [list(b.first_time_authors.all()) 66 for b in qs] 67 68 def test_clear(self): 69 with self.assertNumQueries(4): 70 with_prefetch = Author.objects.prefetch_related('books') 71 without_prefetch = with_prefetch.prefetch_related() 72 lists = [list(a.books.all()) for a in without_prefetch]