Ticket #17001: #17001-prefetch_extensions.diff
File #17001-prefetch_extensions.diff, 25.5 KB (added by , 12 years ago) |
---|
-
django/contrib/contenttypes/generic.py
class GenericForeignKey(object): 62 62 # This should never happen. I love comments like this, don't you? 63 63 raise Exception("Impossible arguments to GFK.get_content_type!") 64 64 65 def get_prefetch_query_set(self, instances): 65 def get_prefetch_query_set(self, instances, custom_qs=None): 66 if custom_qs is not None: 67 raise ValueError("Custom queryset can't be used for this lookup") 66 68 # For efficiency, group the instances by content type and then do one 67 69 # query per model 68 70 fk_dict = defaultdict(set) … … def create_generic_related_manager(superclass): 320 322 db = self._db or router.db_for_read(self.model, instance=self.instance) 321 323 return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters) 322 324 323 def get_prefetch_query_set(self, instances): 324 db = self._db or router.db_for_read(self.model, instance=instances[0]) 325 def get_prefetch_query_set(self, instances, custom_qs=None): 326 if not instances: 327 return self.model._default_manager.none() 325 328 query = { 326 329 '%s__pk' % self.content_type_field_name: self.content_type.id, 327 330 '%s__in' % self.object_id_field_name: 328 331 set(obj._get_pk_val() for obj in instances) 329 } 330 qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query) 332 } 333 if custom_qs is not None: 334 qs = custom_qs.filter(**query) 335 else: 336 db = self._db or router.db_for_read(self.model, instance=instances[0]) 337 qs = super(GenericRelatedObjectManager, self).get_query_set()\ 338 .using(db).filter(**query) 331 339 return (qs, 332 340 attrgetter(self.object_id_field_name), 333 341 lambda obj: obj._get_pk_val(), 334 342 False, 335 343 self.prefetch_cache_name) 336 344 345 346 def all(self): 347 try: 348 return self.instance._prefetched_objects_cache[self.prefetch_cache_name] 349 except (AttributeError, KeyError): 350 return super(GenericRelatedObjectManager, self).all() 351 352 337 353 def add(self, *objs): 338 354 for obj in 
objs: 339 355 if not isinstance(obj, self.model): -
django/db/models/__init__.py
from django.db import connection 4 4 from django.db.models.loading import get_apps, get_app, get_models, get_model, register_models 5 5 from django.db.models.query import Q 6 6 from django.db.models.expressions import F 7 from django.db.models.related import R 7 8 from django.db.models.manager import Manager 8 9 from django.db.models.base import Model 9 10 from django.db.models.aggregates import * 10 11 from django.db.models.fields import * 11 12 from django.db.models.fields.subclassing import SubfieldBase 12 13 from django.db.models.fields.files import FileField, ImageField 13 from django.db.models.fields.related import ForeignKey, OneToOneField, ManyToManyField, ManyToOneRel, ManyToManyRel, OneToOneRel 14 from django.db.models.deletion import CASCADE, PROTECT, SET, SET_NULL, SET_DEFAULT, DO_NOTHING, ProtectedError 14 from django.db.models.fields.related import (ForeignKey, OneToOneField, 15 ManyToManyField, ManyToOneRel, ManyToManyRel, OneToOneRel) 16 from django.db.models.deletion import (CASCADE, PROTECT, SET, SET_NULL, 17 SET_DEFAULT, DO_NOTHING, ProtectedError) 15 18 from django.db.models import signals 16 19 from django.utils.decorators import wraps 17 20 -
django/db/models/fields/related.py
class SingleRelatedObjectDescriptor(object): 236 236 db = router.db_for_read(self.related.model, **db_hints) 237 237 return self.related.model._base_manager.using(db) 238 238 239 def get_prefetch_query_set(self, instances): 239 def get_prefetch_query_set(self, instances, custom_qs=None): 240 if custom_qs is not None: 241 # TODO: This error message is too SQLish, and might be downright 242 # wrong. 243 raise ValueError( 244 "Custom querysets can't be used for one-to-one relations") 245 240 246 vals = set(instance._get_pk_val() for instance in instances) 241 247 params = {'%s__pk__in' % self.related.field.name: vals} 242 248 return (self.get_query_set(), … … class ReverseSingleRelatedObjectDescriptor(object): 315 321 else: 316 322 return QuerySet(self.field.rel.to).using(db) 317 323 318 def get_prefetch_query_set(self, instances): 324 def get_prefetch_query_set(self, instances, custom_qs=None): 325 if custom_qs is not None: 326 # TODO: This error message is too SQLish, and I am not even sure 327 # this descriptor is used for m2o... 328 raise ValueError( 329 "Custom querysets can't be used for many-to-one relations") 330 319 331 vals = set(getattr(instance, self.field.attname) for instance in instances) 320 332 other_field = self.field.rel.get_related_field() 321 333 if other_field.rel: … … class ForeignRelatedObjectsDescriptor(object): 460 472 db = self._db or router.db_for_read(self.model, instance=self.instance) 461 473 return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters) 462 474 463 def get_prefetch_query_set(self, instances): 464 db = self._db or router.db_for_read(self.model, instance=instances[0]) 475 def get_prefetch_query_set(self, instances, custom_qs=None): 476 """ 477 Return a queryset that does the bulk lookup needed 478 by prefetch_related functionality. 
479 """ 465 480 query = {'%s__%s__in' % (rel_field.name, attname): 466 set(getattr(obj, attname) for obj in instances)} 467 qs = super(RelatedManager, self).get_query_set().using(db).filter(**query) 481 set(getattr(obj, attname) for obj in instances)} 482 if custom_qs is not None: 483 qs = custom_qs.filter(**query) 484 else: 485 db = self._db or router.db_for_read(self.model, instance=instances[0]) 486 qs = super(RelatedManager, self).get_query_set().\ 487 using(db).filter(**query) 468 488 return (qs, 469 489 attrgetter(rel_field.get_attname()), 470 490 attrgetter(attname), 471 491 False, 472 492 rel_field.related_query_name()) 473 493 494 def all(self): 495 try: 496 return self.instance._prefetched_objects_cache[rel_field.related_query_name()] 497 except (AttributeError, KeyError): 498 return super(RelatedManager, self).all() 499 474 500 def add(self, *objs): 475 501 for obj in objs: 476 502 if not isinstance(obj, self.model): … … def create_many_related_manager(superclass, rel): 542 568 db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance) 543 569 return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters) 544 570 545 def get_prefetch_query_set(self, instances ):571 def get_prefetch_query_set(self, instances, custom_qs=None): 546 572 instance = instances[0] 547 573 from django.db import connections 548 574 db = self._db or router.db_for_read(instance.__class__, instance=instance) 549 query = {'%s__pk__in' % self.query_field_name: 550 set(obj._get_pk_val() for obj in instances)} 551 qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query) 575 query = {'%s__pk__in' % self.query_field_name: 576 set(obj._get_pk_val() for obj in instances)} 577 578 if custom_qs is not None: 579 qs = custom_qs._next_is_sticky().filter(**query) 580 else: 581 qs = (super(ManyRelatedManager, self).get_query_set().using(db) 582 ._next_is_sticky().filter(**query)) 552 
583 553 584 # M2M: need to annotate the query in order to get the primary model 554 # that the secondary model was actually related to. We know that 555 # there will already be a join on the join table, so we can just add 556 # the select. 585 # that the secondary model was actually related to. 586 587 # We know that there will already be a join on the join table, so we 588 # can just add the select. 557 589 558 590 # For non-autocreated 'through' models, can't assume we are 559 591 # dealing with PK values. 592 593 # TODO: This is at the wrong level of abstraction. We should not 594 # be generating SQL here, but instead maybe pass this information 595 # to the connection. NoSQL camp will have problems with this, for 596 # example. 560 597 fk = self.through._meta.get_field(self.source_field_name) 561 598 source_col = fk.column 562 599 join_table = self.through._meta.db_table 563 connection = connections[db] 600 if custom_qs is not None: 601 connection = connections[custom_qs.db] 602 else: 603 connection = connections[db] 604 564 605 qn = connection.ops.quote_name 565 606 qs = qs.extra(select={'_prefetch_related_val': 566 607 '%s.%s' % (qn(join_table), qn(source_col))}) … … def create_many_related_manager(superclass, rel): 570 611 False, 571 612 self.prefetch_cache_name) 572 613 614 def all(self): 615 try: 616 return self.instance._prefetched_objects_cache[self.prefetch_cache_name] 617 except (AttributeError, KeyError): 618 return super(ManyRelatedManager, self).all() 619 573 620 # If the ManyToMany relation has an intermediary model, 574 621 # the add and remove methods do not exist. 575 622 if rel.through._meta.auto_created: -
django/db/models/query.py
from django.db.models.query_utils import (Q, select_related_descend, 12 12 deferred_class_factory, InvalidQuery) 13 13 from django.db.models.deletion import Collector 14 14 from django.db.models import sql 15 from django.db.models.related import R 15 16 from django.utils.functional import partition 16 17 17 18 # Used to control how many objects are worked with at once in some cases (e.g. … … def insert_query(model, objs, fields, return_id=False, raw=False, using=None): 1592 1593 query.insert_values(fields, objs, raw=raw) 1593 1594 return query.get_compiler(using=using).execute_sql(return_id) 1594 1595 1596 def prl_to_r_objs(lookups, prefix=None): 1597 """ 1598 This little helper function will convert a list containing R objects or 1599 normal lookups into all R objects list. 1600 """ 1601 from django.db.models.sql.constants import LOOKUP_SEP 1602 if prefix is None: 1603 return [isinstance(lup, R) and lup or R(lup) for lup in lookups] 1604 ret = [] 1605 for lup in lookups: 1606 if isinstance(lup, R): 1607 r_obj = lup._new_prefixed(prefix) 1608 else: 1609 r_obj = R(prefix + LOOKUP_SEP + lup) 1610 ret.append(r_obj) 1611 return ret 1595 1612 1596 1613 def prefetch_related_objects(result_cache, related_lookups): 1597 1614 """ … … def prefetch_related_objects(result_cache, related_lookups): 1604 1621 1605 1622 if len(result_cache) == 0: 1606 1623 return # nothing to do 1607 1624 r_objs = prl_to_r_objs(related_lookups) 1608 1625 model = result_cache[0].__class__ 1609 1626 1610 1627 # We need to be able to dynamically add to the list of prefetch_related 1611 1628 # lookups that we look up (see below). So we need some book keeping to 1612 1629 # ensure we don't do duplicate work. 1613 done_lookups = set() # list of lookups like foo__bar__baz1630 seen_lookups = set() # list of lookups like foo__bar__baz 1614 1631 done_queries = {} # dictionary of things like 'foo__bar': [results] 1615 1632 1616 1633 auto_lookups = [] # we add to this as we go through. 
1617 1634 followed_descriptors = set() # recursion protection 1618 1635 1619 all_lookups = itertools.chain(related_lookups, auto_lookups) 1620 for lookup in all_lookups: 1621 if lookup in done_lookups: 1636 # For R-objects, we have two different lookups: 1637 # - lookup: This is the related object attribute name 1638 # - lookup_refpath: This is to be used when this R-object is referenced 1639 # in chained prefetches. 1640 # One way to explain these would be to say lookup is how we go forward, 1641 # lookup_refpath is what happened in the past. 1642 1643 r_objs = itertools.chain(r_objs, auto_lookups) 1644 for r_obj in r_objs: 1645 if r_obj.lookup_refpath in seen_lookups: 1622 1646 # We've done exactly this already, skip the whole thing 1623 1647 continue 1624 done_lookups.add(lookup)1648 seen_lookups.add(r_obj.lookup_refpath) 1625 1649 1626 1650 # Top level, the list of objects to decorate is the the result cache 1627 1651 # from the primary QuerySet. It won't be for deeper levels. 1628 1652 obj_list = result_cache 1629 1653 1630 attrs = lookup.split(LOOKUP_SEP)1654 attrs = r_obj.lookup.split(LOOKUP_SEP) 1631 1655 for level, attr in enumerate(attrs): 1632 1656 # Prepare main instances 1633 1657 if len(obj_list) == 0: … … def prefetch_related_objects(result_cache, related_lookups): 1655 1679 1656 1680 # We assume that objects retrieved are homogenous (which is the premise 1657 1681 # of prefetch_related), so what applies to first object applies to all. 1682 # TODO: Make sure this is really true for objects coming from generic 1683 # relations. 
1658 1684 first_obj = obj_list[0] 1659 prefetcher, descriptor, attr_found, is_fetched = get_prefetcher(first_obj, attr) 1685 prefetcher, descriptor, attr_found, is_fetched = \ 1686 get_prefetcher(first_obj, attr) 1660 1687 1661 1688 if not attr_found: 1662 1689 raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid " 1663 1690 "parameter to prefetch_related()" % 1664 (attr, first_obj.__class__.__name__, lookup)) 1691 (attr, first_obj.__class__.__name__, 1692 r_obj.lookup)) 1665 1693 1666 1694 if level == len(attrs) - 1 and prefetcher is None: 1667 1695 # Last one, this *must* resolve to something that supports 1668 1696 # prefetching, otherwise there is no point adding it and the 1669 1697 # developer asking for it has made a mistake. 1670 raise ValueError("'%s' does not resolve to a item that supports "1698 raise ValueError("'%s' does not resolve to an item that supports " 1671 1699 "prefetching - this is an invalid parameter to " 1672 "prefetch_related()." % lookup)1700 "prefetch_related()." % r_obj.lookup) 1673 1701 1674 1702 if prefetcher is not None and not is_fetched: 1675 # Check we didn't do this already 1676 current_lookup = LOOKUP_SEP.join(attrs[0:level+1]) 1703 current_lookup = r_obj.get_current_lookup(level) 1677 1704 if current_lookup in done_queries: 1678 1705 obj_list = done_queries[current_lookup] 1679 1706 else: 1680 obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr) 1707 obj_list, additional_prl = prefetch_one_level( 1708 obj_list, prefetcher, r_obj, level) 1681 1709 # We need to ensure we don't keep adding lookups from the 1682 1710 # same relationships to stop infinite recursion. So, if we 1683 1711 # are already on an automatically added lookup, don't add 1684 1712 # the new lookups from relationships we've seen already. 
1685 if not ( lookupin auto_lookups and1713 if not (r_obj in auto_lookups and 1686 1714 descriptor in followed_descriptors): 1687 for f in additional_prl:1688 new_prl = LOOKUP_SEP.join([current_lookup, f])1689 auto_lookups.append(new_prl)1690 1715 done_queries[current_lookup] = obj_list 1716 additional_prl = prl_to_r_objs(additional_prl, 1717 current_lookup) 1718 auto_lookups.extend(additional_prl) 1691 1719 followed_descriptors.add(descriptor) 1720 1721 elif isinstance(getattr(obj_list[0], attr), list): 1722 # The current part of the lookup relates to a r_obj.to_attr 1723 # defined previous fetch. This means that obj.attr is a list 1724 # of related objects, and thus we must turn the obj.attr lists 1725 # into a single related object list. 1726 new_list = [] 1727 for obj in obj_list: 1728 new_list.extend(getattr(obj, attr)) 1729 obj_list = new_list 1692 1730 else: 1693 1731 # Either a singly related object that has already been fetched 1694 1732 # (e.g. via select_related), or hopefully some other property 1695 1733 # that doesn't support prefetching but needs to be traversed. 1696 1734 1697 1735 # We replace the current list of parent objects with that list. 1736 # TODO: Check what happens if attr resolves to local field? 1737 # User typoing rel_attr_id instead of rel_attr? AND there are 1738 # multiple parts in the path left. 1698 1739 obj_list = [getattr(obj, attr) for obj in obj_list] 1699 1740 1700 1741 # Filter out 'None' so that we can continue with nullable … … def get_prefetcher(instance, attr): 1724 1765 try: 1725 1766 rel_obj = getattr(instance, attr) 1726 1767 attr_found = True 1768 # If we are following a r_obj lookup path which leads us through 1769 # a previous fetch with to_attr, then we might end up into a list 1770 # instead of related qs. This means the objects are already 1771 # fetched. 
1772 if isinstance(rel_obj, list): 1773 is_fetched = True 1727 1774 except AttributeError: 1728 1775 pass 1729 1776 else: … … def get_prefetcher(instance, attr): 1745 1792 return prefetcher, rel_obj_descriptor, attr_found, is_fetched 1746 1793 1747 1794 1748 def prefetch_one_level(instances, prefetcher, attname):1795 def prefetch_one_level(instances, prefetcher, r_obj, level): 1749 1796 """ 1750 1797 Helper function for prefetch_related_objects 1751 1798 … … def prefetch_one_level(instances, prefetcher, attname): 1768 1815 # The 'values to be matched' must be hashable as they will be used 1769 1816 # in a dictionary. 1770 1817 1818 # to_attr is the name of the attribute we will be fetching into, to_list 1819 # is False if to_attr refers to related manager. If it refers to related 1820 # manager, we will be caching in rel_manager.all(), otherwise in a list. 1821 to_attr, to_list = r_obj.get_to_attr(level) 1822 1771 1823 rel_qs, rel_obj_attr, instance_attr, single, cache_name =\ 1772 prefetcher.get_prefetch_query_set(instances) 1824 prefetcher.get_prefetch_query_set(instances, custom_qs=r_obj.qs if to_list else None) 1825 1773 1826 # We have to handle the possibility that the default manager itself added 1774 1827 # prefetch_related lookups to the QuerySet we just got back. We don't want to 1775 1828 # trigger the prefetch_related functionality by evaluating the query. … … def prefetch_one_level(instances, prefetcher, attname): 1791 1844 rel_obj_cache[rel_attr_val].append(rel_obj) 1792 1845 1793 1846 for obj in instances: 1847 # TODO: in this case we could set the reverse attribute if the relation 1848 # is o2o. Both this and the TODO below are handled by select_related 1849 # in the get_cached_row iterator construction. Maybe that code could 1850 # be generalized and shared. 
1794 1851 instance_attr_val = instance_attr(obj) 1795 1852 vals = rel_obj_cache.get(instance_attr_val, []) 1796 1853 if single: … … def prefetch_one_level(instances, prefetcher, attname): 1800 1857 else: 1801 1858 # Multi, attribute represents a manager with an .all() method that 1802 1859 # returns a QuerySet 1803 qs = getattr(obj, attname).all() 1804 qs._result_cache = vals 1805 # We don't want the individual qs doing prefetch_related now, since we 1806 # have merged this into the current work. 1807 qs._prefetch_done = True 1808 obj._prefetched_objects_cache[cache_name] = qs 1860 # TODO: we could set the reverse relation, so that if user does 1861 # access the just fetched relation in the reverse order, we would 1862 # not need to do a query. We can't do this for m2m, of course. 1863 if to_list: 1864 setattr(obj, to_attr, vals) 1865 else: 1866 # Cache in the QuerySet.all(). 1867 qs = getattr(obj, to_attr).all() 1868 qs._result_cache = vals 1869 # We don't want the individual qs doing prefetch_related now, 1870 # since we have merged this into the current work. 1871 qs._prefetch_done = True 1872 obj._prefetched_objects_cache[cache_name] = qs 1809 1873 return all_related_objects, additional_prl -
django/db/models/related.py
1 1 from django.utils.encoding import smart_unicode 2 2 from django.db.models.fields import BLANK_CHOICE_DASH 3 from django.db.models.sql.constants import LOOKUP_SEP 3 4 4 5 class BoundRelatedObject(object): 5 6 def __init__(self, related_object, field_mapping, original): … … class RelatedObject(object): 36 37 {'%s__isnull' % self.parent_model._meta.module_name: False}) 37 38 lst = [(x._get_pk_val(), smart_unicode(x)) for x in queryset] 38 39 return first_choice + lst 39 40 40 41 def get_db_prep_lookup(self, lookup_type, value, connection, prepared=False): 41 42 # Defer to the actual field definition for db prep 42 43 return self.field.get_db_prep_lookup(lookup_type, value, … … class RelatedObject(object): 67 68 68 69 def get_cache_name(self): 69 70 return "_%s_cache" % self.get_accessor_name() 71 72 73 # Not knowing a better place for this, I just planted R here. 74 # Feel free to move this to a better place or remove this comment. 75 class R(object): 76 """ 77 A class used for passing options to .prefetch_related. Note that instances 78 of this class should be considered immutable. 79 """ 80 81 # For R-objects, we have two different internal lookup paths: 82 # - lookup: This is the related object attribute name 83 # - lookup_refpath: This is to be used when this R-object is referenced 84 # in chained prefetches. 85 # Check out the source of R-objects to see what is happening there. 86 # 87 # The difference is needed, because when we chain R-objects with to_attr 88 # defined, the lookup (how we got here) and lookup_refpath (how to 89 # get forward from here) will be different. For example: 90 # R('foo', to_attr='foolst') -> lookup = foo, that is we are going 91 # to prefetch through relation foo. 92 # 93 # If there would be another qs produced by R, the lookup_refpath would 94 # need to be 'foolst__nextpart'. Otherwise we can't distinguish between 95 # two different prefetch_related lookups to 'foo' (perhaps with custom 96 # querysets). 
97 # 98 # Luckily the user does not need to know anything about this. 99 100 def __init__(self, lookup, to_attr=None, qs=None): 101 if qs is not None and not to_attr: 102 raise ValueError('When custom qs is defined, to_attr ' 103 'must also be defined') 104 self.lookup = lookup 105 self.to_attr = to_attr 106 self.qs = qs._clone() if qs is not None else None 107 108 def _new_prefixed(self, prefix): 109 """ 110 _new_prefixed is to be used when prefetches are chained internally. 111 The returned R-object is identical to self, except that lookup 112 is prefixed with prefix. 113 """ 114 new_lookup = prefix + LOOKUP_SEP + self.lookup 115 return R(new_lookup, to_attr=self.to_attr, qs=self.qs) 116 117 def __unicode__(self): 118 return ("lookup: %s, to_attr: %s, qs: %s" % 119 (self.lookup, self.to_attr or None, self.qs)) 120 121 def __repr__(self): 122 return '<%s: %s>' % (self.__class__.__name__, unicode(self)) 123 124 def __eq__(self, other): 125 if isinstance(other, R): 126 return self.lookup_refpath == other.lookup_refpath 127 return False 128 129 def _lookup_refpath(self): 130 if self.to_attr is None: 131 return self.lookup 132 else: 133 path, sep, last_part = self.lookup.rpartition(LOOKUP_SEP) 134 return path + sep + self.to_attr 135 lookup_refpath = property(_lookup_refpath) 136 137 def get_current_lookup(self, level): 138 """ 139 Returns the first (level + 1) parts of self.lookup_refpath, joined with LOOKUP_SEP. 140 """ 141 parts = self.lookup_refpath.split(LOOKUP_SEP) 142 return LOOKUP_SEP.join(parts[0:level + 1]) 143 144 def get_to_attr(self, level): 145 """ 146 Returns the name of the attribute the results should be fetched into, 147 and a flag telling whether the objects will be fetched into a list 148 (True) or cached on the related object manager (False). 149 """ 150 parts = self.lookup_refpath.split(LOOKUP_SEP) 151 if self.to_attr is None or level < len(parts) - 1: 152 return parts[level], False 153 else: 154 return self.to_attr, True