Ticket #17001: #17001-prefetch_extensions.diff

File #17001-prefetch_extensions.diff, 25.5 KB (added by German M. Bravo, 12 years ago)

Fixed issue with multiple level prefetch

  • django/contrib/contenttypes/generic.py

    class GenericForeignKey(object):  
    6262            # This should never happen. I love comments like this, don't you?
    6363            raise Exception("Impossible arguments to GFK.get_content_type!")
    6464
    65     def get_prefetch_query_set(self, instances):
     65    def get_prefetch_query_set(self, instances, custom_qs=None):
     66        if custom_qs is not None:
     67            raise ValueError("Custom queryset can't be used for this lookup")
    6668        # For efficiency, group the instances by content type and then do one
    6769        # query per model
    6870        fk_dict = defaultdict(set)
    def create_generic_related_manager(superclass):  
    320322                db = self._db or router.db_for_read(self.model, instance=self.instance)
    321323                return super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**self.core_filters)
    322324
    323         def get_prefetch_query_set(self, instances):
    324             db = self._db or router.db_for_read(self.model, instance=instances[0])
     325        def get_prefetch_query_set(self, instances, custom_qs=None):
     326            if not instances:
     327                return self.model._default_manager.none()
    325328            query = {
    326329                '%s__pk' % self.content_type_field_name: self.content_type.id,
    327330                '%s__in' % self.object_id_field_name:
    328331                    set(obj._get_pk_val() for obj in instances)
    329                 }
    330             qs = super(GenericRelatedObjectManager, self).get_query_set().using(db).filter(**query)
     332            }
     333            if custom_qs is not None:
     334                qs = custom_qs.filter(**query)
     335            else:
     336                db = self._db or router.db_for_read(self.model, instance=instances[0])
     337                qs = super(GenericRelatedObjectManager, self).get_query_set()\
     338                         .using(db).filter(**query)
    331339            return (qs,
    332340                    attrgetter(self.object_id_field_name),
    333341                    lambda obj: obj._get_pk_val(),
    334342                    False,
    335343                    self.prefetch_cache_name)
    336344
     345
     346        def all(self):
     347            try:
     348                return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
     349            except (AttributeError, KeyError):
     350                return super(GenericRelatedObjectManager, self).all()
     351
     352
    337353        def add(self, *objs):
    338354            for obj in objs:
    339355                if not isinstance(obj, self.model):
  • django/db/models/__init__.py

    from django.db import connection  
    44from django.db.models.loading import get_apps, get_app, get_models, get_model, register_models
    55from django.db.models.query import Q
    66from django.db.models.expressions import F
     7from django.db.models.related import R
    78from django.db.models.manager import Manager
    89from django.db.models.base import Model
    910from django.db.models.aggregates import *
    1011from django.db.models.fields import *
    1112from django.db.models.fields.subclassing import SubfieldBase
    1213from django.db.models.fields.files import FileField, ImageField
    13 from django.db.models.fields.related import ForeignKey, OneToOneField, ManyToManyField, ManyToOneRel, ManyToManyRel, OneToOneRel
    14 from django.db.models.deletion import CASCADE, PROTECT, SET, SET_NULL, SET_DEFAULT, DO_NOTHING, ProtectedError
     14from django.db.models.fields.related import (ForeignKey, OneToOneField,
     15        ManyToManyField, ManyToOneRel, ManyToManyRel, OneToOneRel)
     16from django.db.models.deletion import (CASCADE, PROTECT, SET, SET_NULL,
     17        SET_DEFAULT, DO_NOTHING, ProtectedError)
    1518from django.db.models import signals
    1619from django.utils.decorators import wraps
    1720
  • django/db/models/fields/related.py

    class SingleRelatedObjectDescriptor(object):  
    236236        db = router.db_for_read(self.related.model, **db_hints)
    237237        return self.related.model._base_manager.using(db)
    238238
    239     def get_prefetch_query_set(self, instances):
     239    def get_prefetch_query_set(self, instances, custom_qs=None):
     240        if custom_qs is not None:
     241            # TODO: This error message is too SQLish, and might be downright
     242            # wrong.
     243            raise ValueError(
     244                "Custom querysets can't be used for one-to-one relations")
     245
    240246        vals = set(instance._get_pk_val() for instance in instances)
    241247        params = {'%s__pk__in' % self.related.field.name: vals}
    242248        return (self.get_query_set(),
    class ReverseSingleRelatedObjectDescriptor(object):  
    315321        else:
    316322            return QuerySet(self.field.rel.to).using(db)
    317323
    318     def get_prefetch_query_set(self, instances):
     324    def get_prefetch_query_set(self, instances, custom_qs=None):
     325        if custom_qs is not None:
     326            # TODO: This error message is too SQLish, and I am not even sure
     327            # this descriptor is used for m2o...
     328            raise ValueError(
     329                "Custom querysets can't be used for many-to-one relations")
     330
    319331        vals = set(getattr(instance, self.field.attname) for instance in instances)
    320332        other_field = self.field.rel.get_related_field()
    321333        if other_field.rel:
    class ForeignRelatedObjectsDescriptor(object):  
    460472                    db = self._db or router.db_for_read(self.model, instance=self.instance)
    461473                    return super(RelatedManager, self).get_query_set().using(db).filter(**self.core_filters)
    462474
    463             def get_prefetch_query_set(self, instances):
    464                 db = self._db or router.db_for_read(self.model, instance=instances[0])
     475            def get_prefetch_query_set(self, instances, custom_qs=None):
     476                """
     477                Return a queryset that does the bulk lookup needed
     478                by prefetch_related functionality.
     479                """
    465480                query = {'%s__%s__in' % (rel_field.name, attname):
    466                              set(getattr(obj, attname) for obj in instances)}
    467                 qs = super(RelatedManager, self).get_query_set().using(db).filter(**query)
     481                            set(getattr(obj, attname) for obj in instances)}
     482                if custom_qs is not None:
     483                    qs = custom_qs.filter(**query)
     484                else:
     485                    db = self._db or router.db_for_read(self.model, instance=instances[0])
     486                    qs = super(RelatedManager, self).get_query_set().\
     487                                    using(db).filter(**query)
    468488                return (qs,
    469489                        attrgetter(rel_field.get_attname()),
    470490                        attrgetter(attname),
    471491                        False,
    472492                        rel_field.related_query_name())
    473493
     494            def all(self):
     495                try:
     496                    return self.instance._prefetched_objects_cache[rel_field.related_query_name()]
     497                except (AttributeError, KeyError):
     498                    return super(RelatedManager, self).all()
     499
    474500            def add(self, *objs):
    475501                for obj in objs:
    476502                    if not isinstance(obj, self.model):
    def create_many_related_manager(superclass, rel):  
    542568                db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
    543569                return super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**self.core_filters)
    544570
    545         def get_prefetch_query_set(self, instances):
     571        def get_prefetch_query_set(self, instances, custom_qs=None):
    546572            instance = instances[0]
    547573            from django.db import connections
    548574            db = self._db or router.db_for_read(instance.__class__, instance=instance)
    549             query = {'%s__pk__in' % self.query_field_name:
    550                          set(obj._get_pk_val() for obj in instances)}
    551             qs = super(ManyRelatedManager, self).get_query_set().using(db)._next_is_sticky().filter(**query)
     575            query = {'%s__pk__in' % self.query_field_name:
     576                         set(obj._get_pk_val() for obj in instances)}
     577
     578            if custom_qs is not None:
     579                qs = custom_qs._next_is_sticky().filter(**query)
     580            else:
     581                qs = (super(ManyRelatedManager, self).get_query_set().using(db)
     582                      ._next_is_sticky().filter(**query))
    552583
    553584            # M2M: need to annotate the query in order to get the primary model
    554             # that the secondary model was actually related to. We know that
    555             # there will already be a join on the join table, so we can just add
    556             # the select.
     585            # that the secondary model was actually related to.
     586
     587            # We know that there will already be a join on the join table, so we
     588            # can just add the select.
    557589
    558590            # For non-autocreated 'through' models, can't assume we are
    559591            # dealing with PK values.
     592
     593            # TODO: This is at the wrong level of abstraction. We should not
     594            # be generating SQL here, but instead maybe pass this information
     595            # to the connection. NoSQL camp will have problems with this, for
     596            # example.
    560597            fk = self.through._meta.get_field(self.source_field_name)
    561598            source_col = fk.column
    562599            join_table = self.through._meta.db_table
    563             connection = connections[db]
     600            if custom_qs is not None:
     601                connection = connections[custom_qs.db]
     602            else:
     603                connection = connections[db]
     604
    564605            qn = connection.ops.quote_name
    565606            qs = qs.extra(select={'_prefetch_related_val':
    566607                                      '%s.%s' % (qn(join_table), qn(source_col))})
    def create_many_related_manager(superclass, rel):  
    570611                    False,
    571612                    self.prefetch_cache_name)
    572613
     614        def all(self):
     615            try:
     616                return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
     617            except (AttributeError, KeyError):
     618                return super(ManyRelatedManager, self).all()
     619
    573620        # If the ManyToMany relation has an intermediary model,
    574621        # the add and remove methods do not exist.
    575622        if rel.through._meta.auto_created:
  • django/db/models/query.py

    from django.db.models.query_utils import (Q, select_related_descend,  
    1212    deferred_class_factory, InvalidQuery)
    1313from django.db.models.deletion import Collector
    1414from django.db.models import sql
     15from django.db.models.related import R
    1516from django.utils.functional import partition
    1617
    1718# Used to control how many objects are worked with at once in some cases (e.g.
    def insert_query(model, objs, fields, return_id=False, raw=False, using=None):  
    15921593    query.insert_values(fields, objs, raw=raw)
    15931594    return query.get_compiler(using=using).execute_sql(return_id)
    15941595
     1596def prl_to_r_objs(lookups, prefix=None):
     1597    """
     1598    This little helper function converts a list containing R objects and/or
     1599    plain lookup strings into a list consisting only of R objects.
     1600    """
     1601    from django.db.models.sql.constants import LOOKUP_SEP
     1602    if prefix is None:
     1603        return [isinstance(lup, R) and lup or R(lup) for lup in lookups]
     1604    ret = []
     1605    for lup in lookups:
     1606        if isinstance(lup, R):
     1607            r_obj = lup._new_prefixed(prefix)
     1608        else:
     1609            r_obj = R(prefix + LOOKUP_SEP + lup)
     1610        ret.append(r_obj)
     1611    return ret
    15951612
    15961613def prefetch_related_objects(result_cache, related_lookups):
    15971614    """
    def prefetch_related_objects(result_cache, related_lookups):  
    16041621
    16051622    if len(result_cache) == 0:
    16061623        return # nothing to do
    1607 
     1624    r_objs = prl_to_r_objs(related_lookups)
    16081625    model = result_cache[0].__class__
    16091626
    16101627    # We need to be able to dynamically add to the list of prefetch_related
    16111628    # lookups that we look up (see below).  So we need some book keeping to
    16121629    # ensure we don't do duplicate work.
    1613     done_lookups = set() # list of lookups like foo__bar__baz
     1630    seen_lookups = set() # list of lookups like foo__bar__baz
    16141631    done_queries = {}    # dictionary of things like 'foo__bar': [results]
    16151632
    16161633    auto_lookups = [] # we add to this as we go through.
    16171634    followed_descriptors = set() # recursion protection
    16181635
    1619     all_lookups = itertools.chain(related_lookups, auto_lookups)
    1620     for lookup in all_lookups:
    1621         if lookup in done_lookups:
     1636    # For R-objects, we have two different lookups:
     1637    #   - lookup: This is the related object attribute name
     1638    #   - lookup_refpath: This is to be used when this R-object is referenced
     1639    #     in chained prefetches.
     1640    # One way to explain these would be to say lookup is how we go forward,
     1641    # lookup_refpath is what happened in the past.
     1642
     1643    r_objs = itertools.chain(r_objs, auto_lookups)
     1644    for r_obj in r_objs:
     1645        if r_obj.lookup_refpath in seen_lookups:
    16221646            # We've done exactly this already, skip the whole thing
    16231647            continue
    1624         done_lookups.add(lookup)
     1648        seen_lookups.add(r_obj.lookup_refpath)
    16251649
    16261650        # Top level, the list of objects to decorate is the the result cache
    16271651        # from the primary QuerySet. It won't be for deeper levels.
    16281652        obj_list = result_cache
    16291653
    1630         attrs = lookup.split(LOOKUP_SEP)
     1654        attrs = r_obj.lookup.split(LOOKUP_SEP)
    16311655        for level, attr in enumerate(attrs):
    16321656            # Prepare main instances
    16331657            if len(obj_list) == 0:
    def prefetch_related_objects(result_cache, related_lookups):  
    16551679
    16561680            # We assume that objects retrieved are homogenous (which is the premise
    16571681            # of prefetch_related), so what applies to first object applies to all.
     1682            # TODO: Make sure this is really true for objects coming from generic
     1683            # relations.
    16581684            first_obj = obj_list[0]
    1659             prefetcher, descriptor, attr_found, is_fetched = get_prefetcher(first_obj, attr)
     1685            prefetcher, descriptor, attr_found, is_fetched = \
     1686                get_prefetcher(first_obj, attr)
    16601687
    16611688            if not attr_found:
    16621689                raise AttributeError("Cannot find '%s' on %s object, '%s' is an invalid "
    16631690                                     "parameter to prefetch_related()" %
    1664                                      (attr, first_obj.__class__.__name__, lookup))
     1691                                     (attr, first_obj.__class__.__name__,
     1692                                      r_obj.lookup))
    16651693
    16661694            if level == len(attrs) - 1 and prefetcher is None:
    16671695                # Last one, this *must* resolve to something that supports
    16681696                # prefetching, otherwise there is no point adding it and the
    16691697                # developer asking for it has made a mistake.
    1670                 raise ValueError("'%s' does not resolve to a item that supports "
     1698                raise ValueError("'%s' does not resolve to an item that supports "
    16711699                                 "prefetching - this is an invalid parameter to "
    1672                                  "prefetch_related()." % lookup)
     1700                                 "prefetch_related()." % r_obj.lookup)
    16731701
    16741702            if prefetcher is not None and not is_fetched:
    1675                 # Check we didn't do this already
    1676                 current_lookup = LOOKUP_SEP.join(attrs[0:level+1])
     1703                current_lookup = r_obj.get_current_lookup(level)
    16771704                if current_lookup in done_queries:
    16781705                    obj_list = done_queries[current_lookup]
    16791706                else:
    1680                     obj_list, additional_prl = prefetch_one_level(obj_list, prefetcher, attr)
     1707                    obj_list, additional_prl = prefetch_one_level(
     1708                            obj_list, prefetcher, r_obj, level)
    16811709                    # We need to ensure we don't keep adding lookups from the
    16821710                    # same relationships to stop infinite recursion. So, if we
    16831711                    # are already on an automatically added lookup, don't add
    16841712                    # the new lookups from relationships we've seen already.
    1685                     if not (lookup in auto_lookups and
     1713                    if not (r_obj in auto_lookups and
    16861714                            descriptor in followed_descriptors):
    1687                         for f in additional_prl:
    1688                             new_prl = LOOKUP_SEP.join([current_lookup, f])
    1689                             auto_lookups.append(new_prl)
    16901715                        done_queries[current_lookup] = obj_list
     1716                        additional_prl = prl_to_r_objs(additional_prl,
     1717                                                       current_lookup)
     1718                        auto_lookups.extend(additional_prl)
    16911719                    followed_descriptors.add(descriptor)
     1720
     1721            elif isinstance(getattr(obj_list[0], attr), list):
     1722                # The current part of the lookup refers to an r_obj.to_attr
     1723                # defined by a previous fetch. This means that obj.attr is a
     1724                # list of related objects, and thus we must merge the obj.attr
     1725                # lists into a single list of related objects.
     1726                new_list = []
     1727                for obj in obj_list:
     1728                    new_list.extend(getattr(obj, attr))
     1729                obj_list = new_list
    16921730            else:
    16931731                # Either a singly related object that has already been fetched
    16941732                # (e.g. via select_related), or hopefully some other property
    16951733                # that doesn't support prefetching but needs to be traversed.
    16961734
    16971735                # We replace the current list of parent objects with that list.
     1736                # TODO: Check what happens if attr resolves to local field?
     1737                # User typoing rel_attr_id instead of rel_attr? AND there are
     1738                # multiple parts in the path left.
    16981739                obj_list = [getattr(obj, attr) for obj in obj_list]
    16991740
    17001741                # Filter out 'None' so that we can continue with nullable
    def get_prefetcher(instance, attr):  
    17241765        try:
    17251766            rel_obj = getattr(instance, attr)
    17261767            attr_found = True
     1768            # If we are following an r_obj lookup path which leads us through
     1769            # a previous fetch with to_attr, then we might end up with a list
     1770            # instead of a related qs. This means the objects are already
     1771            # fetched.
     1772            if isinstance(rel_obj, list):
     1773                is_fetched = True
    17271774        except AttributeError:
    17281775            pass
    17291776    else:
    def get_prefetcher(instance, attr):  
    17451792    return prefetcher, rel_obj_descriptor, attr_found, is_fetched
    17461793
    17471794
    1748 def prefetch_one_level(instances, prefetcher, attname):
     1795def prefetch_one_level(instances, prefetcher, r_obj, level):
    17491796    """
    17501797    Helper function for prefetch_related_objects
    17511798
    def prefetch_one_level(instances, prefetcher, attname):  
    17681815    # The 'values to be matched' must be hashable as they will be used
    17691816    # in a dictionary.
    17701817
     1818    # to_attr is the name of the attribute we will be fetching into; to_list
     1819    # is False if to_attr refers to a related manager. If it refers to a
     1820    # related manager, we will be caching in rel_manager.all(), otherwise in a list.
     1821    to_attr, to_list = r_obj.get_to_attr(level)
     1822
    17711823    rel_qs, rel_obj_attr, instance_attr, single, cache_name =\
    1772         prefetcher.get_prefetch_query_set(instances)
     1824        prefetcher.get_prefetch_query_set(instances, custom_qs=r_obj.qs if to_list else None)
     1825
    17731826    # We have to handle the possibility that the default manager itself added
    17741827    # prefetch_related lookups to the QuerySet we just got back. We don't want to
    17751828    # trigger the prefetch_related functionality by evaluating the query.
    def prefetch_one_level(instances, prefetcher, attname):  
    17911844        rel_obj_cache[rel_attr_val].append(rel_obj)
    17921845
    17931846    for obj in instances:
     1847        # TODO: in this case we could set the reverse attribute if the relation
     1848        # is o2o. Both this and the TODO below are handled by select_related
     1849        # in the get_cached_row iterator construction. Maybe that code could
     1850        # be generalized and shared.
    17941851        instance_attr_val = instance_attr(obj)
    17951852        vals = rel_obj_cache.get(instance_attr_val, [])
    17961853        if single:
    def prefetch_one_level(instances, prefetcher, attname):  
    18001857        else:
    18011858            # Multi, attribute represents a manager with an .all() method that
    18021859            # returns a QuerySet
    1803             qs = getattr(obj, attname).all()
    1804             qs._result_cache = vals
    1805             # We don't want the individual qs doing prefetch_related now, since we
    1806             # have merged this into the current work.
    1807             qs._prefetch_done = True
    1808             obj._prefetched_objects_cache[cache_name] = qs
     1860            # TODO: we could set the reverse relation, so that if user does
     1861            # access the just fetched relation in the reverse order, we would
     1862            # not need to do a query. We can't do this for m2m, of course.
     1863            if to_list:
     1864                setattr(obj, to_attr, vals)
     1865            else:
     1866                # Cache in the QuerySet.all().
     1867                qs = getattr(obj, to_attr).all()
     1868                qs._result_cache = vals
     1869                # We don't want the individual qs doing prefetch_related now,
     1870                # since we have merged this into the current work.
     1871                qs._prefetch_done = True
     1872                obj._prefetched_objects_cache[cache_name] = qs
    18091873    return all_related_objects, additional_prl
  • django/db/models/related.py

     
    11from django.utils.encoding import smart_unicode
    22from django.db.models.fields import BLANK_CHOICE_DASH
     3from django.db.models.sql.constants import LOOKUP_SEP
    34
    45class BoundRelatedObject(object):
    56    def __init__(self, related_object, field_mapping, original):
    class RelatedObject(object):  
    3637                {'%s__isnull' % self.parent_model._meta.module_name: False})
    3738        lst = [(x._get_pk_val(), smart_unicode(x)) for x in queryset]
    3839        return first_choice + lst
    39        
     40
    4041    def get_db_prep_lookup(self, lookup_type, value, connection, prepared=False):
    4142        # Defer to the actual field definition for db prep
    4243        return self.field.get_db_prep_lookup(lookup_type, value,
    class RelatedObject(object):  
    6768
    6869    def get_cache_name(self):
    6970        return "_%s_cache" % self.get_accessor_name()
     71
     72
     73# Not knowing a better place for this, I just planted R here.
     74# Feel free to move this to a better place or remove this comment.
     75class R(object):
     76    """
     77    A class used for passing options to .prefetch_related. Note that instances
     78    of this class should be considered immutable.
     79    """
     80
     81    # For R-objects, we have two different internal lookup paths:
     82    #   - lookup: This is the related object attribute name
     83    #   - lookup_refpath: This is to be used when this R-object is referenced
     84    #     in chained prefetches.
     85    # Check out the source of R-objects to see what is happening there.
     86    #
     87    # The difference is needed, because when we chain R-objects with to_attr
     88    # defined, the lookup (how we got here) and lookup_refpath (how to
     89    # get forward from here) will be different. For example:
     90    # R('foo', to_attr='foolst') -> lookup = foo, that is we are going
     91    # to prefetch through relation foo.
     92    #
     93    # If there would be another qs produced by R, the lookup_refpath would
     94    # need to be 'foolst__nextpart'. Otherwise we can't distinguish between
     95    # two different prefetch_related lookups to 'foo' (perhaps with custom
     96    # querysets).
     97    #
     98    # Luckily the user does not need to know anything about this.
     99
     100    def __init__(self, lookup, to_attr=None, qs=None):
     101        if qs is not None and not to_attr:
     102            raise ValueError('When custom qs is defined, to_attr '
     103                             'must also be defined')
     104        self.lookup = lookup
     105        self.to_attr = to_attr
     106        self.qs = qs._clone() if qs is not None else None
     107
     108    def _new_prefixed(self, prefix):
     109        """
     110        _new_prefixed is to be used when prefetches are chained internally.
     111        The returned R-object is identical to self, except that its lookup
     112        is prefixed with prefix.
     113        """
     114        new_lookup = prefix + LOOKUP_SEP + self.lookup
     115        return R(new_lookup, to_attr=self.to_attr, qs=self.qs)
     116
     117    def __unicode__(self):
     118        return ("lookup: %s, to_attr: %s, qs: %s" %
     119            (self.lookup, self.to_attr or None, self.qs))
     120
     121    def __repr__(self):
     122        return '<%s: %s>' % (self.__class__.__name__, unicode(self))
     123
     124    def __eq__(self, other):
     125        if isinstance(other, R):
     126            return self.lookup_refpath == other.lookup_refpath
     127        return False
     128
     129    def _lookup_refpath(self):
     130        if self.to_attr is None:
     131            return self.lookup
     132        else:
     133            path, sep, last_part = self.lookup.rpartition(LOOKUP_SEP)
     134            return path + sep + self.to_attr
     135    lookup_refpath = property(_lookup_refpath)
     136
     137    def get_current_lookup(self, level):
     138        """
     139        Returns the first level + 1 parts of the self.lookup_refpath
     140        """
     141        parts = self.lookup_refpath.split(LOOKUP_SEP)
     142        return LOOKUP_SEP.join(parts[0:level + 1])
     143
     144    def get_to_attr(self, level):
     145        """
     146        Returns the name of the attribute the results should be fetched
     147        into, and whether the objects will be fetched into a list (True)
     148        or cached on a related object manager (False).
     149        """
     150        parts = self.lookup_refpath.split(LOOKUP_SEP)
     151        if self.to_attr is None or level < len(parts) - 1:
     152            return parts[level], False
     153        else:
     154            return self.to_attr, True
Back to Top