Ticket #16937: prefetch.diff

File prefetch.diff, 18.0 KB (added by Luke Plant, 13 years ago)

Patch implementing ticket, with docs and tests

  • django/db/models/fields/related.py

    diff -r 41991155a39e django/db/models/fields/related.py
    a b  
    421421                db = self._db or router.db_for_read(rel_model, instance=instance)
    422422                return superclass.get_query_set(self).using(db).filter(**(self.core_filters))
    423423
     424            def all(self):
     425                try:
     426                    return instance._prefetched_objects_cache[rel_field.related_query_name()]
     427                except (AttributeError, KeyError):
     428                    return super(RelatedManager, self).all()
     429
    424430            def add(self, *objs):
    425431                for obj in objs:
    426432                    if not isinstance(obj, self.model):
     
    474480    through = rel.through
    475481    class ManyRelatedManager(superclass):
    476482        def __init__(self, model=None, core_filters=None, instance=None, symmetrical=None,
    477                      source_field_name=None, target_field_name=None, reverse=False):
     483                     source_field_name=None, target_field_name=None, reverse=False,
     484                     prefetch_cache_name=None):
    478485            super(ManyRelatedManager, self).__init__()
    479486            self.core_filters = core_filters
    480487            self.model = model
     
    485492            self.through = through
    486493            self._pk_val = self.instance.pk
    487494            self.reverse = reverse
     495            self.prefetch_cache_name = prefetch_cache_name
    488496            if self._pk_val is None:
    489497                raise ValueError("%r instance needs to have a primary key value before a many-to-many relationship can be used." % instance.__class__.__name__)
    490498
     
    492500            db = self._db or router.db_for_read(self.instance.__class__, instance=self.instance)
    493501            return superclass.get_query_set(self).using(db)._next_is_sticky().filter(**(self.core_filters))
    494502
     503        def all(self):
     504            try:
     505                return self.instance._prefetched_objects_cache[self.prefetch_cache_name]
     506            except (AttributeError, KeyError):
     507                return super(ManyRelatedManager, self).all()
     508
    495509        # If the ManyToMany relation has an intermediary model,
    496510        # the add and remove methods do not exist.
    497511        if rel.through._meta.auto_created:
     
    669683        manager = RelatedManager(
    670684            model=rel_model,
    671685            core_filters={'%s__pk' % self.related.field.name: instance._get_pk_val()},
     686            prefetch_cache_name=self.related.field.related_query_name(),
    672687            instance=instance,
    673688            symmetrical=False,
    674689            source_field_name=self.related.field.m2m_reverse_field_name(),
     
    721736        manager = RelatedManager(
    722737            model=rel_model,
    723738            core_filters={'%s__pk' % self.field.related_query_name(): instance._get_pk_val()},
     739            prefetch_cache_name=self.field.name,
    724740            instance=instance,
    725741            symmetrical=self.field.rel.symmetrical,
    726742            source_field_name=self.field.m2m_field_name(),
  • django/db/models/manager.py

    diff -r 41991155a39e django/db/models/manager.py
    a b  
    172172    def select_related(self, *args, **kwargs):
    173173        return self.get_query_set().select_related(*args, **kwargs)
    174174
     175    def prefetch_related(self, *args, **kwargs):
     176        return self.get_query_set().prefetch_related(*args, **kwargs)
     177
    175178    def values(self, *args, **kwargs):
    176179        return self.get_query_set().values(*args, **kwargs)
    177180
  • django/db/models/query.py

    diff -r 41991155a39e django/db/models/query.py
    a b  
    3636        self._iter = None
    3737        self._sticky_filter = False
    3838        self._for_write = False
     39        self._prefetch_related = set()
    3940
    4041    ########################
    4142    # PYTHON MAGIC METHODS #
     
    8182                self._result_cache = list(self.iterator())
    8283        elif self._iter:
    8384            self._result_cache.extend(self._iter)
     85        if self._prefetch_related:
     86            self._prefetch_related_objects()
    8487        return len(self._result_cache)
    8588
    8689    def __iter__(self):
     90        if self._prefetch_related:
     91            # We need all the results in order to be able to do the prefetch
     92            # in one go. To minimize code duplication, we use the __len__
     93            # code path which also forces this, and also does the prefetch
     94            len(self)
     95
    8796        if self._result_cache is None:
    8897            self._iter = self.iterator()
    8998            self._result_cache = []
     
    106115                self._fill_cache()
    107116
    108117    def __nonzero__(self):
     118        if self._prefetch_related:
     119            # We need all the results in order to be able to do the prefetch
     120            # in one go. To minimize code duplication, we use the __len__
     121            # code path which also forces this, and also does the prefetch
     122            len(self)
     123
    109124        if self._result_cache is not None:
    110125            return bool(self._result_cache)
    111126        try:
     
    526541            return self.query.has_results(using=self.db)
    527542        return bool(self._result_cache)
    528543
     544    def _prefetch_related_objects(self):
     545        from django.db import connections
     546        # Here we access some internals of how related managers work in order to
     547        # fix a common efficiency problem with M2M and M2O lookups.
     548
     549        # This method can only be called once the result cache has been filled.
     550        obj = None
     551        for obj in self._result_cache:
     552            if not hasattr(obj, '_prefetched_objects_cache'):
     553                obj._prefetched_objects_cache = {}
     554
     555        if obj is None:
     556            return # nothing to do
     557
     558
     559        for attname in self._prefetch_related:
     560            field, model, direct, m2m = self.model._meta.get_field_by_name(attname)
     561
     562            # A new related manager is created every time the attribute is
     563            # accessed, so we can alter the following relmanager with safety
     564            relmanager = getattr(obj, attname)
     565
     566            if m2m:
     567                # Use a modified RelatedManager to build the query
     568
     569                # Collect values to filter on
     570                pks = [obj._get_pk_val() for obj in self._result_cache]
     571
     572                if direct:
     573                    field_name = field.related_query_name()
     574                else:
     575                    field_name = field.field.name
     576                relmanager.core_filters = {'%s__pk__in' % field_name: pks}
     577
     578                # M2M: need to annotate the query in order to get the PK of the
     579                # primary model that the secondary model was actually related
     580                # to. For ForeignKeys, this information is in the secondary
     581                # table already.
     582
     583                # We know that there will already be a join on the join table,
     584                # so we can just add the select.
     585                join_table = relmanager.through._meta.db_table
     586                pk_col = "%s_id" % relmanager.source_field_name
     587                connection = connections[self.db]
     588                qn = connection.ops.quote_name
     589                qs = relmanager.extra(select={'_prefetch_related_pk':'%s.%s' % (qn(join_table), qn(pk_col))})
     590
     591                # Execute query
     592                all_related_objects = list(qs)
     593
     594                # Now decorate _result_cache with relevant prefetched data
     595                for obj in self._result_cache:
     596                    qs = getattr(obj, attname).all()
     597                    pk_val = obj._get_pk_val()
     598                    qs._result_cache = [rel_obj for rel_obj in all_related_objects
     599                                        if rel_obj._prefetch_related_pk == pk_val]
     600                    obj._prefetched_objects_cache[attname] = qs
     601
     602            else:
     603                # Reverse ForeignKey
     604
     605                # Use a modified RelatedManager to build the query
     606                rel_field = field.field
     607                rel_attname = rel_field.rel.get_related_field().name
     608                rel_vals = [getattr(obj, rel_attname) for obj in self._result_cache]
     609                relmanager.core_filters = {'%s__%s__in' % (rel_field.name,
     610                                                           rel_attname):
     611                                               rel_vals}
     612
     613                # Execute query
     614                all_related_objects = list(relmanager.all())
     615
     616                # Now decorate _result_cache with relevant prefetched data
     617                for obj in self._result_cache:
     618                    qs = getattr(obj, attname).all()
     619                    field_val = getattr(obj, rel_attname)
     620                    other_attname = rel_field.get_attname()
     621                    qs._result_cache = [rel_obj for rel_obj in all_related_objects
     622                                        if getattr(rel_obj, other_attname) == field_val]
     623                    obj._prefetched_objects_cache[attname] = qs
     624
     625
    529626    ##################################################
    530627    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    531628    ##################################################
     
    649746            obj.query.max_depth = depth
    650747        return obj
    651748
     749    def prefetch_related(self, *fields):
     750        """
     751        Returns a new QuerySet instance that will prefetch Many-To-One
     752        and Many-To-Many related objects when the QuerySet is evaluated.
     753
     754        The fields specified must be attributes that return a RelatedManager of
     755        some kind when used on instances of the evaluated QuerySet.
     756
     757        These RelatedManagers will be modified so that their 'all()' method will
     758        return a QuerySet whose cache is already filled with objects that were
     759        looked up in a single batch, rather than one query per object in the
     760        current QuerySet.
     761
     762        When prefetch_related() is called more than once, the list of fields to
     763        prefetch is added to. Call prefetch_related() with no arguments to
     764        clear the list.
     765        """
     766        if fields:
     767            new_fields = self._prefetch_related.union(set(fields))
     768        else:
     769            new_fields = set()
     770        return self._clone(_prefetch_related=new_fields)
     771
    652772    def dup_select_related(self, other):
    653773        """
    654774        Copies the related selection status from the QuerySet 'other' to the
     
    798918            query.filter_is_sticky = True
    799919        c = klass(model=self.model, query=query, using=self._db)
    800920        c._for_write = self._for_write
     921        c._prefetch_related = self._prefetch_related
    801922        c.__dict__.update(kwargs)
    802923        if setup and hasattr(c, '_setup_query'):
    803924            c._setup_query()
     
    9411062        c.aggregate_names = self.aggregate_names
    9421063        if setup and hasattr(c, '_setup_query'):
    9431064            c._setup_query()
     1065        # prefetch_related doesn't make sense for a ValuesQuerySet
     1066        c._prefetch_related = None
     1067
    9441068        return c
    9451069
    9461070    def _merge_sanity_check(self, other):
     
    10511175        c._kind = self._kind
    10521176        if setup and hasattr(c, '_setup_query'):
    10531177            c._setup_query()
     1178        # prefetch_related doesn't make sense for a DateQuerySet
     1179        self._prefetch_related = None
    10541180        return c
    10551181
    10561182
  • docs/ref/models/querysets.txt

    diff -r 41991155a39e docs/ref/models/querysets.txt
    a b  
    690690A :class:`~django.db.models.OneToOneField` is not traversed in the reverse
    691691direction if you are performing a depth-based ``select_related()`` call.
    692692
     693prefetch_related
     694~~~~~~~~~~~~~~~~
     695
     696.. method:: prefetch_related(*fields)
     697
     698.. versionadded:: 1.4
     699
     700Returns a ``QuerySet`` that will automatically retrieve, in a single batch,
     701related many-to-many and many-to-one objects for the specified fields.
     702
     703This is similar to ``select_related`` for the 'many related objects' case, with
     704the following important differences:
     705
     706* ``prefetch_related`` issues a separate query to get the related objects.
     707  This is done as soon as the QuerySet begins to be evaluated.
     708
     709* It is 'single depth' only, and doesn't support join syntax.
     710
     711The fields that must be supplied to this method can be any attributes on the
     712model instances which represent related queries that return multiple
     713objects. This includes attributes representing the 'many' side of ``ForeignKey``
     714relationships and ``ManyToManyField`` attributes.
     715
     716For example, suppose you have these models::
     717
     718    class Topping(models.Model):
     719        name = models.CharField(max_length=30)
     720
     721    class Pizza(models.Model):
     722        name = models.CharField(max_length=50)
     723        toppings = models.ManyToManyField(Topping)
     724
     725        def __unicode__(self):
     726            return u"%s (%s)" % (self.name, u", ".join([topping.name
     727                                                        for topping in self.toppings.all()]))
     728
     729and run this code::
     730
     731    >>> Pizza.objects.all()
     732    [u"Hawaiian (ham, pineapple)", u"Seafood (prawns, smoked salmon)"...
     733
     734The problem with this code is that it will run a query on the Toppings table for
     735**every** item in the Pizza ``QuerySet``.  Using ``prefetch_related``, this can
     736be reduced to two queries::
     737
     738    >>> pizzas = Pizza.objects.all().prefetch_related('toppings')
     739
     740All the relevant toppings will be fetched in a single query, and used to make a
     741``QuerySet`` that has a pre-filled cache of the relevant results. This
     742``QuerySet`` is then used in the ``self.toppings.all()`` call.
     743
     744Please note the following:
     745
     746* use of ``prefetch_related`` will mean that it **always** runs the two
     747  queries - even if you never use the related objects - and it always
     748  fully populates the result cache on the primary ``QuerySet``.
     749
     750* **Only** the 'all()' method of the related manager is affected. If you want to
     751  do additional filtering of the related objects using ``filter``, for example,
     752  this will create a new ``QuerySet``, ignoring the pre-filled ``QuerySet``, and
     753  will do that filtering in the database as always.
     754
     755Therefore you should only use ``prefetch_related`` when you are sure that the
     756related objects are needed (or will be needed in the normal case), since you are
     757adding the overhead of issuing an additional database query and creating (many)
     758model instances. Where appropriate you could use this in a manager or default manager.
     759
     760Chaining ``prefetch_related`` will accumulate the fields that should have this
     761behaviour applied. To clear any ``prefetch_related`` behaviour, call the method
     762with no arguments.
     763
     764
    693765extra
    694766~~~~~
    695767
  • new file tests/modeltests/prefetch_related/models.py

    diff -r 41991155a39e tests/modeltests/prefetch_related/models.py
    - +  
     1from django.db import models
     2
     3
     4class Author(models.Model):
     5    name = models.CharField(max_length=50)
     6    first_book = models.ForeignKey('Book', related_name='first_time_authors')
     7
     8
     9class Book(models.Model):
     10    title = models.CharField(max_length=255)
     11
     12    authors = models.ManyToManyField(Author, related_name='books')
     13
  • new file tests/modeltests/prefetch_related/tests.py

    diff -r 41991155a39e tests/modeltests/prefetch_related/tests.py
    - +  
     1from django.test import TestCase
     2
     3from models import Author, Book
     4
     5
     6class PrefetchRelatedTests(TestCase):
     7
     8    def setUp(self):
     9
     10        self.book1 = Book.objects.create(title="Poems")
     11        self.book2 = Book.objects.create(title="Jane Eyre")
     12        self.book3 = Book.objects.create(title="Wuthering Heights")
     13
     14        self.author1 = Author.objects.create(name="Charlotte",
     15                                             first_book=self.book1)
     16        self.author2 = Author.objects.create(name="Anne",
     17                                             first_book=self.book1)
     18        self.author3 = Author.objects.create(name="Emily",
     19                                             first_book=self.book1)
     20
     21        self.book1.authors.add(self.author1)
     22        self.book1.authors.add(self.author2)
     23        self.book1.authors.add(self.author3)
     24        self.book2.authors.add(self.author1)
     25        self.book3.authors.add(self.author3)
     26
     27    def test_m2m_forward(self):
     28        with self.assertNumQueries(2):
     29            lists = [list(b.authors.all()) for b in Book.objects.prefetch_related('authors')]
     30
     31        normal_lists = [list(b.authors.all()) for b in Book.objects.all()]
     32        self.assertEqual(lists, normal_lists)
     33
     34
     35    def test_m2m_reverse(self):
     36        with self.assertNumQueries(2):
     37            lists = [list(a.books.all()) for a in Author.objects.prefetch_related('books')]
     38
     39        normal_lists = [list(a.books.all()) for a in Author.objects.all()]
     40        self.assertEqual(lists, normal_lists)
     41
     42    def test_foreignkey_reverse(self):
     43        with self.assertNumQueries(2):
     44            lists = [list(b.first_time_authors.all())
     45                     for b in Book.objects.prefetch_related('first_time_authors')]
     46
     47    def test_survives_clone(self):
     48        with self.assertNumQueries(2):
     49            lists = [list(b.first_time_authors.all())
     50                     for b in Book.objects.prefetch_related('first_time_authors').exclude(id=1000)]
     51
     52    def test_len(self):
     53        with self.assertNumQueries(2):
     54            qs = Book.objects.prefetch_related('first_time_authors')
     55            length = len(qs)
     56            lists = [list(b.first_time_authors.all())
     57                     for b in qs]
     58
     59    def test_bool(self):
     60        with self.assertNumQueries(2):
     61            qs = Book.objects.prefetch_related('first_time_authors')
     62            x = bool(qs)
     63            lists = [list(b.first_time_authors.all())
     64                     for b in qs]
     65
     66    def test_clear(self):
     67        with self.assertNumQueries(4):
     68            with_prefetch = Author.objects.prefetch_related('books')
     69            without_prefetch = with_prefetch.prefetch_related()
     70            lists = [list(a.books.all()) for a in without_prefetch]
Back to Top