Ticket #1219: bulk-delete.patch

File bulk-delete.patch, 19.4 KB (added by Russell Keith-Magee, 19 years ago)

Patch implementing bulk delete in post-descriptor magic-removal

  • django/db/models/base.py

    33from django.db.models.fields import AutoField, ImageField
    44from django.db.models.fields.related import OneToOne, ManyToOne
    55from django.db.models.related import RelatedObject
    6 from django.db.models.query import orderlist2sql
     6from django.db.models.query import orderlist2sql, delete_objects
    77from django.db.models.options import Options, AdminOptions
    88from django.db import connection, backend
    99from django.db.models import signals
    4949        register_models(new_class._meta.app_label, new_class)
    5050        return new_class
    52 def cmp_cls(x, y):
    53     for field in x._meta.fields:
    54         if field.rel and not field.null and field.rel.to == y:
    55             return -1
    56     for field in y._meta.fields:
    57         if field.rel and not field.null and field.rel.to == x:
    58             return 1
    59     return 0
    6152class Model(object):
    6253    __metaclass__ = ModelBase
    188179    save.alters_data = True
    190     def __collect_sub_objects(self, seen_objs):
     181    def _collect_sub_objects(self, seen_objs):
    191182        """
    192183        Recursively populates seen_objs with all objects related to this object.
    193184        When done, seen_objs will be in the format:
    207198                except ObjectDoesNotExist:
    208199                    pass
    209200                else:
    210                     sub_obj.__collect_sub_objects(seen_objs)
     201                    sub_obj._collect_sub_objects(seen_objs)
    211202            else:
    212203                for sub_obj in getattr(self, rel_opts_name).all():
    213                     sub_obj.__collect_sub_objects(seen_objs)
     204                    sub_obj._collect_sub_objects(seen_objs)
    215206    def delete(self):
    216207        assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
     209        # Find all the objects that need to be deleted
    217210        seen_objs = {}
    218         self.__collect_sub_objects(seen_objs)
    220         seen_classes = set(seen_objs.keys())
    221         ordered_classes = list(seen_classes)
    222         ordered_classes.sort(cmp_cls)
    224         cursor = connection.cursor()
    226         for cls in ordered_classes:
    227             seen_objs[cls] = seen_objs[cls].items()
    228             seen_objs[cls].sort()
    229             for pk_val, instance in seen_objs[cls]:
    230                 dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
    232                 for related in cls._meta.get_all_related_many_to_many_objects():
    233                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    234                         (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
    235                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    236                         [pk_val])
    237                 for f in cls._meta.many_to_many:
    238                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    239                         (backend.quote_name(f.get_m2m_db_table(cls._meta)),
    240                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    241                         [pk_val])
    242                 for field in cls._meta.fields:
    243                     if field.rel and field.null and field.rel.to in seen_classes:
    244                         cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \
    245                             (backend.quote_name(cls._meta.db_table), backend.quote_name(field.column),
    246                             backend.quote_name(cls._meta.pk.column)), [pk_val])
    247                         setattr(instance, field.attname, None)
    249         for cls in ordered_classes:
    250             seen_objs[cls].reverse()
    251             for pk_val, instance in seen_objs[cls]:
    252                 cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    253                     (backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)),
    254                     [pk_val])
    255                 setattr(instance, cls._meta.pk.attname, None)
    256                 dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
    258         connection.commit()
     211        self._collect_sub_objects(seen_objs)
     213        # Actually delete the objects
     214        delete_objects(seen_objs)
    260216    delete.alters_data = True
    262218    def _get_FIELD_display(self, field):
  • django/db/models/manager.py

    5757    def dates(self, *args, **kwargs):
    5858        return self.get_query_set().dates(*args, **kwargs)
    60     def delete(self, *args, **kwargs):
    61         return self.get_query_set().delete(*args, **kwargs)
    6360    def distinct(self, *args, **kwargs):
    6461        return self.get_query_set().distinct(*args, **kwargs)
  • django/db/models/query.py

    11from django.db import backend, connection
    22from django.db.models.fields import DateField, FieldDoesNotExist
     3from django.db.models import signals
     4from django.dispatch import dispatcher
    35from django.utils.datastructures import SortedDict
    47import operator
    125128        extra_select = self._select.items()
    127130        cursor = connection.cursor()
    128         select, sql, params = self._get_sql_clause(True)
     131        select, sql, params = self._get_sql_clause()
    129132        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    130133        fill_cache = self._select_related
    131134        index_end = len(self.model._meta.fields)
    149152        counter._offset = None
    150153        counter._limit = None
    151154        counter._select_related = False
    152         select, sql, params = counter._get_sql_clause(True)
     155        select, sql, params = counter._get_sql_clause()
    153156        cursor = connection.cursor()
    154157        cursor.execute("SELECT COUNT(*)" + sql, params)
    155158        return cursor.fetchone()[0]
    171174        assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
    172175        return self._clone(_limit=1, _order_by=('-'+latest_by,)).get()
    174     def delete(self, *args, **kwargs):
     177    def delete(self):
    175178        """
    176         Deletes the records with the given kwargs. If no kwargs are given,
    177         deletes records in the current QuerySet.
     179        Deletes the records in the current QuerySet.
    178180        """
    179         # Remove the DELETE_ALL argument, if it exists.
    180         delete_all = kwargs.pop('DELETE_ALL', False)
     181        del_query = self._clone()       
    182         # Check for at least one query argument.
    183         if not kwargs and not delete_all:
    184             raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
    186         if kwargs:
    187             del_query = self.filter(*args, **kwargs)
    188         else:
    189             del_query = self._clone()
    190183        # disable non-supported fields
    191184        del_query._select_related = False
    192         del_query._select = {}
    193185        del_query._order_by = []
    194186        del_query._offset = None
    195187        del_query._limit = None
    197         # Perform the SQL delete
    198         cursor = connection.cursor()
    199         _, sql, params = del_query._get_sql_clause(False)
    200         cursor.execute("DELETE " + sql, params)
     189        # Collect all the objects to be deleted, and all the objects that are related to
     190        # the objects that are to be deleted
     191        seen_objs = {}
     192        for object in del_query:
     193            object._collect_sub_objects(seen_objs)
     195        # Delete the objects   
     196        delete_objects(seen_objs)
    202198    ##################################################
    204200    ##################################################
    297293            self._result_cache = list(self.iterator())
    298294        return self._result_cache
    300     def _get_sql_clause(self, allow_joins):
     296    def _get_sql_clause(self):
    301297        opts = self.model._meta
    303299        # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
    325321        # Start composing the body of the SQL statement.
    326322        sql = [" FROM", backend.quote_name(opts.db_table)]
    328         # Check if extra tables are allowed. If not, throw an error
    329         if (tables or joins) and not allow_joins:
    330             raise TypeError, "Joins are not allowed in this type of query"
    332324        # Compose the join dictionary into SQL describing the joins.
    333325        if joins:
    334326            sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
    407399            field_names = [f.attname for f in self.model._meta.fields]
    409401        cursor = connection.cursor()
    410         select, sql, params = self._get_sql_clause(True)
     402        select, sql, params = self._get_sql_clause()
    411403        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
    412404        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    413405        while 1:
    429421        if self._field.null:
    430422            date_query._where.append('%s.%s IS NOT NULL' % \
    431423                (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
    432         select, sql, params = self._get_sql_clause(True)
     424        select, sql, params = self._get_sql_clause()
    433425        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
    434426            (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
    435427            backend.quote_name(self._field.column))), sql, self._order)
    762754        params.extend(field.get_db_prep_lookup(clause, value))
    764756    return tables, joins, where, params
     758def compare_models(x, y):
     759    "Comparator for Models that puts models in an order where dependencies are easily resolved."
     760    for field in x._meta.fields:
     761        if field.rel and not field.null and field.rel.to == y:
     762            return -1
     763    for field in y._meta.fields:
     764        if field.rel and not field.null and field.rel.to == x:
     765            return 1
     766    return 0
     768def delete_objects(seen_objs):
     769    "Iterate through a list of seen classes, and remove any instances that are referred to"
     770    seen_classes = set(seen_objs.keys())
     771    ordered_classes = list(seen_classes)
     772    ordered_classes.sort(compare_models)
     774    cursor = connection.cursor()
     776    for cls in ordered_classes:
     777        seen_objs[cls] = seen_objs[cls].items()
     778        seen_objs[cls].sort()
     780        # Pre notify all instances to be deleted
     781        for pk_val, instance in seen_objs[cls]:
     782            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
     784        pk_list = [pk for pk,instance in seen_objs[cls]]
     785        for related in cls._meta.get_all_related_many_to_many_objects():
     786            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     787                (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
     788                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     789                    ','.join('%s' for pk in pk_list)),
     790                pk_list)
     791        for f in cls._meta.many_to_many:
     792            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     793                (backend.quote_name(f.get_m2m_db_table(cls._meta)),
     794                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     795                    ','.join(['%s' for pk in pk_list])),
     796                pk_list)
     797        for field in cls._meta.fields:
     798            if field.rel and field.null and field.rel.to in seen_classes:
     799                cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \
     800                    (backend.quote_name(cls._meta.db_table),
     801                        backend.quote_name(field.column),
     802                        backend.quote_name(cls._meta.pk.column),
     803                        ','.join(['%s' for pk in pk_list])),
     804                    pk_list)
     806    # Now delete the actual data
     807    for cls in ordered_classes:
     808        seen_objs[cls].reverse()
     809        pk_list = [pk for pk,instance in seen_objs[cls]]
     811        cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     812            (backend.quote_name(cls._meta.db_table),
     813                backend.quote_name(cls._meta.pk.column),
     814                ','.join(['%s' for pk in pk_list])),
     815            pk_list)
     817        # Last cleanup; set NULLs where there once was a reference to the object,
     818        # NULL the primary key of the found objects, and perform post-notification.
     819        for pk_val, instance in seen_objs[cls]:
     820            for field in cls._meta.fields:
     821                if field.rel and field.null and field.rel.to in seen_classes:
     822                    setattr(instance, field.attname, None)
     824            setattr(instance, cls._meta.pk.attname, None)
     825            dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
     827    connection.commit()
  • tests/modeltests/basic/models.py

    99class Article(models.Model):
    1010    headline = models.CharField(maxlength=100, default='Default headline')
    1111    pub_date = models.DateTimeField()
     13    def __repr__(self):
     14        return self.headline
    1315API_TESTS = """
    1517# No articles are in the system yet.
    4143# the article is represented by "<Article object>", because we haven't given
    4244# the Article model a __repr__() method.
    4345>>> Article.objects.all()
    44 [<Article object>]
     46[Area woman programs in Python]
    4648# Django provides a rich database lookup API.
    4749>>> Article.objects.get(id__exact=1)
    48 <Article object>
     50Area woman programs in Python
    4951>>> Article.objects.get(headline__startswith='Area woman')
    50 <Article object>
     52Area woman programs in Python
    5153>>> Article.objects.get(pub_date__year=2005)
    52 <Article object>
     54Area woman programs in Python
    5355>>> Article.objects.get(pub_date__year=2005, pub_date__month=7)
    54 <Article object>
     56Area woman programs in Python
    5557>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
    56 <Article object>
     58Area woman programs in Python
    5860# The "__exact" lookup type can be omitted, as a shortcut.
    5961>>> Article.objects.get(id=1)
    60 <Article object>
     62Area woman programs in Python
    6163>>> Article.objects.get(headline='Area woman programs in Python')
    62 <Article object>
     64Area woman programs in Python
    6466>>> Article.objects.filter(pub_date__year=2005)
    65 [<Article object>]
     67[Area woman programs in Python]
    6668>>> Article.objects.filter(pub_date__year=2004)
    6870>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
    69 [<Article object>]
     71[Area woman programs in Python]
    7173# Django raises an Article.DoesNotExist exception for get() if the parameters
    7274# don't match any object.
    8486# shortcut for primary-key exact lookups.
    8587# The following is identical to articles.get(id=1).
    8688>>> Article.objects.get(pk=1)
    87 <Article object>
     89Area woman programs in Python
    8991# Model instances of the same type and same ID are considered equal.
    9092>>> a = Article.objects.get(pk=1)
    235237# You can get items using index and slice notation.
    236238>>> Article.objects.all()[0]
    237 <Article object>
     239Area woman programs in Python
    238240>>> Article.objects.all()[1:3]
    239 [<Article object>, <Article object>]
     241[Second article, Third article]
    240242>>> s3 = Article.objects.filter(id__exact=3)
    241243>>> (s1 | s2 | s3)[::2]
    242 [<Article object>, <Article object>]
     244[Area woman programs in Python, Third article]
    244246# An Article instance doesn't have access to the "objects" attribute.
    245247# That's only available on the class.
    254256AttributeError: Manager isn't accessible via Article instances
    256258# Bulk delete test: How many objects before and after the delete?
    257 >>> Article.objects.count()
    258 8L
    259 >>> Article.objects.delete(id__lte=4)
    260 >>> Article.objects.count()
    261 4L
     259>>> Article.objects.all()
     260[Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8]
     261>>> Article.objects.filter(id__lte=4).delete()
     262>>> Article.objects.all()
     263[Article 6, Default headline, Article 7, Updated article 8]
    263 >>> Article.objects.delete()
    264 Traceback (most recent call last):
    265     ...
    266 TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data.
    268 >>> Article.objects.delete(DELETE_ALL=True)
    269 >>> Article.objects.count()
    270 0L
    274267from django.conf import settings
  • tests/modeltests/many_to_many/models.py

    162162>>> p2.article_set.all().order_by('headline')
    163163[Oxygen-free diet works wonders]
     165# Recreate the article and Publication we just deleted.
     166>>> p1 = Publication(id=None, title='The Python Journal')
     167>>> p1.save()
     168>>> a2 = Article(id=None, headline='NASA uses Python')
     169>>> a2.save()
     170>>> a2.publications.add(p1, p2, p3)
     172# Bulk delete some Publications - references to deleted publications should be removed
     173>>> Publication.objects.filter(title__startswith='Science').delete()
     174>>> Publication.objects.all()
     175[Highlights for Children, The Python Journal]
     176>>> Article.objects.all()
     177[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python]
     178>>> a2.publications.all()
     179[The Python Journal]
     181# Bulk delete some articles - references to deleted objects should be removed
     182>>> Article.objects.filter(headline__startswith='Django').delete()
     183>>> p1.article_set.all()
     184[NASA uses Python]
  • tests/modeltests/many_to_one/models.py

    9595# The underlying query only makes one join when a related table is referenced twice.
    9696>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith')
    97 >>> null, sql, null = query._get_sql_clause(True)
     97>>> null, sql, null = query._get_sql_clause()
    9898>>> sql.count('INNER JOIN')
    163163>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct()
    164164[John Smith]
    166 # Deletes that require joins are prohibited.
    167 >>> Article.objects.delete(reporter__first_name__startswith='Jo')
    168 Traceback (most recent call last):
    169     ...
    170 TypeError: Joins are not allowed in this type of query
    172166# If you delete a reporter, his articles will be deleted.
    173167>>> Article.objects.order_by('headline')
    174168[John's second story, Paul's story, This is a test, This is a test, This is a test]
    175169>>> Reporter.objects.order_by('first_name')
    176170[John Smith, Paul Jones]
    177 >>> r.delete()
     171>>> r2.delete()
    178172>>> Article.objects.order_by('headline')
    179 [Paul's story]
     173[John's second story, This is a test, This is a test, This is a test]
    180174>>> Reporter.objects.order_by('first_name')
    181 [Paul Jones]
     175[John Smith]
     177# Deletes using a join in the query
     178>>> Reporter.objects.filter(article__headline__startswith='This').delete()
     179>>> Reporter.objects.all()
     181>>> Article.objects.all()
