Ticket #1219: bulk-delete.patch

File bulk-delete.patch, 19.4 KB (added by Russell Keith-Magee, 19 years ago)

Patch implementing bulk delete in post-descriptor magic-removal

  • django/db/models/base.py

     
    33from django.db.models.fields import AutoField, ImageField
    44from django.db.models.fields.related import OneToOne, ManyToOne
    55from django.db.models.related import RelatedObject
    6 from django.db.models.query import orderlist2sql
     6from django.db.models.query import orderlist2sql, delete_objects
    77from django.db.models.options import Options, AdminOptions
    88from django.db import connection, backend
    99from django.db.models import signals
     
    4949        register_models(new_class._meta.app_label, new_class)
    5050        return new_class
    5151
    52 def cmp_cls(x, y):
    53     for field in x._meta.fields:
    54         if field.rel and not field.null and field.rel.to == y:
    55             return -1
    56     for field in y._meta.fields:
    57         if field.rel and not field.null and field.rel.to == x:
    58             return 1
    59     return 0
    60 
    6152class Model(object):
    6253    __metaclass__ = ModelBase
    6354
     
    187178
    188179    save.alters_data = True
    189180
    190     def __collect_sub_objects(self, seen_objs):
     181    def _collect_sub_objects(self, seen_objs):
    191182        """
    192183        Recursively populates seen_objs with all objects related to this object.
    193184        When done, seen_objs will be in the format:
     
    207198                except ObjectDoesNotExist:
    208199                    pass
    209200                else:
    210                     sub_obj.__collect_sub_objects(seen_objs)
     201                    sub_obj._collect_sub_objects(seen_objs)
    211202            else:
    212203                for sub_obj in getattr(self, rel_opts_name).all():
    213                     sub_obj.__collect_sub_objects(seen_objs)
     204                    sub_obj._collect_sub_objects(seen_objs)
    214205
    215206    def delete(self):
    216207        assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
     208       
     209        # Find all the objects that need to be deleted
    217210        seen_objs = {}
    218         self.__collect_sub_objects(seen_objs)
    219 
    220         seen_classes = set(seen_objs.keys())
    221         ordered_classes = list(seen_classes)
    222         ordered_classes.sort(cmp_cls)
    223 
    224         cursor = connection.cursor()
    225 
    226         for cls in ordered_classes:
    227             seen_objs[cls] = seen_objs[cls].items()
    228             seen_objs[cls].sort()
    229             for pk_val, instance in seen_objs[cls]:
    230                 dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
    231 
    232                 for related in cls._meta.get_all_related_many_to_many_objects():
    233                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    234                         (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
    235                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    236                         [pk_val])
    237                 for f in cls._meta.many_to_many:
    238                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    239                         (backend.quote_name(f.get_m2m_db_table(cls._meta)),
    240                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    241                         [pk_val])
    242                 for field in cls._meta.fields:
    243                     if field.rel and field.null and field.rel.to in seen_classes:
    244                         cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \
    245                             (backend.quote_name(cls._meta.db_table), backend.quote_name(field.column),
    246                             backend.quote_name(cls._meta.pk.column)), [pk_val])
    247                         setattr(instance, field.attname, None)
    248 
    249         for cls in ordered_classes:
    250             seen_objs[cls].reverse()
    251             for pk_val, instance in seen_objs[cls]:
    252                 cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    253                     (backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)),
    254                     [pk_val])
    255                 setattr(instance, cls._meta.pk.attname, None)
    256                 dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
    257 
    258         connection.commit()
    259 
     211        self._collect_sub_objects(seen_objs)
     212       
     213        # Actually delete the objects
     214        delete_objects(seen_objs)
     215       
    260216    delete.alters_data = True
    261217
    262218    def _get_FIELD_display(self, field):
  • django/db/models/manager.py

     
    5757    def dates(self, *args, **kwargs):
    5858        return self.get_query_set().dates(*args, **kwargs)
    5959
    60     def delete(self, *args, **kwargs):
    61         return self.get_query_set().delete(*args, **kwargs)
    62 
    6360    def distinct(self, *args, **kwargs):
    6461        return self.get_query_set().distinct(*args, **kwargs)
    6562
  • django/db/models/query.py

     
    11from django.db import backend, connection
    22from django.db.models.fields import DateField, FieldDoesNotExist
     3from django.db.models import signals
     4from django.dispatch import dispatcher
    35from django.utils.datastructures import SortedDict
     6
    47import operator
    58
    69LOOKUP_SEPARATOR = '__'
     
    125128        extra_select = self._select.items()
    126129
    127130        cursor = connection.cursor()
    128         select, sql, params = self._get_sql_clause(True)
     131        select, sql, params = self._get_sql_clause()
    129132        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    130133        fill_cache = self._select_related
    131134        index_end = len(self.model._meta.fields)
     
    149152        counter._offset = None
    150153        counter._limit = None
    151154        counter._select_related = False
    152         select, sql, params = counter._get_sql_clause(True)
     155        select, sql, params = counter._get_sql_clause()
    153156        cursor = connection.cursor()
    154157        cursor.execute("SELECT COUNT(*)" + sql, params)
    155158        return cursor.fetchone()[0]
     
    171174        assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
    172175        return self._clone(_limit=1, _order_by=('-'+latest_by,)).get()
    173176
    174     def delete(self, *args, **kwargs):
     177    def delete(self):
    175178        """
    176         Deletes the records with the given kwargs. If no kwargs are given,
    177         deletes records in the current QuerySet.
     179        Deletes the records in the current QuerySet.
    178180        """
    179         # Remove the DELETE_ALL argument, if it exists.
    180         delete_all = kwargs.pop('DELETE_ALL', False)
     181        del_query = self._clone()       
    181182
    182         # Check for at least one query argument.
    183         if not kwargs and not delete_all:
    184             raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
    185 
    186         if kwargs:
    187             del_query = self.filter(*args, **kwargs)
    188         else:
    189             del_query = self._clone()
    190183        # disable non-supported fields
    191184        del_query._select_related = False
    192         del_query._select = {}
    193185        del_query._order_by = []
    194186        del_query._offset = None
    195187        del_query._limit = None
    196188
    197         # Perform the SQL delete
    198         cursor = connection.cursor()
    199         _, sql, params = del_query._get_sql_clause(False)
    200         cursor.execute("DELETE " + sql, params)
    201 
     189        # Collect all the objects to be deleted, and all the objects that are related to
     190        # the objects that are to be deleted
     191        seen_objs = {}
     192        for object in del_query:
     193            object._collect_sub_objects(seen_objs)
     194       
     195        # Delete the objects   
     196        delete_objects(seen_objs)
     197       
    202198    ##################################################
    203199    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    204200    ##################################################
     
    297293            self._result_cache = list(self.iterator())
    298294        return self._result_cache
    299295
    300     def _get_sql_clause(self, allow_joins):
     296    def _get_sql_clause(self):
    301297        opts = self.model._meta
    302298
    303299        # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
     
    325321        # Start composing the body of the SQL statement.
    326322        sql = [" FROM", backend.quote_name(opts.db_table)]
    327323
    328         # Check if extra tables are allowed. If not, throw an error
    329         if (tables or joins) and not allow_joins:
    330             raise TypeError, "Joins are not allowed in this type of query"
    331 
    332324        # Compose the join dictionary into SQL describing the joins.
    333325        if joins:
    334326            sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
     
    407399            field_names = [f.attname for f in self.model._meta.fields]
    408400
    409401        cursor = connection.cursor()
    410         select, sql, params = self._get_sql_clause(True)
     402        select, sql, params = self._get_sql_clause()
    411403        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
    412404        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    413405        while 1:
     
    429421        if self._field.null:
    430422            date_query._where.append('%s.%s IS NOT NULL' % \
    431423                (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
    432         select, sql, params = self._get_sql_clause(True)
     424        select, sql, params = self._get_sql_clause()
    433425        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
    434426            (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
    435427            backend.quote_name(self._field.column))), sql, self._order)
     
    762754        params.extend(field.get_db_prep_lookup(clause, value))
    763755
    764756    return tables, joins, where, params
     757
     758def compare_models(x, y):
     759    "Comparator for Models that puts models in an order where dependencies are easily resolved."
     760    for field in x._meta.fields:
     761        if field.rel and not field.null and field.rel.to == y:
     762            return -1
     763    for field in y._meta.fields:
     764        if field.rel and not field.null and field.rel.to == x:
     765            return 1
     766    return 0
     767
     768def delete_objects(seen_objs):
     769    "Delete every instance in seen_objs (a dict of model class -> {pk: instance}), first removing references to them"
     770    seen_classes = set(seen_objs.keys())
     771    ordered_classes = list(seen_classes)
     772    ordered_classes.sort(compare_models)
     773
     774    cursor = connection.cursor()
     775     
     776    for cls in ordered_classes:
     777        seen_objs[cls] = seen_objs[cls].items()
     778        seen_objs[cls].sort()
     779   
     780        # Pre notify all instances to be deleted
     781        for pk_val, instance in seen_objs[cls]:
     782            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
     783
     784        pk_list = [pk for pk,instance in seen_objs[cls]]
     785        for related in cls._meta.get_all_related_many_to_many_objects():
     786            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     787                (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
     788                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     789                    ','.join('%s' for pk in pk_list)),
     790                pk_list)
     791        for f in cls._meta.many_to_many:
     792            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     793                (backend.quote_name(f.get_m2m_db_table(cls._meta)),
     794                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     795                    ','.join(['%s' for pk in pk_list])),
     796                pk_list)
     797        for field in cls._meta.fields:
     798            if field.rel and field.null and field.rel.to in seen_classes:
     799                cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \
     800                    (backend.quote_name(cls._meta.db_table),
     801                        backend.quote_name(field.column),
     802                        backend.quote_name(cls._meta.pk.column),
     803                        ','.join(['%s' for pk in pk_list])),
     804                    pk_list)
     805
     806    # Now delete the actual data
     807    for cls in ordered_classes:
     808        seen_objs[cls].reverse()
     809        pk_list = [pk for pk,instance in seen_objs[cls]]
     810       
     811        cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     812            (backend.quote_name(cls._meta.db_table),
     813                backend.quote_name(cls._meta.pk.column),
     814                ','.join(['%s' for pk in pk_list])),
     815            pk_list)
     816               
     817        # Last cleanup; set NULLs where there once was a reference to the object,
     818        # NULL the primary key of the found objects, and perform post-notification.
     819        for pk_val, instance in seen_objs[cls]:
     820            for field in cls._meta.fields:
     821                if field.rel and field.null and field.rel.to in seen_classes:
     822                    setattr(instance, field.attname, None)
     823
     824            setattr(instance, cls._meta.pk.attname, None)
     825            dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
     826
     827    connection.commit()
  • tests/modeltests/basic/models.py

     
    99class Article(models.Model):
    1010    headline = models.CharField(maxlength=100, default='Default headline')
    1111    pub_date = models.DateTimeField()
    12 
     12   
     13    def __repr__(self):
     14        return self.headline
    1315API_TESTS = """
    1416
    1517# No articles are in the system yet.
     
    4143# the article is represented by "<Article object>", because we haven't given
    4244# the Article model a __repr__() method.
    4345>>> Article.objects.all()
    44 [<Article object>]
     46[Area woman programs in Python]
    4547
    4648# Django provides a rich database lookup API.
    4749>>> Article.objects.get(id__exact=1)
    48 <Article object>
     50Area woman programs in Python
    4951>>> Article.objects.get(headline__startswith='Area woman')
    50 <Article object>
     52Area woman programs in Python
    5153>>> Article.objects.get(pub_date__year=2005)
    52 <Article object>
     54Area woman programs in Python
    5355>>> Article.objects.get(pub_date__year=2005, pub_date__month=7)
    54 <Article object>
     56Area woman programs in Python
    5557>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
    56 <Article object>
     58Area woman programs in Python
    5759
    5860# The "__exact" lookup type can be omitted, as a shortcut.
    5961>>> Article.objects.get(id=1)
    60 <Article object>
     62Area woman programs in Python
    6163>>> Article.objects.get(headline='Area woman programs in Python')
    62 <Article object>
     64Area woman programs in Python
    6365
    6466>>> Article.objects.filter(pub_date__year=2005)
    65 [<Article object>]
     67[Area woman programs in Python]
    6668>>> Article.objects.filter(pub_date__year=2004)
    6769[]
    6870>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
    69 [<Article object>]
     71[Area woman programs in Python]
    7072
    7173# Django raises an Article.DoesNotExist exception for get() if the parameters
    7274# don't match any object.
     
    8486# shortcut for primary-key exact lookups.
    8587# The following is identical to articles.get(id=1).
    8688>>> Article.objects.get(pk=1)
    87 <Article object>
     89Area woman programs in Python
    8890
    8991# Model instances of the same type and same ID are considered equal.
    9092>>> a = Article.objects.get(pk=1)
     
    234236
    235237# You can get items using index and slice notation.
    236238>>> Article.objects.all()[0]
    237 <Article object>
     239Area woman programs in Python
    238240>>> Article.objects.all()[1:3]
    239 [<Article object>, <Article object>]
     241[Second article, Third article]
    240242>>> s3 = Article.objects.filter(id__exact=3)
    241243>>> (s1 | s2 | s3)[::2]
    242 [<Article object>, <Article object>]
     244[Area woman programs in Python, Third article]
    243245
    244246# An Article instance doesn't have access to the "objects" attribute.
    245247# That's only available on the class.
     
    254256AttributeError: Manager isn't accessible via Article instances
    255257
    256258# Bulk delete test: How many objects before and after the delete?
    257 >>> Article.objects.count()
    258 8L
    259 >>> Article.objects.delete(id__lte=4)
    260 >>> Article.objects.count()
    261 4L
     259>>> Article.objects.all()
     260[Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8]
     261>>> Article.objects.filter(id__lte=4).delete()
     262>>> Article.objects.all()
     263[Article 6, Default headline, Article 7, Updated article 8]
    262264
    263 >>> Article.objects.delete()
    264 Traceback (most recent call last):
    265     ...
    266 TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data.
    267 
    268 >>> Article.objects.delete(DELETE_ALL=True)
    269 >>> Article.objects.count()
    270 0L
    271 
    272265"""
    273266
    274267from django.conf import settings
  • tests/modeltests/many_to_many/models.py

     
    162162>>> p2.article_set.all().order_by('headline')
    163163[Oxygen-free diet works wonders]
    164164
     165# Recreate the article and Publication we just deleted.
     166>>> p1 = Publication(id=None, title='The Python Journal')
     167>>> p1.save()
     168>>> a2 = Article(id=None, headline='NASA uses Python')
     169>>> a2.save()
     170>>> a2.publications.add(p1, p2, p3)
    165171
     172# Bulk delete some Publications - references to deleted publications should go
     173>>> Publication.objects.filter(title__startswith='Science').delete()
     174>>> Publication.objects.all()
     175[Highlights for Children, The Python Journal]
     176>>> Article.objects.all()
     177[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python]
     178>>> a2.publications.all()
     179[The Python Journal]
    166180
     181# Bulk delete some articles - references to deleted objects should go
     182>>> Article.objects.filter(headline__startswith='Django').delete()
     183>>> p1.article_set.all()
     184[NASA uses Python]
     185
     186
    167187"""
  • tests/modeltests/many_to_one/models.py

     
    9494
    9595# The underlying query only makes one join when a related table is referenced twice.
    9696>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith')
    97 >>> null, sql, null = query._get_sql_clause(True)
     97>>> null, sql, null = query._get_sql_clause()
    9898>>> sql.count('INNER JOIN')
    99991
    100100
     
    163163>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct()
    164164[John Smith]
    165165
    166 # Deletes that require joins are prohibited.
    167 >>> Article.objects.delete(reporter__first_name__startswith='Jo')
    168 Traceback (most recent call last):
    169     ...
    170 TypeError: Joins are not allowed in this type of query
    171 
    172166# If you delete a reporter, his articles will be deleted.
    173167>>> Article.objects.order_by('headline')
    174168[John's second story, Paul's story, This is a test, This is a test, This is a test]
    175169>>> Reporter.objects.order_by('first_name')
    176170[John Smith, Paul Jones]
    177 >>> r.delete()
     171>>> r2.delete()
    178172>>> Article.objects.order_by('headline')
    179 [Paul's story]
     173[John's second story, This is a test, This is a test, This is a test]
    180174>>> Reporter.objects.order_by('first_name')
    181 [Paul Jones]
     175[John Smith]
    182176
     177# Deletes using a join in the query
     178>>> Reporter.objects.filter(article__headline__startswith='This').delete()
     179>>> Reporter.objects.all()
     180[]
     181>>> Article.objects.all()
     182[]
     183
    183184"""
Back to Top