Ticket #1219: bulk-delete-v2.patch

File bulk-delete-v2.patch, 20.0 KB (added by Russell Keith-Magee, 19 years ago)

Patch implementing bulk delete in post-descriptor magic-removal

  • django/db/models/base.py

     
    33from django.db.models.fields import AutoField, ImageField
    44from django.db.models.fields.related import OneToOne, ManyToOne
    55from django.db.models.related import RelatedObject
    6 from django.db.models.query import orderlist2sql
     6from django.db.models.query import orderlist2sql, delete_objects
    77from django.db.models.options import Options, AdminOptions
    88from django.db import connection, backend
    99from django.db.models import signals
     
    4949        register_models(new_class._meta.app_label, new_class)
    5050        return new_class
    5151
    52 def cmp_cls(x, y):
    53     for field in x._meta.fields:
    54         if field.rel and not field.null and field.rel.to == y:
    55             return -1
    56     for field in y._meta.fields:
    57         if field.rel and not field.null and field.rel.to == x:
    58             return 1
    59     return 0
    60 
    6152class Model(object):
    6253    __metaclass__ = ModelBase
    6354
     
    187178
    188179    save.alters_data = True
    189180
    190     def __collect_sub_objects(self, seen_objs):
     181    def _collect_sub_objects(self, seen_objs):
    191182        """
    192183        Recursively populates seen_objs with all objects related to this object.
    193184        When done, seen_objs will be in the format:
     
    207198                except ObjectDoesNotExist:
    208199                    pass
    209200                else:
    210                     sub_obj.__collect_sub_objects(seen_objs)
     201                    sub_obj._collect_sub_objects(seen_objs)
    211202            else:
    212203                for sub_obj in getattr(self, rel_opts_name).all():
    213                     sub_obj.__collect_sub_objects(seen_objs)
     204                    sub_obj._collect_sub_objects(seen_objs)
    214205
    215206    def delete(self):
    216207        assert self._get_pk_val() is not None, "%s object can't be deleted because its %s attribute is set to None." % (self._meta.object_name, self._meta.pk.attname)
     208       
     209        # Find all the objects that need to be deleted
    217210        seen_objs = {}
    218         self.__collect_sub_objects(seen_objs)
    219 
    220         seen_classes = set(seen_objs.keys())
    221         ordered_classes = list(seen_classes)
    222         ordered_classes.sort(cmp_cls)
    223 
    224         cursor = connection.cursor()
    225 
    226         for cls in ordered_classes:
    227             seen_objs[cls] = seen_objs[cls].items()
    228             seen_objs[cls].sort()
    229             for pk_val, instance in seen_objs[cls]:
    230                 dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
    231 
    232                 for related in cls._meta.get_all_related_many_to_many_objects():
    233                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    234                         (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
    235                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    236                         [pk_val])
    237                 for f in cls._meta.many_to_many:
    238                     cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    239                         (backend.quote_name(f.get_m2m_db_table(cls._meta)),
    240                         backend.quote_name(cls._meta.object_name.lower() + '_id')),
    241                         [pk_val])
    242                 for field in cls._meta.fields:
    243                     if field.rel and field.null and field.rel.to in seen_classes:
    244                         cursor.execute("UPDATE %s SET %s=NULL WHERE %s=%%s" % \
    245                             (backend.quote_name(cls._meta.db_table), backend.quote_name(field.column),
    246                             backend.quote_name(cls._meta.pk.column)), [pk_val])
    247                         setattr(instance, field.attname, None)
    248 
    249         for cls in ordered_classes:
    250             seen_objs[cls].reverse()
    251             for pk_val, instance in seen_objs[cls]:
    252                 cursor.execute("DELETE FROM %s WHERE %s=%%s" % \
    253                     (backend.quote_name(cls._meta.db_table), backend.quote_name(cls._meta.pk.column)),
    254                     [pk_val])
    255                 setattr(instance, cls._meta.pk.attname, None)
    256                 dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
    257 
    258         connection.commit()
    259 
     211        self._collect_sub_objects(seen_objs)
     212       
     213        # Actually delete the objects
     214        delete_objects(seen_objs)
     215       
    260216    delete.alters_data = True
    261217
    262218    def _get_FIELD_display(self, field):
  • django/db/models/manager.py

     
    5757    def dates(self, *args, **kwargs):
    5858        return self.get_query_set().dates(*args, **kwargs)
    5959
    60     def delete(self, *args, **kwargs):
    61         return self.get_query_set().delete(*args, **kwargs)
    62 
    6360    def distinct(self, *args, **kwargs):
    6461        return self.get_query_set().distinct(*args, **kwargs)
    6562
  • django/db/models/query.py

     
    11from django.db import backend, connection
    22from django.db.models.fields import DateField, FieldDoesNotExist
     3from django.db.models import signals
     4from django.dispatch import dispatcher
    35from django.utils.datastructures import SortedDict
     6
    47import operator
    58
    69LOOKUP_SEPARATOR = '__'
     
    125128        extra_select = self._select.items()
    126129
    127130        cursor = connection.cursor()
    128         select, sql, params = self._get_sql_clause(True)
     131        select, sql, params = self._get_sql_clause()
    129132        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    130133        fill_cache = self._select_related
    131134        index_end = len(self.model._meta.fields)
     
    149152        counter._offset = None
    150153        counter._limit = None
    151154        counter._select_related = False
    152         select, sql, params = counter._get_sql_clause(True)
     155        select, sql, params = counter._get_sql_clause()
    153156        cursor = connection.cursor()
    154157        cursor.execute("SELECT COUNT(*)" + sql, params)
    155158        return cursor.fetchone()[0]
     
    171174        assert bool(latest_by), "latest() requires either a field_name parameter or 'get_latest_by' in the model"
    172175        return self._clone(_limit=1, _order_by=('-'+latest_by,)).get()
    173176
    174     def delete(self, *args, **kwargs):
     177    def delete(self):
    175178        """
    176         Deletes the records with the given kwargs. If no kwargs are given,
    177         deletes records in the current QuerySet.
     179        Deletes the records in the current QuerySet.
    178180        """
    179         # Remove the DELETE_ALL argument, if it exists.
    180         delete_all = kwargs.pop('DELETE_ALL', False)
     181        del_query = self._clone()       
    181182
    182         # Check for at least one query argument.
    183         if not kwargs and not delete_all:
    184             raise TypeError, "SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data."
    185 
    186         if kwargs:
    187             del_query = self.filter(*args, **kwargs)
    188         else:
    189             del_query = self._clone()
    190183        # disable non-supported fields
    191184        del_query._select_related = False
    192         del_query._select = {}
    193185        del_query._order_by = []
    194186        del_query._offset = None
    195187        del_query._limit = None
    196188
    197         # Perform the SQL delete
    198         cursor = connection.cursor()
    199         _, sql, params = del_query._get_sql_clause(False)
    200         cursor.execute("DELETE " + sql, params)
    201 
     189        # Collect all the objects to be deleted, and all the objects that are related to
     190        # the objects that are to be deleted
     191        seen_objs = {}
     192        for object in del_query:
     193            object._collect_sub_objects(seen_objs)
     194       
     195        # Delete the objects   
     196        delete_objects(seen_objs)
     197       
     198        # Clear the result cache, in case this QuerySet gets reused.
     199        self._result_cache = None
     200    delete.alters_data = True
     201       
    202202    ##################################################
    203203    # PUBLIC METHODS THAT RETURN A QUERYSET SUBCLASS #
    204204    ##################################################
     
    297297            self._result_cache = list(self.iterator())
    298298        return self._result_cache
    299299
    300     def _get_sql_clause(self, allow_joins):
     300    def _get_sql_clause(self):
    301301        opts = self.model._meta
    302302
    303303        # Construct the fundamental parts of the query: SELECT X FROM Y WHERE Z.
     
    325325        # Start composing the body of the SQL statement.
    326326        sql = [" FROM", backend.quote_name(opts.db_table)]
    327327
    328         # Check if extra tables are allowed. If not, throw an error
    329         if (tables or joins) and not allow_joins:
    330             raise TypeError, "Joins are not allowed in this type of query"
    331 
    332328        # Compose the join dictionary into SQL describing the joins.
    333329        if joins:
    334330            sql.append(" ".join(["%s %s AS %s ON %s" % (join_type, table, alias, condition)
     
    407403            field_names = [f.attname for f in self.model._meta.fields]
    408404
    409405        cursor = connection.cursor()
    410         select, sql, params = self._get_sql_clause(True)
     406        select, sql, params = self._get_sql_clause()
    411407        select = ['%s.%s' % (backend.quote_name(self.model._meta.db_table), backend.quote_name(c)) for c in columns]
    412408        cursor.execute("SELECT " + (self._distinct and "DISTINCT " or "") + ",".join(select) + sql, params)
    413409        while 1:
     
    429425        if self._field.null:
    430426            date_query._where.append('%s.%s IS NOT NULL' % \
    431427                (backend.quote_name(self.model._meta.db_table), backend.quote_name(self._field.column)))
    432         select, sql, params = self._get_sql_clause(True)
     428        select, sql, params = self._get_sql_clause()
    433429        sql = 'SELECT %s %s GROUP BY 1 ORDER BY 1 %s' % \
    434430            (backend.get_date_trunc_sql(self._kind, '%s.%s' % (backend.quote_name(self.model._meta.db_table),
    435431            backend.quote_name(self._field.column))), sql, self._order)
     
    762758        params.extend(field.get_db_prep_lookup(clause, value))
    763759
    764760    return tables, joins, where, params
     761
     762def compare_models(x, y):
     763    "Comparator for Models that puts models in an order where dependencies are easily resolved."
     764    for field in x._meta.fields:
     765        if field.rel and not field.null and field.rel.to == y:
     766            return -1
     767    for field in y._meta.fields:
     768        if field.rel and not field.null and field.rel.to == x:
     769            return 1
     770    return 0
     771
     772def delete_objects(seen_objs):
     773    "Delete every instance collected in seen_objs (a dict mapping each model class to a dict of its instances keyed by primary key), along with the many-to-many rows that reference them"
     774    seen_classes = set(seen_objs.keys())
     775    ordered_classes = list(seen_classes)
     776    ordered_classes.sort(compare_models)
     777
     778    cursor = connection.cursor()
     779     
     780    for cls in ordered_classes:
     781        seen_objs[cls] = seen_objs[cls].items()
     782        seen_objs[cls].sort()
     783   
     784        # Pre notify all instances to be deleted
     785        for pk_val, instance in seen_objs[cls]:
     786            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
     787
     788        pk_list = [pk for pk,instance in seen_objs[cls]]
     789        for related in cls._meta.get_all_related_many_to_many_objects():
     790            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     791                (backend.quote_name(related.field.get_m2m_db_table(related.opts)),
     792                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     793                    ','.join('%s' for pk in pk_list)),
     794                pk_list)
     795        for f in cls._meta.many_to_many:
     796            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     797                (backend.quote_name(f.get_m2m_db_table(cls._meta)),
     798                    backend.quote_name(cls._meta.object_name.lower() + '_id'),
     799                    ','.join(['%s' for pk in pk_list])),
     800                pk_list)
     801        for field in cls._meta.fields:
     802            if field.rel and field.null and field.rel.to in seen_classes:
     803                cursor.execute("UPDATE %s SET %s=NULL WHERE %s IN (%s)" % \
     804                    (backend.quote_name(cls._meta.db_table),
     805                        backend.quote_name(field.column),
     806                        backend.quote_name(cls._meta.pk.column),
     807                        ','.join(['%s' for pk in pk_list])),
     808                    pk_list)
     809
     810    # Now delete the actual data
     811    for cls in ordered_classes:
     812        seen_objs[cls].reverse()
     813        pk_list = [pk for pk,instance in seen_objs[cls]]
     814       
     815        cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
     816            (backend.quote_name(cls._meta.db_table),
     817                backend.quote_name(cls._meta.pk.column),
     818                ','.join(['%s' for pk in pk_list])),
     819            pk_list)
     820               
     821        # Last cleanup; set NULLs where there once was a reference to the object,
     822        # NULL the primary key of the found objects, and perform post-notification.
     823        for pk_val, instance in seen_objs[cls]:
     824            for field in cls._meta.fields:
     825                if field.rel and field.null and field.rel.to in seen_classes:
     826                    setattr(instance, field.attname, None)
     827
     828            setattr(instance, cls._meta.pk.attname, None)
     829            dispatcher.send(signal=signals.post_delete, sender=cls, instance=instance)
     830
     831    connection.commit()
  • tests/modeltests/basic/models.py

     
    99class Article(models.Model):
    1010    headline = models.CharField(maxlength=100, default='Default headline')
    1111    pub_date = models.DateTimeField()
    12 
     12   
     13    def __repr__(self):
     14        return self.headline
    1315API_TESTS = """
    1416
    1517# No articles are in the system yet.
     
    3739>>> a.headline = 'Area woman programs in Python'
    3840>>> a.save()
    3941
    40 # Article.objects.all() returns all the articles in the database. Note that
    41 # the article is represented by "<Article object>", because we haven't given
    42 # the Article model a __repr__() method.
     42# Article.objects.all() returns all the articles in the database.
    4343>>> Article.objects.all()
    44 [<Article object>]
     44[Area woman programs in Python]
    4545
    4646# Django provides a rich database lookup API.
    4747>>> Article.objects.get(id__exact=1)
    48 <Article object>
     48Area woman programs in Python
    4949>>> Article.objects.get(headline__startswith='Area woman')
    50 <Article object>
     50Area woman programs in Python
    5151>>> Article.objects.get(pub_date__year=2005)
    52 <Article object>
     52Area woman programs in Python
    5353>>> Article.objects.get(pub_date__year=2005, pub_date__month=7)
    54 <Article object>
     54Area woman programs in Python
    5555>>> Article.objects.get(pub_date__year=2005, pub_date__month=7, pub_date__day=28)
    56 <Article object>
     56Area woman programs in Python
    5757
    5858# The "__exact" lookup type can be omitted, as a shortcut.
    5959>>> Article.objects.get(id=1)
    60 <Article object>
     60Area woman programs in Python
    6161>>> Article.objects.get(headline='Area woman programs in Python')
    62 <Article object>
     62Area woman programs in Python
    6363
    6464>>> Article.objects.filter(pub_date__year=2005)
    65 [<Article object>]
     65[Area woman programs in Python]
    6666>>> Article.objects.filter(pub_date__year=2004)
    6767[]
    6868>>> Article.objects.filter(pub_date__year=2005, pub_date__month=7)
    69 [<Article object>]
     69[Area woman programs in Python]
    7070
    7171# Django raises an Article.DoesNotExist exception for get() if the parameters
    7272# don't match any object.
     
    8484# shortcut for primary-key exact lookups.
    8585# The following is identical to articles.get(id=1).
    8686>>> Article.objects.get(pk=1)
    87 <Article object>
     87Area woman programs in Python
    8888
    8989# Model instances of the same type and same ID are considered equal.
    9090>>> a = Article.objects.get(pk=1)
     
    234234
    235235# You can get items using index and slice notation.
    236236>>> Article.objects.all()[0]
    237 <Article object>
     237Area woman programs in Python
    238238>>> Article.objects.all()[1:3]
    239 [<Article object>, <Article object>]
     239[Second article, Third article]
    240240>>> s3 = Article.objects.filter(id__exact=3)
    241241>>> (s1 | s2 | s3)[::2]
    242 [<Article object>, <Article object>]
     242[Area woman programs in Python, Third article]
    243243
    244244# An Article instance doesn't have access to the "objects" attribute.
    245245# That's only available on the class.
     
    254254AttributeError: Manager isn't accessible via Article instances
    255255
    256256# Bulk delete test: How many objects before and after the delete?
    257 >>> Article.objects.count()
    258 8L
    259 >>> Article.objects.delete(id__lte=4)
    260 >>> Article.objects.count()
    261 4L
     257>>> Article.objects.all()
     258[Area woman programs in Python, Second article, Third article, Fourth article, Article 6, Default headline, Article 7, Updated article 8]
     259>>> Article.objects.filter(id__lte=4).delete()
     260>>> Article.objects.all()
     261[Article 6, Default headline, Article 7, Updated article 8]
    262262
    263 >>> Article.objects.delete()
    264 Traceback (most recent call last):
    265     ...
    266 TypeError: SAFETY MECHANISM: Specify DELETE_ALL=True if you actually want to delete all data.
    267 
    268 >>> Article.objects.delete(DELETE_ALL=True)
    269 >>> Article.objects.count()
    270 0L
    271 
    272263"""
    273264
    274265from django.conf import settings
  • tests/modeltests/many_to_many/models.py

     
    162162>>> p2.article_set.all().order_by('headline')
    163163[Oxygen-free diet works wonders]
    164164
     165# Recreate the article and Publication we just deleted.
     166>>> p1 = Publication(id=None, title='The Python Journal')
     167>>> p1.save()
     168>>> a2 = Article(id=None, headline='NASA uses Python')
     169>>> a2.save()
     170>>> a2.publications.add(p1, p2, p3)
    165171
     172# Bulk delete some Publications - references to deleted publications should go
     173>>> Publication.objects.filter(title__startswith='Science').delete()
     174>>> Publication.objects.all()
     175[Highlights for Children, The Python Journal]
     176>>> Article.objects.all()
     177[Django lets you build Web apps easily, NASA finds intelligent life on Earth, Oxygen-free diet works wonders, NASA uses Python]
     178>>> a2.publications.all()
     179[The Python Journal]
    166180
     181# Bulk delete some articles - references to deleted objects should go
     182>>> q = Article.objects.filter(headline__startswith='Django')
     183>>> print q
     184[Django lets you build Web apps easily]
     185>>> q.delete()
     186
     187# After the delete, the QuerySet cache needs to be cleared, and the referenced objects should be gone
     188>>> print q
     189[]
     190>>> p1.article_set.all()
     191[NASA uses Python]
     192
     193
    167194"""
  • tests/modeltests/many_to_one/models.py

     
    9494
    9595# The underlying query only makes one join when a related table is referenced twice.
    9696>>> query = Article.objects.filter(reporter__first_name__exact='John', reporter__last_name__exact='Smith')
    97 >>> null, sql, null = query._get_sql_clause(True)
     97>>> null, sql, null = query._get_sql_clause()
    9898>>> sql.count('INNER JOIN')
    99991
    100100
     
    163163>>> Reporter.objects.filter(article__reporter__first_name__startswith='John').distinct()
    164164[John Smith]
    165165
    166 # Deletes that require joins are prohibited.
    167 >>> Article.objects.delete(reporter__first_name__startswith='Jo')
    168 Traceback (most recent call last):
    169     ...
    170 TypeError: Joins are not allowed in this type of query
    171 
    172166# If you delete a reporter, his articles will be deleted.
    173167>>> Article.objects.order_by('headline')
    174168[John's second story, Paul's story, This is a test, This is a test, This is a test]
    175169>>> Reporter.objects.order_by('first_name')
    176170[John Smith, Paul Jones]
    177 >>> r.delete()
     171>>> r2.delete()
    178172>>> Article.objects.order_by('headline')
    179 [Paul's story]
     173[John's second story, This is a test, This is a test, This is a test]
    180174>>> Reporter.objects.order_by('first_name')
    181 [Paul Jones]
     175[John Smith]
    182176
     177# Deletes using a join in the query
     178>>> Reporter.objects.filter(article__headline__startswith='This').delete()
     179>>> Reporter.objects.all()
     180[]
     181>>> Article.objects.all()
     182[]
     183
    183184"""
Back to Top