Ticket #17: 17.diff

File 17.diff, 19.5 KB (added by Philippe Raoult, 17 years ago)

added tests, and style issues

  • django/db/models/base.py

     
    1515from django.utils.encoding import smart_str, force_unicode, smart_unicode
    1616from django.conf import settings
    1717from itertools import izip
     18from weakref import WeakValueDictionary
    1819import types
    1920import sys
    2021import os
     
    7778        # registered version.
    7879        return get_model(new_class._meta.app_label, name, False)
    7980
     81    def __call__(cls, *args, **kwargs):
     82        """
     83        this method will either create an instance (by calling the default implementation)
     84        or try to retrieve one from the class-wide cache by inferring the pk value from
     85        args and kwargs. If instance caching is enabled for this class, the cache is
     86        populated whenever possible (ie when it is possible to infer the pk value). If 'meta__disable_caching'
     87        is set to True in kwargs, then the instance is constructed and we flush
     88        the associated cache entry.
     89        """
     90        def new_instance():
     91            return super(ModelBase, cls).__call__(*args, **kwargs)
     92       
     93        cache_this_instance = cls.instance_caching_enabled()
     94        # we always pop those settings from kwargs, the instance shouldn't see this
     95        if kwargs.pop('meta__disable_caching', False):
     96            # user explicitly requested not to use cache, we flush the cache to prevent inconsistencies
     97            cls._flush_cached_by_key(cls._get_cache_key(args, kwargs))
     98            cache_this_instance = False
     99           
     100        # simplest case, just create a new instance every time
     101        if not cache_this_instance:
     102            return new_instance()
     103       
     104        instance_key = cls._get_cache_key(args, kwargs)
     105        # depending on the arguments, we might not be able to infer the PK, so in that case we create a new instance
     106        if instance_key is None:
     107            cls._instance_cache_nokey_misses += 1
     108            return new_instance()
     109
     110        cached_instance = cls.get_cached_instance(instance_key)
     111        if cached_instance is None:
     112            cached_instance = new_instance()
     113            cls.cache_instance(cached_instance)
     114
     115        return cached_instance
     116
    80117class Model(object):
    81118    __metaclass__ = ModelBase
    82119
     
    97134    def __ne__(self, other):
    98135        return not self.__eq__(other)
    99136
     137    def _get_cache_key(cls, args, kwargs):
     138        """
     139        This method is used by the caching subsystem to infer the PK value from the constructor arguments.
     140        It is used to decide if an instance has to be built or is already in the cache.
     141        """
     142        result = None
     143        pk = cls._meta.pk
     144        # get the index of the pk in the class fields. this should be calculated *once*, but isn't atm
     145        pk_position = cls._meta.fields.index(pk)
     146        if len(args) > pk_position:
     147            # if it's in the args, we can get it easily by index
     148            result = args[pk_position]
     149        elif pk.attname in kwargs:
     150            # retrieve the pk value. Note that we use attname instead of name, to handle the case where the pk is a
     151            # ForeignKey.
     152            result = kwargs[pk.attname]
     153        elif pk.name != pk.attname and pk.name in kwargs:
     154            # ok we couldn't find the value, but maybe it's a FK and we can find the corresponding object instead
     155            result = kwargs[pk.name]
     156       
     157        if result is not None and isinstance(result, Model):
     158            # if the pk value happens to be a model instance (which can happen with a FK), we'd rather use its own pk as the key
     159            result = result._get_pk_val()
     160        return result
     161    _get_cache_key = classmethod(_get_cache_key)
     162
     163    def get_cached_instance(cls, id):
     164        """
     165        Method to retrieve a cached instance by pk value. Returns None when not found
     166        (which will always be the case when caching is disabled for this class). Please
     167        note that the lookup will be done even when instance caching is disabled, thus
     168        generating a miss in the stats.
     169        """
     170        result = cls.__instance_cache__.get(id)
     171        if result is None:
     172            cls._instance_cache_misses += 1
     173        else:
     174            cls._instance_cache_hits += 1
     175        return result
     176    get_cached_instance = classmethod(get_cached_instance)
     177
     178    def cache_instance(cls, instance):
     179        """
     180        Method to store an instance in the cache. TODO: add a store counter in the stats
     181        """
     182        if cls.instance_caching_enabled() and instance._get_pk_val() is not None:
     183            cls.__instance_cache__[instance._get_pk_val()] = instance
     184    cache_instance = classmethod(cache_instance)
     185
     186    def _flush_cached_by_key(cls, key):
     187        if cls.__instance_cache__.pop(key, None) is not None:
     188            cls._instance_cache_flushes += 1
     189    _flush_cached_by_key = classmethod(_flush_cached_by_key)
     190       
     191    def flush_cached_instance(cls, instance):
     192        """
     193        Method to flush an instance from the cache. The instance will always be flushed from the cache,
     194        since this is most likely called from delete(), and we want to make sure we don't cache dead objects.
     195        We do not test the pk value because delete() does it and it will fail silently anyway.
     196        """
     197        if cls.instance_caching_enabled():
     198            cls._flush_cached_by_key(instance._get_pk_val())
     199    flush_cached_instance = classmethod(flush_cached_instance)
     200
     201    def instance_caching_enabled(cls):
     202        """
     203        Accessor for the cache settings.
     204        """
     205        # cache is off by default!
     206        return getattr(cls, '_meta__instance_caching', False)
     207    instance_caching_enabled = classmethod(instance_caching_enabled)
     208
     209    def set_instance_caching(cls, enable):
     210        """
     211        Accessor for the cache settings. Note that the cache is flushed and the stats reset when
     212        the settings are switched (ie enabling the cache multiple times will not flush).
     213        """
     214        current_settings = cls.instance_caching_enabled()
     215        cls._meta__instance_caching = enable
     216        # completely flush the cache every time the settings are changed
     217        if enable != current_settings:
     218            cls.__instance_cache__.clear()
     219            cls.instance_caching_stats_reset()
     220    set_instance_caching = classmethod(set_instance_caching)
     221   
     222    def instance_caching_stats_reset(cls):
     223        # also used to init the stats in '_prepare()'
     224        cls._instance_cache_hits = 0
     225        cls._instance_cache_misses = 0
     226        cls._instance_cache_nokey_misses = 0
     227        cls._instance_cache_flushes = 0
     228    instance_caching_stats_reset = classmethod(instance_caching_stats_reset)
     229   
     230    def instance_caching_stats(cls):
     231        return {'enabled': cls.instance_caching_enabled(),
     232                'hits' : cls._instance_cache_hits,
     233                'misses': cls._instance_cache_misses,
     234                'flushes': cls._instance_cache_flushes,
     235                'misses_nokey': cls._instance_cache_nokey_misses,
     236                'cache_size': len(cls.__instance_cache__) }
     237    instance_caching_stats = classmethod(instance_caching_stats)
     238       
    100239    def __init__(self, *args, **kwargs):
    101240        dispatcher.send(signal=signals.pre_init, sender=self.__class__, args=args, kwargs=kwargs)
    102241
     
    197336        if hasattr(cls, 'get_absolute_url'):
    198337            cls.get_absolute_url = curry(get_absolute_url, opts, cls.get_absolute_url)
    199338
     339        cls.__instance_cache__ = WeakValueDictionary()
     340        cls.instance_caching_stats_reset()
     341        # enable the cache according to user preferences (off by default)
     342        # FIXME better interface for setting this value (meta class attribute ?)
     343        cls.set_instance_caching(getattr(cls, 'meta__instance_caching', False))
     344
    200345        dispatcher.send(signal=signals.class_prepared, sender=cls)
    201346
    202347    _prepare = classmethod(_prepare)
     
    261406                setattr(self, self._meta.pk.attname, connection.ops.last_insert_id(cursor, self._meta.db_table, self._meta.pk.column))
    262407        transaction.commit_unless_managed()
    263408
     409        # now that the pk is set, store ourself in the instance cache (a no-op when caching is disabled).
     410        self.__class__.cache_instance(self)
     411
    264412        # Run any post-save hooks.
    265413        dispatcher.send(signal=signals.post_save, sender=self.__class__,
    266414                instance=self, created=(not record_exists))
     
    321469        seen_objs = SortedDict()
    322470        self._collect_sub_objects(seen_objs)
    323471
     472        # remove ourself from the cache
     473        self.__class__.flush_cached_instance(self)
    324474        # Actually delete the objects
    325475        delete_objects(seen_objs)
    326476
  • django/db/models/fields/related.py

     
    159159        try:
    160160            return getattr(instance, cache_name)
    161161        except AttributeError:
     162            related_cls = self.field.rel.to
    162163            val = getattr(instance, self.field.attname)
    163164            if val is None:
    164165                # If NULL is an allowed value, return it.
    165166                if self.field.null:
    166167                    return None
    167                 raise self.field.rel.to.DoesNotExist
    168             other_field = self.field.rel.get_related_field()
    169             if other_field.rel:
    170                 params = {'%s__pk' % self.field.rel.field_name: val}
     168                raise related_cls.DoesNotExist
     169            # try to get a cached instance, and if that fails retrieve it from the db
     170            # FIXME TEST THIS i'm not sure val is really the object's pk ...
     171            if related_cls.instance_caching_enabled():
     172                rel_obj = related_cls.get_cached_instance(val)
    171173            else:
    172                 params = {'%s__exact' % self.field.rel.field_name: val}
    173             rel_obj = self.field.rel.to._default_manager.get(**params)
     174                rel_obj = None
     175            if rel_obj is None:
     176                other_field = self.field.rel.get_related_field()
     177                if other_field.rel:
     178                    params = {'%s__pk' % self.field.rel.field_name: val}
     179                else:
     180                    params = {'%s__exact' % self.field.rel.field_name: val}
     181                rel_obj = related_cls._default_manager.get(**params)
    174182            setattr(instance, cache_name, rel_obj)
    175183            return rel_obj
    176184
  • django/db/models/query.py

     
    11341134            dispatcher.send(signal=signals.pre_delete, sender=cls, instance=instance)
    11351135
    11361136        pk_list = [pk for pk,instance in seen_objs[cls]]
     1137        # we wipe the cache now; it's *possible* some form of a __get__ lookup may reintroduce an item after
     1138        # the fact with the same pk (extremely unlikely)
     1139        for instance in seen_objs.values():
     1140            cls.flush_cached_instance(instance)
     1141
    11371142        for related in cls._meta.get_all_related_many_to_many_objects():
    11381143            if not isinstance(related.field, generic.GenericRelation):
    11391144                for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE):
     
    11671172    for cls in ordered_classes:
    11681173        seen_objs[cls].reverse()
    11691174        pk_list = [pk for pk,instance in seen_objs[cls]]
     1175        for instance in seen_objs.values():
     1176            cls.flush_cached_instance(instance)
    11701177        for offset in range(0, len(pk_list), GET_ITERATOR_CHUNK_SIZE):
    11711178            cursor.execute("DELETE FROM %s WHERE %s IN (%s)" % \
    11721179                (qn(cls._meta.db_table), qn(cls._meta.pk.column),
  • django/core/serializers/xml_serializer.py

     
    176176                else:
    177177                    value = field.to_python(getInnerText(field_node).strip())
    178178                data[field.name] = value
    179 
     179        # disable caching, make sure the object is fully constructed from our data and not pulled from the cache
     180        data["meta__disable_caching"] = True
    180181        # Return a DeserializedObject so that the m2m data has a place to live.
    181182        return base.DeserializedObject(Model(**data), m2m_data)
    182183
     
    234235        else:
    235236           pass
    236237    return u"".join(inner_text)
    237 
  • django/core/serializers/python.py

     
    8989            # Handle all other fields
    9090            else:
    9191                data[field.name] = field.to_python(field_value)
    92 
     92        # disable caching, make sure the object is fully constructed from our data and not pulled from the cache
     93        data["meta__disable_caching"] = True
    9394        yield base.DeserializedObject(Model(**data), m2m_data)
    9495
    9596def _get_model(model_identifier):
  • tests/modeltests/select_related/models.py

     
    7575        obj.save()
    7676        parent = obj
    7777
     78def set_instance_caching(settings):
     79    for cls in [Domain, Kingdom, Phylum, Klass, Order, Family, Genus]:
     80        cls.set_instance_caching(settings)
    7881__test__ = {'API_TESTS':"""
    7982
    8083# Set up.
     
    147150>>> len(db.connection.queries)
    1481515
    149152
     153# CACHING TESTS
     154>>> Genus.instance_caching_stats()
     155{'hits': 0, 'misses_nokey': 0, 'enabled': False, 'flushes': 0, 'misses': 0, 'cache_size': 0}
     156
     157# ENABLE CACHING ON ALL MODELS IN THE TEST EXCEPT SPECIES
     158>>> set_instance_caching(True)
     159
     160# This should be the same as without caching
     161>>> db.reset_queries()
     162>>> fly = Species.objects.get(name="melanogaster")
     163>>> fly.genus.family.order.klass.phylum.kingdom.domain
     164<Domain: Eukaryota>
     165>>> len(db.connection.queries)
     1668
     167
     168# This should be the same as without caching
     169>>> db.reset_queries()
     170>>> person = Species.objects.select_related().get(name="sapiens")
     171>>> person.genus.family.order.klass.phylum.kingdom.domain
     172<Domain: Eukaryota>
     173>>> len(db.connection.queries)
     1741
     175
     176# now let's see how caching helps
     177>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_2")
     178>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_3")
     179>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_4")
     180>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_5")
     181>>> create_tree("Eukaryota Animalia Chordata Mammalia Primates Hominidae Homo sapiens_6")
     182>>> set_instance_caching(False)
     183>>> db.reset_queries()
     184>>> world = Species.objects.all()
     185>>> geni_of_world = [o.genus for o in world]
     186>>> len(db.connection.queries) # 1 for Species and 9 for the Geni
     18710
     188>>> Genus.instance_caching_stats()
     189{'hits': 0, 'misses_nokey': 0, 'enabled': False, 'flushes': 0, 'misses': 0, 'cache_size': 0}
     190
     191>>> set_instance_caching(True)
     192>>> db.reset_queries()
     193>>> world = Species.objects.all()
     194>>> geni_of_world == [o.genus for o in world]
     195True
     196>>> len(db.connection.queries) # 1 for Species and 4 for the distinct Geni
     1975
     198
     199# here we get 8 misses because ReverseSingleRelatedObjectDescriptor misses twice when the object isn't in the cache
     200>>> Genus.instance_caching_stats() # 4 distinct Geni and 5 rows generating hits.
     201{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 8, 'cache_size': 4}
     202>>> Genus.get_cached_instance(2)
     203<Genus: Homo>
     204>>> Genus.instance_caching_stats()['hits'] # one more hit !
     2056
     206>>> Genus.instance_caching_stats_reset()
     207>>> Genus.instance_caching_stats()
     208{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4}
     209
     210>>> Genus.instance_caching_stats_reset()
     211>>> Genus.get_cached_instance(2)
     212<Genus: Homo>
     213>>> Genus.instance_caching_stats()
     214{'hits': 1, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4}
     215>>> Genus.objects.get(id=2)
     216<Genus: Homo>
     217>>> Genus.instance_caching_stats()
     218{'hits': 2, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4}
     219>>> Species.objects.get(id=2).genus
     220<Genus: Homo>
     221>>> Genus.instance_caching_stats()
     222{'hits': 3, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4}
     223
     224>>> set_instance_caching(False)
     225>>> set_instance_caching(True)
     226>>> Genus.get_cached_instance(2)
     227>>> Genus.objects.get(id=2)
     228<Genus: Homo>
     229>>> Genus.get_cached_instance(2)
     230<Genus: Homo>
     231>>> Genus.instance_caching_stats_reset()
     232>>> db.reset_queries()
     233>>> world = Species.objects.all()
     234>>> sapiens = world[1]
     235>>> len(db.connection.queries) # 1 for Species and the rest is in the cache, whoa
     2361
     237>>> Genus.instance_caching_stats() # we haven't touched geni yet
     238{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 0}
     239>>> homo = sapiens.genus
     240>>> Genus.instance_caching_stats() # 2 misses from ReverseSingleRelatedObjectDescriptor even if only one object was retrieved
     241{'hits': 0, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 2, 'cache_size': 1}
     242
     243>>> set_instance_caching(False)
     244>>> Genus.get_cached_instance(2)
     245
     246# This one is tricky, we get() the instance so it gets cached,
     247# then test that instantiating with the same PK retrieves the instance
     248>>> set_instance_caching(True)
     249>>> Genus.instance_caching_enabled()
     250True
     251>>> Species.instance_caching_enabled()
     252False
     253>>> first_homo = Genus.objects.get(id=2)
     254>>> first_homo
     255<Genus: Homo>
     256>>> Genus.instance_caching_stats()['cache_size']
     2571
     258>>> Genus.instance_caching_stats()['hits']
     2590
     260>>> homo = Genus.get_cached_instance(2)
     261>>> homo
     262<Genus: Homo>
     263>>> Genus.instance_caching_stats()['hits']
     2641
     265>>> Genus.instance_caching_stats_reset()
     266>>> kwargs = {'id': 2}
     267>>> Genus._get_cache_key([], kwargs)
     2682
     269>>> Genus(id = 2)
     270<Genus: Homo>
     271>>> Genus.instance_caching_stats()['hits']
     2721
     273>>> Genus.flush_cached_instance(homo)
     274>>> Genus.get_cached_instance(2) == None
     275True
     276>>> Genus.instance_caching_stats()['cache_size']
     2770
     278>>> Genus.instance_caching_stats_reset()
     279>>> first_homo = Genus.objects.get(id=2)
     280>>> Genus.instance_caching_stats()['misses']
     2811
     282
     283## each of the initial species has its own genus but the 5 sapiens dupes will hit the cache
     284#>>> Genus.instance_caching_stats()
     285#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 4, 'cache_size': 4}
     286#
     287#>>> set_instance_caching(False) # Flushes the cache
     288#>>> set_instance_caching(True)
     289#>>> Genus.instance_caching_stats_reset()
     290#>>> temp = [o.genus for o in (list(Species.objects.all()) + list(Species.objects.all()))]
     291#>>> Genus.instance_caching_stats()
     292#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 0, 'cache_size': 4}
     293#
     294#>>> set_instance_caching(False) # Flushes the cache
     295#>>> set_instance_caching(True)
     296#>>> db.reset_queries()
     297#>>> Genus.instance_caching_stats_reset()
     298#>>> world = Species.objects.all().select_related()
     299#>>> [o.genus for o in world]
     300#[<Genus: Drosophila>, <Genus: Homo>, <Genus: Pisum>, <Genus: Amanita>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>, <Genus: Homo>]
     301#>>> len(db.connection.queries)
     302#1
     303#>>> Genus.instance_caching_stats()
     304#{'hits': 5, 'misses_nokey': 0, 'enabled': True, 'flushes': 0, 'misses': 4, 'cache_size': 4}
     305
    150306# Reset DEBUG to where we found it.
    151307>>> settings.DEBUG = False
    152308"""}
Back to Top