Opened 13 years ago

Closed 13 years ago

#16165 closed Bug (worksforme)

Annotations get crazy when .values has many arguments (MySQL)

Reported by: dev@… Owned by: nobody
Component: Database layer (models, ORM) Version: dev
Severity: Normal Keywords: aggregate, annotate
Cc: Triage Stage: Unreviewed
Has patch: no Needs documentation: no
Needs tests: no Patch needs improvement: no
Easy pickings: no UI/UX: no

Description (last modified by Karen Tracey)

Models in "mars" project:

class BiddingRule(models.Model):
    name = models.CharField(max_length=80)
    ...

class TagList(models.Model):
    name = models.CharField(max_length=50)
    ...

    def __unicode__(self):
        return self.name

class Property(models.Model):
    property = models.CharField(max_length=100)

    def __unicode__(self):
        return self.property

    class Meta:
        ordering = ['property']
        verbose_name_plural = 'properties'


class Account(models.Model):
    """A property has 1..n accounts"""

    username = models.CharField(max_length=200)
    password = models.CharField(max_length=200)
    ENGINE_CHOICES = (
        (1, 'Google'),
        (2, 'Yahoo'),
        (3, 'Bing'),
    )
    property = models.ForeignKey(Property)
    engine = models.IntegerField(choices=ENGINE_CHOICES)


class Campaign(models.Model):
    """An account has 1..n campaigns"""

    campaign = models.CharField(max_length=500)
    engine_id = models.BigIntegerField()
    account = models.ForeignKey(Account)
    STATUS_CHOICES = (
        (1, 'Active'),
        (2, 'Paused'),
        (3, 'Deleted'),
    )
    status = models.IntegerField(choices=STATUS_CHOICES, default=1)
    budget = models.FloatField(default=0.0)
    tags = models.ManyToManyField('TagList')
    visible_tags = models.CharField(max_length=200, blank=True, null=True)
    bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True)

    class Meta:
        ordering = ['campaign']

    def __unicode__(self):
        return self.campaign


class AdGroup(models.Model):
    """A Campaign has 1..n AdGroups"""

    ad_group = models.CharField(max_length=500)
    engine_id = models.BigIntegerField()
    campaign = models.ForeignKey(Campaign)
    STATUS_CHOICES = (
        (1, 'Active'),
        (2, 'Paused'),
        (3, 'Deleted'),
        (4, 'Draft'),
    )
    status = models.IntegerField(choices=STATUS_CHOICES, default=1)
    max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0'))
    content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0'))
    tags = models.ManyToManyField('TagList')
    visible_tags = models.CharField(max_length=200, blank=True, null=True)
    bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True)

    class Meta:
        ordering = ['ad_group']

class CampaignStat(models.Model):
    """Stats by campaign"""

    date = models.DateField()
    account = models.ForeignKey(Account)
    campaign = models.ForeignKey(Campaign)
    impressions = models.IntegerField(default=0)
    clicks = models.IntegerField(default=0)
    cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0'))
    conversions = models.IntegerField(default=0)
    revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0'))
    average_position = models.FloatField(default=0.0)


class AdGroupStat(models.Model):
    """Stats by ad group"""

    date = models.DateField()
    account = models.ForeignKey(Account)
    ad_group = models.ForeignKey(AdGroup)
    impressions = models.IntegerField(default=0)
    clicks = models.IntegerField(default=0)
    cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0'))
    conversions = models.IntegerField(default=0)
    revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0'))
    average_position = models.FloatField(default=0.0)

And here is the crazy annotation issue I am running into. Note the GROUP BY section of each query.

>>> from mars.models import *
>>> from django.db.models import Sum
>>> annotations = {
    '_impressions': Sum('impressions'),
    '_clicks': Sum('clicks'),
    '_cost': Sum('cost'),
    '_average_position': Avg('average_position'),
    '_conversions': Sum('conversions'),
    '_revenue': Sum('revenue')
    }
>>> ad_group_values = ['ad_group__id',
    'ad_group__engine_id',
    'ad_group__ad_group',
    'ad_group__campaign__account__engine',
    'ad_group__campaign__campaign',
    'ad_group__max_cpc',
    'ad_group__content_max_cpc',
    'ad_group__status',
    'ad_group__bidding_rule__name',
    'ad_group__visible_tags']
>>> stats = AdGroupStat.objects.values(*ad_group_values)
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`, 
    AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`,
    SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`,
    SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, 
    SUM(`mars_adgroupstat`.`cost`) AS `_cost`, 
    SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, 
    SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` 
FROM 
    `mars_adgroupstat` 
INNER JOIN 
    `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) 
INNER JOIN 
    `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) 
INNER JOIN 
    `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) 
LEFT OUTER JOIN 
    `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) 
GROUP BY 
    `mars_adgroupstat`.`id`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`
ORDER BY NULL
>>> stats = AdGroupStat.objects.values(*ad_group_values[:-1])
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`,  
    `mars_biddingrule`.`name`,  
    AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`, 
    SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`, 
    SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, 
    SUM(`mars_adgroupstat`.`cost`) AS `_cost`, 
    SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, 
    SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` 
FROM 
    `mars_adgroupstat` 
INNER JOIN 
    `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) 
INNER JOIN 
    `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) 
INNER JOIN 
    `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) 
LEFT OUTER JOIN 
    `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) 
GROUP BY 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name` 
ORDER BY NULL
>>> stats = AdGroupStat.objects.values(*ad_group_values[1:])
>>> stats = stats.annotate(**annotations)
>>> print(stats.query)
SELECT 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`,  
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`, 
    AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`, 
    SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`, 
    SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, 
    SUM(`mars_adgroupstat`.`cost`) AS `_cost`, 
    SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, 
    SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` 
FROM 
    `mars_adgroupstat` 
INNER JOIN 
    `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) 
INNER JOIN 
    `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) 
INNER JOIN 
    `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) 
LEFT OUTER JOIN 
    `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) 
GROUP BY 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`,
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`,  
    `mars_adgroup`.`content_max_cpc`,
    `mars_adgroup`.`status`,
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags` 
ORDER BY NULL
>>>


In the first query, using all 10 arguments in .values, the GROUP BY has 11 items as it seems to be adding the local model id:

GROUP BY 
    `mars_adgroupstat`.`id`, 
    `mars_adgroupstat`.`ad_group_id`,  
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`

In the second or third queries, using 9 arguments in .values, the GROUP BY has 18 items as it seems to be doubling the GROUP BY args:

GROUP BY 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroup`.`visible_tags`

GROUP BY 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_biddingrule`.`name`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_account`.`engine`, 
    `mars_campaign`.`campaign`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`,
    `mars_biddingrule`.`name`

The first query does not return what I want, while the second and third return correctly, but look pretty funky. Using MySQL 5.1.41-3ubuntu12.8, Django SVN 16332, Python 2.6.5 on Ubuntu 10.04.

Change History (4)

comment:1 by Karen Tracey, 13 years ago

Description: modified (diff)

Wrapped SQL to allow for easier reading.

comment:2 by Ramiro Morales, 13 years ago

It would be great if the case can be reduced by the OP to a simpler one where the reported issue is still present. That way we could be able to diagnose if this is e.g. a duplicate of #14357.

comment:3 by anonymous, 13 years ago

Simplified:

Models:

class Account(models.Model):
    username = models.CharField(max_length=200)
    password = models.CharField(max_length=200)
    engine = models.IntegerField(choices=ENGINE_CHOICES)


class AdGroup(models.Model):
    ad_group = models.CharField(max_length=500)
    engine_id = models.BigIntegerField()
    status = models.IntegerField(choices=STATUS_CHOICES, default=1)
    max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0'))
    content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0'))

    class Meta:
        ordering = ['ad_group']


class AdGroupStat(models.Model):
    date = models.DateField()
    account = models.ForeignKey(Account)
    ad_group = models.ForeignKey(AdGroup)
    impressions = models.IntegerField(default=0)

And here is the crazy annotation issue I am running into:

>>> from mars.models import *
>>> from django.db.models import Sum
>>> ad_group_values = [
    'account__engine',
    'account__username',
    'account__password',
    'ad_group__id',
    'ad_group__engine_id',
    'ad_group__ad_group',
    'ad_group__max_cpc',
    'ad_group__content_max_cpc',
    'ad_group__status'
]
>>> s = AdGroupStat.objects.values(*ad_group_values)
>>> s = s.annotate(Sum('impressions'))
>>> print(s.query)
SELECT 
    `mars_account`.`engine`, 
    `mars_account`.`username`, 
    `mars_account`.`password`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum` 
FROM 
    `mars_adgroupstat` 
INNER JOIN 
    `mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`) 
INNER JOIN 
    `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) 
GROUP BY 
    `mars_account`.`engine`, 
    `mars_account`.`username`, 
    `mars_account`.`password`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status` 
ORDER BY NULL
>>> ad_group_values.append('account__id')
>>> s = AdGroupStat.objects.values(*ad_group_values)
>>> s = s.annotate(Sum('impressions'))
>>> print(s.query)
SELECT 
    `mars_account`.`engine`, 
    `mars_account`.`username`, 
    `mars_account`.`password`,
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_adgroupstat`.`account_id`, 
    SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum` 
FROM 
    `mars_adgroupstat` 
INNER JOIN 
    `mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`) 
INNER JOIN 
    `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) 
GROUP BY 
    `mars_adgroupstat`.`id`, 
    `mars_account`.`engine`, 
    `mars_account`.`username`, 
    `mars_account`.`password`, 
    `mars_adgroupstat`.`ad_group_id`, 
    `mars_adgroup`.`engine_id`, 
    `mars_adgroup`.`ad_group`, 
    `mars_adgroup`.`max_cpc`, 
    `mars_adgroup`.`content_max_cpc`, 
    `mars_adgroup`.`status`, 
    `mars_adgroupstat`.`account_id` 
ORDER BY NULL

Note the extra mars_adgroupstat.id in the GROUP BY of the second query.

comment:4 by Ernesto Rico-Schmidt, 13 years ago

Resolution: worksforme
Status: newclosed
UI/UX: unset

I can't reproduce this problem given the code you've provided and using the Subversion r16341 with MySQL.

Note: See TracTickets for help on using tickets.
Back to Top