Opened 13 years ago
Closed 13 years ago
#16165 closed Bug (worksforme)
Annotations get crazy when .values has many arguments (MySQL)
Reported by: | Owned by: | nobody | |
---|---|---|---|
Component: | Database layer (models, ORM) | Version: | dev |
Severity: | Normal | Keywords: | aggregate, annotate |
Cc: | Triage Stage: | Unreviewed | |
Has patch: | no | Needs documentation: | no |
Needs tests: | no | Patch needs improvement: | no |
Easy pickings: | no | UI/UX: | no |
Description (last modified by )
Models in "mars" project:
class BiddingRule(models.Model): name = models.CharField(max_length=80) ... class TagList(models.Model): name = models.CharField(max_length=50) ... def __unicode__(self): return self.name class Property(models.Model): property = models.CharField(max_length=100) def __unicode__(self): return self.property class Meta: ordering = ['property'] verbose_name_plural = 'properties' class Account(models.Model): """A property has 1..n accounts""" username = models.CharField(max_length=200) password = models.CharField(max_length=200) ENGINE_CHOICES = ( (1, 'Google'), (2, 'Yahoo'), (3, 'Bing'), ) property = models.ForeignKey(Property) engine = models.IntegerField(choices=ENGINE_CHOICES) class Campaign(models.Model): """An account has 1..n campaigns""" campaign = models.CharField(max_length=500) engine_id = models.BigIntegerField() account = models.ForeignKey(Account) STATUS_CHOICES = ( (1, 'Active'), (2, 'Paused'), (3, 'Deleted'), ) status = models.IntegerField(choices=STATUS_CHOICES, default=1) budget = models.FloatField(default=0.0) tags = models.ManyToManyField('TagList') visible_tags = models.CharField(max_length=200, blank=True, null=True) bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True) class Meta: ordering = ['campaign'] def __unicode__(self): return self.campaign class AdGroup(models.Model): """A Campaign has 1..n AdGroups""" ad_group = models.CharField(max_length=500) engine_id = models.BigIntegerField() campaign = models.ForeignKey(Campaign) STATUS_CHOICES = ( (1, 'Active'), (2, 'Paused'), (3, 'Deleted'), (4, 'Draft'), ) status = models.IntegerField(choices=STATUS_CHOICES, default=1) max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) tags = models.ManyToManyField('TagList') visible_tags = models.CharField(max_length=200, blank=True, null=True) bidding_rule = models.ForeignKey(BiddingRule, null=True, blank=True) class Meta: ordering = ['ad_group'] class CampaignStat(models.Model): """Stats by campaign""" date = models.DateField() account = models.ForeignKey(Account) campaign = models.ForeignKey(Campaign) impressions = models.IntegerField(default=0) clicks = models.IntegerField(default=0) cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) conversions = models.IntegerField(default=0) revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) average_position = models.FloatField(default=0.0) class AdGroupStat(models.Model): """Stats by ad group""" date = models.DateField() account = models.ForeignKey(Account) ad_group = models.ForeignKey(AdGroup) impressions = models.IntegerField(default=0) clicks = models.IntegerField(default=0) cost = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) conversions = models.IntegerField(default=0) revenue = models.DecimalField(max_digits=13, decimal_places=4, default=Decimal('0')) average_position = models.FloatField(default=0.0)
And here is the crazy annotation issue I am running into. Note the GROUP BY section of each query.
>>> from mars.models import * >>> from django.db.models import Sum >>> annotations = { '_impressions': Sum('impressions'), '_clicks': Sum('clicks'), '_cost': Sum('cost'), '_average_position': Avg('average_position'), '_conversions': Sum('conversions'), '_revenue': Sum('revenue') } >>> ad_group_values = ['ad_group__id', 'ad_group__engine_id', 'ad_group__ad_group', 'ad_group__campaign__account__engine', 'ad_group__campaign__campaign', 'ad_group__max_cpc', 'ad_group__content_max_cpc', 'ad_group__status', 'ad_group__bidding_rule__name', 'ad_group__visible_tags'] >>> stats = AdGroupStat.objects.values(*ad_group_values) >>> stats = stats.annotate(**annotations) >>> print(stats.query) SELECT `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags`, AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`, SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`, SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, SUM(`mars_adgroupstat`.`cost`) AS `_cost`, SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` FROM `mars_adgroupstat` INNER JOIN `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) INNER JOIN `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) INNER JOIN `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) LEFT OUTER JOIN `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) GROUP BY `mars_adgroupstat`.`id`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags` ORDER BY NULL >>> stats = AdGroupStat.objects.values(*ad_group_values[:-1]) >>> stats = stats.annotate(**annotations) >>> print(stats.query) SELECT `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`, SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`, SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, SUM(`mars_adgroupstat`.`cost`) AS `_cost`, SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` FROM `mars_adgroupstat` INNER JOIN `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) INNER JOIN `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) INNER JOIN `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) LEFT OUTER JOIN `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) GROUP BY `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name` ORDER BY NULL >>> stats = AdGroupStat.objects.values(*ad_group_values[1:]) >>> stats = stats.annotate(**annotations) >>> print(stats.query) SELECT `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags`, AVG(`mars_adgroupstat`.`average_position`) AS `_average_position`, SUM(`mars_adgroupstat`.`conversions`) AS `_conversions`, SUM(`mars_adgroupstat`.`clicks`) AS `_clicks`, SUM(`mars_adgroupstat`.`cost`) AS `_cost`, SUM(`mars_adgroupstat`.`revenue`) AS `_revenue`, SUM(`mars_adgroupstat`.`impressions`) AS `_impressions` FROM `mars_adgroupstat` INNER JOIN `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) INNER JOIN `mars_campaign` ON (`mars_adgroup`.`campaign_id` = `mars_campaign`.`id`) INNER JOIN `mars_account` ON (`mars_campaign`.`account_id` = `mars_account`.`id`) LEFT OUTER JOIN `mars_biddingrule` ON (`mars_adgroup`.`bidding_rule_id` = `mars_biddingrule`.`id`) GROUP BY `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags` ORDER BY NULL >>>
In the first query, using all 10 arguments in .values, the GROUP BY has 11 items as it seems to be adding the local model id:
GROUP BY `mars_adgroupstat`.`id`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags`
In the second or third queries, using 9 arguments in .values, the GROUP BY has 18 items as it seems to be doubling the GROUP BY args:
GROUP BY `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroup`.`visible_tags` GROUP BY `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_account`.`engine`, `mars_campaign`.`campaign`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_biddingrule`.`name`
The first query does not return what I want, while the second and third return correctly, but look pretty funky. Using MySQL 5.1.41-3ubuntu12.8, Django SVN 16332, Python 2.6.5 on Ubuntu 10.04.
Change History (4)
comment:1 by , 13 years ago
Description: | modified (diff) |
---|
comment:2 by , 13 years ago
It would be great if the case can be reduced by the OP to a simpler one where the reported issue is still present. That way we could be able to diagnose if this is e.g. a duplicate of #14357.
comment:3 by , 13 years ago
Simplified:
Models:
class Account(models.Model): username = models.CharField(max_length=200) password = models.CharField(max_length=200) engine = models.IntegerField(choices=ENGINE_CHOICES) class AdGroup(models.Model): ad_group = models.CharField(max_length=500) engine_id = models.BigIntegerField() status = models.IntegerField(choices=STATUS_CHOICES, default=1) max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) content_max_cpc = models.DecimalField(max_digits=7, decimal_places=4, default=Decimal('0')) class Meta: ordering = ['ad_group'] class AdGroupStat(models.Model): date = models.DateField() account = models.ForeignKey(Account) ad_group = models.ForeignKey(AdGroup) impressions = models.IntegerField(default=0)
And here is the crazy annotation issue I am running into:
>>> from mars.models import * >>> from django.db.models import Sum >>> ad_group_values = [ 'account__engine', 'account__username', 'account__password', 'ad_group__id', 'ad_group__engine_id', 'ad_group__ad_group', 'ad_group__max_cpc', 'ad_group__content_max_cpc', 'ad_group__status' ] >>> s = AdGroupStat.objects.values(*ad_group_values) >>> s = s.annotate(Sum('impressions')) >>> print(s.query) SELECT `mars_account`.`engine`, `mars_account`.`username`, `mars_account`.`password`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum` FROM `mars_adgroupstat` INNER JOIN `mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`) INNER JOIN `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) GROUP BY `mars_account`.`engine`, `mars_account`.`username`, `mars_account`.`password`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status` ORDER BY NULL >>> ad_group_values.append('account__id') >>> s = AdGroupStat.objects.values(*ad_group_values) >>> s = s.annotate(Sum('impressions')) >>> print(s.query) SELECT `mars_account`.`engine`, `mars_account`.`username`, `mars_account`.`password`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_adgroupstat`.`account_id`, SUM(`mars_adgroupstat`.`impressions`) AS `impressions__sum` FROM `mars_adgroupstat` INNER JOIN `mars_account` ON (`mars_adgroupstat`.`account_id` = `mars_account`.`id`) INNER JOIN `mars_adgroup` ON (`mars_adgroupstat`.`ad_group_id` = `mars_adgroup`.`id`) GROUP BY `mars_adgroupstat`.`id`, `mars_account`.`engine`, `mars_account`.`username`, `mars_account`.`password`, `mars_adgroupstat`.`ad_group_id`, `mars_adgroup`.`engine_id`, `mars_adgroup`.`ad_group`, `mars_adgroup`.`max_cpc`, `mars_adgroup`.`content_max_cpc`, `mars_adgroup`.`status`, `mars_adgroupstat`.`account_id` ORDER BY NULL
Note the extra mars_adgroupstat.id in the GROUP BY of the second query.
comment:4 by , 13 years ago
Resolution: | → worksforme |
---|---|
Status: | new → closed |
UI/UX: | unset |
I can't reproduce this problem given the code you've provided and using the Subversion r16341 with MySQL.
Wrapped SQL to allow for easier reading.