Django 聚合分组、F对象的使用、聚合分组增加额外字段的方式

时间:2022-12-09 18:18:48

Django QuerySet 查询表达式

F 对象

简单理解:F 对象让运算直接在数据库中完成,而不是先把数据取到 Python 内存中再计算。F 对象既可以用于 filter() 条件过滤,也可以用于对象级别的操作(赋值后调用 save()),以及 update()、annotate()。

  • 让数据库,而不是 Python 来完成工作
  • 减少某些操作所需的查询次数
from django.db.models import F

对象操作

F()对象表示模型字段的值、模型字段的转换值或带注释的列。它使得引用模型字段值并使用它们执行数据库操作成为可能,而实际上不必将它们从数据库中取出到Python内存中。Django 使用 F() 对象来生成一个 SQL 表达式,在数据库层面描述所需的操作

# Increment ``num`` inside the database: the F() expression assigned below is
# translated into SQL when save() runs, so no read-modify-write happens in Python.
obj = Model.objects.get(name='小明')
obj.num = F('num') + 1
# save() must actually be called; a bare ``obj.save`` only references the method
# and never writes anything.
obj.save()
# After save(), obj.num still holds the F() expression; call
# obj.refresh_from_db() before reading the updated value.

update

# Bump stories_filed for every matching row with a single UPDATE statement;
# the addition is expressed with F() so it is evaluated by the database.
reporter = Reporters.objects.filter(name='Tintin')
one_more_story = F('stories_filed') + 1
reporter.update(stories_filed=one_more_story)

filter中使用

# Compare two columns of the same row: keep students whose ``total`` is
# greater than twice their ``amount``.
twice_amount = F('amount') * 2
Student.objects.filter(total__gt=twice_amount)

annotate中使用

# Attach a computed ``chairs_needed`` value (employees minus chairs) to each
# company, built from two F() column references.
chair_shortfall = F('num_employees') - F('num_chairs')
Company.objects.annotate(chairs_needed=chair_shortfall)

django分组聚合

sql 中 group by : GROUP BY 语句用于结合聚合函数,根据一个或多个列对结果集进行分组。

Django 分组聚合

# values('name') selects the grouping column; the annotate() that follows
# counts the rows in each group under the alias ``c``.
students_by_name = Student.objects.values('name')
queryset = students_by_name.annotate(c=Count('pk'))

sql

-- Groups student rows by name and returns each name with its row count as "c".
SELECT "student"."name", COUNT("student"."id") AS "c" FROM "student" GROUP BY "student"."name"

values

返回一个 QuerySet,当用作可迭代对象时,返回字典,而不是模型实例。其中每一个字典都代表一个对象,键与模型对象的属性名相对应。

annotate

用所提供的 查询表达式 列表对 QuerySet 中的每个对象进行注解(类似sql中as)。表达式可以是一个简单的值,也可以是对模型(或任何相关模型)字段的引用,或者是对与 QuerySet 中的对象相关的对象进行计算的聚合表达式(平均数、总和等)。

annotate() 的每个参数都是一个注解,将被添加到返回的 QuerySet 中的每个对象。

Django 提供的聚合函数在下面的 聚合函数 中介绍。

使用关键字参数指定的注解将使用关键字作为注解的别名。匿名参数将根据聚合函数的名称和被聚合的模型字段为其生成一个别名。只有引用单个字段的聚合表达式才能成为匿名参数。其他一切都必须是关键字参数。

# Use annotate() together with F() to alias a column (like SQL "AS"): ``name``
# is exposed as ``n``, then rows are grouped by ``n`` and counted.
aliased_students = Student.objects.annotate(n=F('name'))
queryset = aliased_students.values('n').annotate(c=Count('pk'))

聚合

聚合多个值

# Group by the aliased name ``n`` and by ``age``, then compute two aggregates
# per group: the row count ``c`` and the average age ``a``.
grouped_students = Student.objects.annotate(n=F('name')).values('n', 'age')
queryset = grouped_students.annotate(c=Count('pk'), a=Avg('age'))

聚合 select 展示非分组字段

Django 没有内置 GROUP_CONCAT 聚合。可以自定义一个聚合类:继承 models.Aggregate,并把 function 设置为 GROUP_CONCAT,这样就能在分组结果中把非分组字段的值拼接后一并展示。

源码

class Aggregate(Func):
    """
    Base expression for SQL aggregate functions, layered on top of ``Func``.

    Adds two optional features to the ``Func`` rendering: a ``DISTINCT``
    prefix inside the call and a ``filter`` condition restricting which rows
    are aggregated.
    """
    # SQL template; as_sql() fills the 'distinct' placeholder.
    template = '%(function)s(%(distinct)s%(expressions)s)'
    contains_aggregate = True
    name = None
    # Wraps the regular template when the backend supports FILTER (WHERE ...).
    filter_template = '%s FILTER (WHERE %%(filter)s)'
    window_compatible = True
    # Subclasses must set this to True before distinct=True is accepted.
    allow_distinct = False

    def __init__(self, *expressions, distinct=False, filter=None, **extra):
        """
        Store the ``distinct`` flag and ``filter`` expression, then defer to ``Func``.

        Raises:
            TypeError: if ``distinct`` is truthy but the class does not set
                ``allow_distinct``.
        """
        if distinct and not self.allow_distinct:
            raise TypeError("%s does not allow distinct." % self.__class__.__name__)
        self.distinct = distinct
        self.filter = filter
        super().__init__(*expressions, **extra)

    def get_source_fields(self):
        """Return ``_output_field_or_none`` for each source expression, filter excluded."""
        # Don't return the filter expression since it's not a source field.
        return [e._output_field_or_none for e in super().get_source_expressions()]

    def get_source_expressions(self):
        """Return the parent's source expressions, with the filter appended last if set."""
        source_expressions = super().get_source_expressions()
        if self.filter:
            return source_expressions + [self.filter]
        return source_expressions

    def set_source_expressions(self, exprs):
        """
        Counterpart of get_source_expressions().

        When a filter is currently set, the last item of ``exprs`` is popped
        (mutating the list) and becomes the new filter; the remaining items
        are handed to the parent implementation.
        """
        self.filter = self.filter and exprs.pop()
        return super().set_source_expressions(exprs)

    def resolve_expression(self, query=None, allow_joins=True, reuse=None, summarize=False, for_save=False):
        """
        Resolve this aggregate and its filter against ``query``.

        Returns the resolved copy produced by the parent class.

        Raises:
            FieldError: when ``summarize`` is false and any (non-filter)
                source expression itself contains an aggregate.
        """
        # Aggregates are not allowed in UPDATE queries, so ignore for_save
        c = super().resolve_expression(query, allow_joins, reuse, summarize)
        c.filter = c.filter and c.filter.resolve_expression(query, allow_joins, reuse, summarize)
        if not summarize:
            # Call Aggregate.get_source_expressions() to avoid
            # returning self.filter and including that in this loop.
            expressions = super(Aggregate, c).get_source_expressions()
            for index, expr in enumerate(expressions):
                if expr.contains_aggregate:
                    # Look up the unresolved expression at the same position
                    # to build the error message from its name or repr.
                    before_resolved = self.get_source_expressions()[index]
                    name = before_resolved.name if hasattr(before_resolved, 'name') else repr(before_resolved)
                    raise FieldError("Cannot compute %s('%s'): '%s' is an aggregate" % (c.name, name, name))
        return c

    @property
    def default_alias(self):
        """
        Return ``'<expression name>__<aggregate name lowercased>'``.

        Only available when get_source_expressions() yields exactly one
        expression and it has a ``name`` attribute; since that method also
        returns the filter, an aggregate with a filter never qualifies.

        Raises:
            TypeError: for any other combination of expressions.
        """
        expressions = self.get_source_expressions()
        if len(expressions) == 1 and hasattr(expressions[0], 'name'):
            return '%s__%s' % (expressions[0].name, self.name.lower())
        raise TypeError("Complex expressions require an alias")

    def get_group_by_cols(self, alias=None):
        """Return an empty list; ``alias`` is ignored."""
        return []

    def as_sql(self, compiler, connection, **extra_context):
        """
        Render the aggregate as ``(sql, params)``.

        Without a filter the parent rendering is used directly. With a filter,
        the FILTER (WHERE ...) template is used when the connection reports
        support for it; otherwise the first source expression is wrapped in a
        ``Case(When(filter, then=...))`` on a copy and rendered without a filter.
        """
        extra_context['distinct'] = 'DISTINCT ' if self.distinct else ''
        if self.filter:
            if connection.features.supports_aggregate_filter_clause:
                filter_sql, filter_params = self.filter.as_sql(compiler, connection)
                # Embed the regular (or caller-supplied) template inside filter_template.
                template = self.filter_template % extra_context.get('template', self.template)
                sql, params = super().as_sql(
                    compiler, connection, template=template, filter=filter_sql,
                    **extra_context
                )
                return sql, params + filter_params
            else:
                # Work on a copy so this instance keeps its filter.
                copy = self.copy()
                copy.filter = None
                source_expressions = copy.get_source_expressions()
                condition = When(self.filter, then=source_expressions[0])
                copy.set_source_expressions([Case(condition)] + source_expressions[1:])
                return super(Aggregate, copy).as_sql(compiler, connection, **extra_context)
        return super().as_sql(compiler, connection, **extra_context)

    def _get_repr_options(self):
        """Extend the parent's repr options with ``distinct`` and ``filter`` when they are set."""
        options = super()._get_repr_options()
        if self.distinct:
            options['distinct'] = self.distinct
        if self.filter:
            options['filter'] = self.filter
        return options

修改

class GroupConcat(models.Aggregate):
    """
    Aggregate that renders ``GROUP_CONCAT(...)`` using the
    ``ORDER BY ... SEPARATOR ...`` syntax.

    Joins the values of ``expression`` within each group into one string,
    which lets a grouped query expose a non-grouped column.
    """
    function = 'GROUP_CONCAT'
    # 'distinct' is filled in by Aggregate.as_sql(); 'ordering' and
    # 'separator' come from the extra context passed in __init__.
    template = '%(function)s(%(distinct)s%(expressions)s%(ordering)s%(separator)s)'
    allow_distinct = True

    def __init__(self, expression, distinct=True, ordering=None, separator=',', **extra):
        """
        Args:
            expression: field name or expression whose values are concatenated.
            distinct: drop duplicate values before concatenating (default True).
            ordering: optional SQL fragment placed after ``ORDER BY``.
                NOTE: inserted verbatim into the query, so it must never come
                from untrusted input.
            separator: string placed between the concatenated values.
            **extra: forwarded unchanged to ``models.Aggregate``.
        """
        # Double single quotes so the separator cannot terminate the SQL string
        # literal, and double '%' so it survives the parameter interpolation
        # applied to the final SQL.
        separator_literal = str(separator).replace("'", "''").replace('%', '%%')
        super().__init__(
            expression,
            # Aggregate expects a flag here and renders 'DISTINCT ' itself.
            distinct=bool(distinct),
            ordering=' ORDER BY %s' % ordering if ordering is not None else '',
            # Single quotes are string delimiters in every SQL mode; double
            # quotes are identifier quotes under ANSI_QUOTES.
            separator=" SEPARATOR '%s'" % separator_literal,
            output_field=models.CharField(),
            **extra
        )

queryset

# Group by the aliased name ``n`` and by ``age``; for each group return the
# row count, the average age and the concatenated primary keys.
grouped_students = Student.objects.annotate(n=F('name')).values('n', 'age')
queryset = grouped_students.annotate(
    c=Count('pk'),
    a=Avg('age'),
    id=GroupConcat('pk'),
)

datetime 日期分组

datetime 按年、月、周、天分组(如需按小时、分钟、秒分组,可使用 django.db.models.functions 中的 TruncHour、TruncMinute、TruncSecond)

from django.db.models.functions import TruncMonth, TruncWeek, TruncYear, TruncDay
# Truncate ``start_at`` to ``day`` with TruncDay, group by that value and count
# the rows in each group.
students_with_day = Student.objects.annotate(day=TruncDay('start_at'))
students_with_day.values('day').annotate(c=Count('pk'))

sql

-- Counts student rows per truncated start_at value.
-- The same django_datetime_trunc(...) expression appears in SELECT and GROUP BY,
-- so rows sharing a truncated value collapse into one group.
-- NOTE(review): django_datetime_trunc is presumably a helper supplied by the
-- Django database backend rather than a built-in SQL function; confirm.
SELECT
	django_datetime_trunc ( 'day', "student"."start_at", 'UTC', 'UTC' ) AS "day",
	COUNT( "student"."id" ) AS "c" 
FROM
	"student" 
GROUP BY
	django_datetime_trunc ( 'day', "student"."start_at", 'UTC', 'UTC' )