Skip to content

SQL 分析

约 458 字大约 2 分钟

2025-09-03

基本顺序

SELECT
FROM
WHERE
GROUP BY
HAVING
ORDER BY

分析顺序

FROM -> WHERE -> GROUP BY -> SELECT -> ORDER

以一个复杂的 SQL 为例

SELECT
    c.region AS region,
    cat.category_name AS category,
    SUM(oi.quantity * oi.price) AS total_sales,
    COUNT(DISTINCT o.order_id) AS order_count,
    ROUND(SUM(oi.quantity * oi.price) / COUNT(DISTINCT o.order_id), 2) AS avg_order_value,
    RANK() OVER (PARTITION BY c.region ORDER BY SUM(oi.quantity * oi.price) DESC) AS sales_rank_in_region,
    ROUND(
        (SUM(oi.quantity * oi.price) / 
        SUM(SUM(oi.quantity * oi.price)) OVER (PARTITION BY c.region)) * 100, 
    2) AS sales_percentage,
    ROUND(
        (SUM(oi.quantity * oi.price) - 
        LAG(SUM(oi.quantity * oi.price), 1) OVER (
            PARTITION BY c.region, cat.category_id 
            ORDER BY DATE_FORMAT(o.order_date, '%Y-%m')
        )) / 
        NULLIF(LAG(SUM(oi.quantity * oi.price), 1) OVER (
            PARTITION BY c.region, cat.category_id 
            ORDER BY DATE_FORMAT(o.order_date, '%Y-%m')
        ), 0) * 100, 
    3) AS month_over_month_growth
FROM
    orders o
JOIN
    order_items oi ON o.order_id = oi.order_id
JOIN
    products p ON oi.product_id = p.product_id
JOIN
    categories cat ON p.category_id = cat.category_id
JOIN
    customers c ON o.customer_id = c.customer_id
WHERE
    o.order_date >= '2023-01-01' AND o.order_date < '2023-04-01'
    AND c.region IN ('华东', '华北', '华南')
GROUP BY
    c.region,
    cat.category_name,
    DATE_FORMAT(o.order_date, '%Y-%m')
ORDER BY
    c.region,
    sales_rank_in_region;

首先分析 FROM

FROM
    orders o
JOIN
    order_items oi ON o.order_id = oi.order_id
JOIN
    products p ON oi.product_id = p.product_id
JOIN
    categories cat ON p.category_id = cat.category_id
JOIN
    customers c ON o.customer_id = c.customer_id

将 5 个表进行拼接,得到第一个中间表,在这里称为表一

WHERE

WHERE
    o.order_date >= '2023-01-01' AND o.order_date < '2023-04-01'
    AND c.region IN ('华东', '华北', '华南')

根据 WHERE条件对表一进行行级过滤,得到表二

GROUP BY

GROUP BY
    c.region,
    cat.category_name,
    DATE_FORMAT(o.order_date, '%Y-%m')

表二按照GROUP BY条件进行字段分组,得到表三

SELECT

SELECT
    c.region AS region,
    cat.category_name AS category,
    SUM(oi.quantity * oi.price) AS total_sales,
    COUNT(DISTINCT o.order_id) AS order_count,
    ROUND(SUM(oi.quantity * oi.price) / COUNT(DISTINCT o.order_id), 2) AS avg_order_value,
    RANK() OVER (PARTITION BY c.region ORDER BY SUM(oi.quantity * oi.price) DESC) AS sales_rank_in_region,
    ROUND(
        (SUM(oi.quantity * oi.price) / 
        SUM(SUM(oi.quantity * oi.price)) OVER (PARTITION BY c.region)) * 100, 
    2) AS sales_percentage,
    ROUND(
        (SUM(oi.quantity * oi.price) - 
        LAG(SUM(oi.quantity * oi.price), 1) OVER (
            PARTITION BY c.region, cat.category_id 
            ORDER BY DATE_FORMAT(o.order_date, '%Y-%m')
        )) / 
        NULLIF(LAG(SUM(oi.quantity * oi.price), 1) OVER (
            PARTITION BY c.region, cat.category_id 
            ORDER BY DATE_FORMAT(o.order_date, '%Y-%m')
        ), 0) * 100, 
    3) AS month_over_month_growth

表四

ORDER BY

ORDER BY
    c.region,
    sales_rank_in_region;

表四ORDER BY进行排序,得到最终的结果表