参考文献

按日期分组统计

  • 按月统计

    1
    2
    3
    4
    5
    6
    7
    SELECT
    date_format( create_time, '%Y-%m' ) date,
    count(*) num
    FROM
    table_name
    GROUP BY
    date_format( create_time, '%Y-%m' );
  • 按天统计

    1
    2
    3
    4
    5
    6
    7
    SELECT
    date_format( create_time, '%Y-%m-%d' ) date,
    count(*) num
    FROM
    table_name
    GROUP BY
    date_format( create_time, '%Y-%m-%d' );

MySQL查询日期数据

  • 今天

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE to_days(时间字段名) = to_days(now());
  • 昨天

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE TO_DAYS( NOW( ) ) - TO_DAYS( 时间字段名) <= 1
  • 最近7天

    1
    2
    3
    SELECT	* 
    FROM 表名
    WHERE DATE_SUB( CURDATE(), INTERVAL 7 DAY ) <= date( 时间字段名) );
  • 近30天

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE DATE_SUB(CURDATE(), INTERVAL 30 DAY) <= date(时间字段名)
  • 本月

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE DATE_FORMAT( 时间字段名, '%Y%m' ) = DATE_FORMAT( CURDATE( ) , '%Y%m' )
  • 上一月

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE PERIOD_DIFF( date_format( now( ) , '%Y%m' ) , date_format( 时间字段名, '%Y%m' ) ) =1
  • 本季度

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE QUARTER ( 时间字段名 )= QUARTER (now())
  • 上季度

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE QUARTER(时间字段名) = QUARTER(DATE_SUB(now(),interval 1 QUARTER));
  • 本年

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE YEAR ( 时间字段名 )= YEAR (NOW());
  • 上年

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE YEAR ( 时间字段名 )= YEAR (DATE_SUB(now(),interval 1 YEAR);
  • 当前这周

    1
    2
    3
    SELECT *
    FROM 表名
    WHERE YEARWEEK(date_format(时间字段名,'%Y-%m-%d')) = YEARWEEK(now());
  • 上周

    1
    2
    3
    SELECT *
    FROM 表名
    WHERE YEARWEEK(date_format(时间字段名,'%Y-%m-%d')) = YEARWEEK(now()) - 1;
  • 距离当前N个月的数据

    1
    2
    3
    SELECT * 
    FROM 表名
    WHERE 时间字段名 BETWEEN date_sub( now(), INTERVAL N MONTH ) AND now();

自连接

  1. 自连接经常和非等值连接结合起来使用.

  2. 自连接和GROUP BY结合使用可以生成递归集合.

  3. 将自连接看作不同表之间的连接更容易理解.

  4. 应把表看作行的集合,用面向集合的方法来思考.

  5. 自连接的性能开销更大,应尽量给用于连接的列建立索引

删除重复行

1
2
3
4
5
6
7
8
9
mysql> select * from user;
+----+--------------+------+---------+
| id | email | name | version |
+----+--------------+------+---------+
| 1 | 123@test.com | 123 | 1 |
| 2 | 123@test.com | 123 | 1 |
| 3 | 123@test.com | 123 | 1 |
+----+--------------+------+---------+
3 rows in set (0.00 sec)
1
2
3
4
5
6
mysql> DELETE
-> FROM
-> `user` as u1
-> WHERE
-> id < ( SELECT max( u2.id ) id FROM `user` as u2 WHERE u1.`name` = u2.`name` AND u1.email = u2.email );
ERROR 1093 (HY000): You can't specify target table 'u1' for update in FROM clause
  • ERROR 1093: 不能先SELECT出同一表中的某些值,再UPDATE这个表(同一语句中)
1
2
3
4
5
6
7
8
9
10
11
12
13
DELETE 
FROM
`user` AS u1
WHERE id < (SELECT id
FROM (SELECT max( u2.id ) id FROM `user` AS u2 WHERE u1.`name` = u2.`name` AND u1.email = u2.email ) AS temp );

mysql> select * from user;
+----+--------------+------+---------+
| id | email | name | version |
+----+--------------+------+---------+
| 3 | 123@test.com | 123 | 1 |
+----+--------------+------+---------+
1 row in set (0.00 sec)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
DELETE 
FROM
`user` AS u1
WHERE
EXISTS (
SELECT
*
FROM
( SELECT * FROM `user` u2 WHERE u1.`name` = u2.`name` AND u1.email = u2.email AND u1.id < u2.id ) AS temp );

mysql> select * from user;
+----+--------------+------+---------+
| id | email | name | version |
+----+--------------+------+---------+
| 3 | 123@test.com | 123 | 1 |
+----+--------------+------+---------+
1 row in set (0.00 sec)
1
2
3
4
5
6
-- 删除重复行的高效SQL语句:通过NOT IN求补集
DELETE
FROM
goods
WHERE
id NOT IN ( SELECT id FROM ( SELECT max( id ) id FROM goods GROUP BY NAME, price ) tmp );

查找局部不一致的列

1
2
3
4
5
6
7
8
9
10
11
12
mysql> select * from Addresses;
+--------------+-----------+----------------------------------+
| name | family_id | address |
+--------------+-----------+----------------------------------+
| 前田义明 | 100 | 东京都港区虎之门3-2-29 |
| 前田由美 | 100 | 东京都港区虎之门3-2-92 |
| 加藤胜 | 200 | 东京都新宿区西新宿2-8-1 |
| 加藤茶 | 200 | 东京都新宿区西新宿2-8-1 |
| 华生 | 400 | 贝克街221B |
| 福尔摩斯 | 300 | 贝克街221B |
+--------------+-----------+----------------------------------+
6 rows in set (0.00 sec)
1
2
3
4
5
6
7
8
SELECT DISTINCT
a1.NAME,a1.address
FROM
Addresses a1,
Addresses a2
WHERE
a1.family_id = a2.family_id
AND a1.address <> a2.address;

排序

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
create table goods(
name varchar(10) ,
price int
);

INSERT INTO `goods` (`name`, `price`) VALUES ('pg', 50);
INSERT INTO `goods` (`name`, `price`) VALUES ('jz',100);
INSERT INTO `goods` (`name`, `price`) VALUES ('pt', 50);
INSERT INTO `goods` (`name`, `price`) VALUES ('xg', 80);
INSERT INTO `goods` (`name`, `price`) VALUES ('nm', 30);
INSERT INTO `goods` (`name`, `price`) VALUES ('xj', 50);
mysql> select * from goods;
+------+-------+
| name | price |
+------+-------+
| pg | 50 |
| pg | 50 |
| jz | 100 |
| pt | 50 |
| xg | 80 |
| nm | 30 |
| xj | 50 |
+------+-------+
7 rows in set (0.00 sec)

-- 排序: 使用窗口函数
SELECT `NAME`,
price,
RANK() over ( ORDER BY price DESC ) AS rank_1,
DENSE_RANK() over ( ORDER BY price DESC ) AS rank_2
FROM
goods;

+------+-------+--------+--------+
| NAME | price | rank_1 | rank_2 |
+------+-------+--------+--------+
| jz | 100 | 1 | 1 |
| xg | 80 | 2 | 2 |
| pg | 50 | 3 | 3 |
| pg | 50 | 3 | 3 |
| pt | 50 | 3 | 3 |
| xj | 50 | 3 | 3 |
| nm | 30 | 7 | 4 |
+------+-------+--------+--------+
7 rows in set (0.01 sec)

-- 排序从 1开始.如果已出现相同位次,则跳过之后的位次
SELECT P1.name,
P1.price,
(SELECT COUNT(P2.price)
FROM goods P2
WHERE P2.price > P1.price) + 1 AS rank_1
FROM goods P1
ORDER BY rank_1;
+------+-------+--------+
| name | price | rank_1 |
+------+-------+--------+
| jz | 100 | 1 |
| xg | 80 | 2 |
| pg | 50 | 3 |
| pg | 50 | 3 |
| pt | 50 | 3 |
| xj | 50 | 3 |
| nm | 30 | 7 |
+------+-------+--------+
7 rows in set (0.01 sec)

SELECT P1.name,
P1.price,
(SELECT COUNT(DISTINCT P2.price)
FROM goods P2
WHERE P2.price > P1.price) + 1 AS rank_1
FROM goods P1
ORDER BY rank_1;
+------+-------+--------+
| name | price | rank_1 |
+------+-------+--------+
| jz | 100 | 1 |
| xg | 80 | 2 |
| pg | 50 | 3 |
| pg | 50 | 3 |
| pt | 50 | 3 |
| xj | 50 | 3 |
| nm | 30 | 4 |
+------+-------+--------+
7 rows in set (0.00 sec)

SELECT P1.name,
MAX(P1.price) AS price,
COUNT(P2.name) +1 AS rank_1
FROM goods P1 LEFT OUTER JOIN goods P2
ON P1.price < P2.price
GROUP BY P1.name
ORDER BY rank_1;

+------+-------+--------+
| name | price | rank_1 |
+------+-------+--------+
| jz | 100 | 1 |
| xg | 80 | 2 |
| pt | 50 | 3 |
| xj | 50 | 3 |
| pg | 50 | 5 |
| nm | 30 | 7 |
+------+-------+--------+
6 rows in set (0.00 sec)
  • 注意 的地方,与多表之间进行的普通连接相比,自连接的性能开销更大(特别是与非等值连接结合使用的时候),因此用于自连接的列推荐使用主键或者在相关列上建立索引

NULL

SQL 三值逻辑

  1. NULL不是值.

  2. 因为NULL不是值,所以不能对其使用谓词

  3. NULL使用谓词后的结果是unknown.

  4. unknown参与到逻辑运算时,SQL 的运行会和预想的不一样.

  5. 按步骤追踪 SQL 的执行过程能有效应对 4 中的情况.

NOT INNOT EXISTS 不是等价的

HAVING

  • HAVING子句是可以单独使用的(相当于对空字段进行了GROUP BY操作).

判断序列是否连续

1
2
3
4
5
6
SELECT
'存在缺失的编号' AS gap
FROM
tab_name
HAVING
count(*) <> max( seq );
  • 若查询结果有1行,则表示序列不连续;如查询结果为空,则表示序列连续;

查询缺失编号的最小值

1
2
3
4
5
6
SELECT
min( seq + 1 ) AS gap
FROM
tab_name
WHERE
( seq + 1 ) NOT IN ( SELECT seq FROM tab_name );

HAVING子句进行子查询: 求众数

  • 众数(Mode)是指在统计分布上具有明显集中趋势点的数值,代表数据的一般水平. 也是一组数据中出现次数最多的数值,有时众数在一组数中有好几个.用M表示.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
-- 求众数的 SQL语句 (1):使用谓词 
SELECT income, COUNT(*) AS cnt
FROM Graduates
GROUP BY income
HAVING COUNT(*) >= ALL ( SELECT COUNT(*)
FROM Graduates
   GROUP BY income);
  
--求众数的 SQL语句 (2):使用极值函数
SELECT income, COUNT(*) AS cnt
FROM Graduates
GROUP BY income
HAVING COUNT(*) >= ( SELECT MAX(cnt)
FROM ( SELECT COUNT(*) AS cnt
FROM Graduates
GROUP BY income) TMP ) ;  

HAVING 子句进行自连接: 求中位数

  • 中位数指的是将集合中的元素按升序排列后恰好位于正中间的元素. 如果集合的元素个数为偶数,则取中间两个元素的平均值作为中位数.
1
2
3
4
5
6
7
8
9
10
11
-- 求中位数的SQL语句: 在HAVING子句中使用非等值自连接
SELECT
AVG( DISTINCT income )
FROM
(SELECT T1.income
FROM
Graduates T1,Graduates T2
GROUP BY
T1.income
HAVING SUM( CASE WHEN T2.income >= T1.income THEN 1 ELSE 0 END ) >= COUNT(*) / 2 -- S1的条件
AND SUM( CASE WHEN T2.income <= T1.income THEN 1 ELSE 0 END ) >= COUNT(*) / 2 ) TMP;-- S2的条件

查询不包含NULL的集合

  • COUNT函数的使用方法有 COUNT(*)和 COUNT(列名)两种,它们的区别有两个:第一个是性能上的区别;第二个是 COUNT(*)可以用于NULL,而 COUNT(列名 )与其他聚合函数一样,要先排除掉 NULL的行再进行统计. 第二个区别也可以这么理解:COUNT(*)查询的是所有行的数目,而 COUNT(列名 )查询的则不一定是.
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
mysql> select * from t;
+---+------+
| a | b |
+---+------+
| 1 | NULL |
| 2 | NULL |
| 4 | NULL |
+---+------+
3 rows in set (0.00 sec)
mysql> select count(*),count(b) from t;
+----------+----------+
| count(*) | count(b) |
+----------+----------+
| 3 | 0 |
+----------+----------+
1 row in set (0.00 sec)

查询用户最近一次生日

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
SELECT
`NAME`,
birthday,
IF
( cur > today, cur, next ) AS birth_day
FROM
(
SELECT
`NAME`,
birthday,
today,
DATE_ADD( cur, INTERVAL IF ( DAY ( birthday )= 29 && DAY ( cur )= 28, 1, 0 ) DAY ) AS cur,
DATE_ADD( next, INTERVAL IF ( DAY ( birthday )= 29 && DAY ( cur )= 28, 1, 0 ) DAY ) AS next
FROM
(
SELECT
`NAME`,
birthday,
today,
DATE_ADD( birthday, INTERVAL diff YEAR ) AS cur,
DATE_ADD( birthday, INTERVAL diff + 1 YEAR ) AS next
FROM
( SELECT patient_name AS `NAME`, birth_date AS birthday, ( YEAR ( NOW())- YEAR ( birth_date )) AS diff, NOW() AS today FROM user_info ) AS a
) AS b
) AS c

存储过程

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
CREATE TABLE nums ( a INT UNSIGNED NOT NULL PRIMARY KEY ) ENGINE = InnoDB;
CREATE PROCEDURE pCreateNums ( cnt INT UNSIGNED ) BEGIN
DECLARE
s INT UNSIGNED DEFAULT 1;
TRUNCATE TABLE nums;
WHILE
s <= cnt DO
BEGIN
INSERT INTO nums SELECT
s;

SET s = s + 1;

END;

END WHILE;

END;
CALL pCreateNums ( 100000 );

--- 快速生成数字辅助表
CREATE PROCEDURE pFastCreateNums ( cnt INT UNSIGNED ) BEGIN
DECLARE
s INT UNSIGNED DEFAULT 1;
TRUNCATE TABLE nums;
INSERT INTO nums SELECT
s;
WHILE
s * 2 <= cnt DO
BEGIN
INSERT INTO nums SELECT
a + s
FROM
nums;

SET s = s * 2;

END;

END WHILE;

END;
CALL pFastCreateNums ( 200000 );
--- 生成某个时间范围内的时间维度表
CREATE PROCEDURE pCreateDimTime ( START DATE, END DATE ) BEGIN
SELECT
DATE_ADD( START, INTERVAL a - 1 DAY )
FROM
nums
WHERE
a <= DATEDIFF( END, START )+ 1;

END;
CALL pCreateDimTime ( '2023-01-01', '2024-01-01' );

示例

统计一张表中每条记录所占字节的平均值

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
mysql> desc orders;
+-----------------+-------------+------+-----+---------+-------+
| Field | Type | Null | Key | Default | Extra |
+-----------------+-------------+------+-----+---------+-------+
| o_orderkey | int | NO | PRI | NULL | |
| o_custkey | int | YES | MUL | NULL | |
| o_orderstatus | char(1) | YES | | NULL | |
| o_totalprice | double | YES | | NULL | |
| o_orderDATE | date | YES | MUL | NULL | |
| o_orderpriority | char(15) | YES | | NULL | |
| o_clerk | char(15) | YES | | NULL | |
| o_shippriority | int | YES | | NULL | |
| o_comment | varchar(79) | YES | | NULL | |
+-----------------+-------------+------+-----+---------+-------+
9 rows in set (0.02 sec)
1
2
3
4
5
6
SELECT
ROUND( AVG( `ROW` ), 2 )
FROM
( SELECT ( 4+4+1+8+8+ LENGTH( o_orderpriority )+ LENGTH( o_clerk )+ 4+ LENGTH( o_comment )) AS `ROW` FROM orders LIMIT 15000 ) a;

desc orders;