-- 需求:
-- 需求一:
-- 统计视频观看数Top10
SELECT
videoId,
views
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 10
+--------------+-----------+
| videoid | views |
+--------------+-----------+
| dMH0bHeiRNg | 42513417 |
| 0XxI-hvPRRA | 20282464 |
| 1dmVU08zVpA | 16087899 |
| RB-wUgnyGv0 | 15712924 |
| QjA5faZF1A8 | 15256922 |
| -_CSo1gOd48 | 13199833 |
| 49IDp76kjPw | 11970018 |
| tYnn51C3X_w | 11823701 |
| pv5zWaTEVkI | 11672017 |
| D2kJZOfq7zk | 11184051 |
+--------------+-----------+
-- 需求二:
-- 统计视频类别热度Top10 (热度: 通过类别下视频的个数来衡量.)
-- 1. 炸开每个视频的类别
SELECT
videoId ,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name ==>t1
-- 2. 按照视频类别分组,求组内的视频个数, 按照视频个数倒叙排序,取top10
SELECT
t1.category_name,
COUNT(t1.videoId) video_count
FROM
t1
GROUP BY t1.category_name
ORDER BY video_count DESC
LIMIT 10
-- 3. 组装
SELECT
t1.category_name,
COUNT(t1.videoId) video_count
FROM
(SELECT
videoId ,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name)t1
GROUP BY t1.category_name
ORDER BY video_count DESC
LIMIT 10
+-------------------+--------------+
| t1.category_name | video_count |
+-------------------+--------------+
| Music | 179049 |
| Entertainment | 127674 |
| Comedy | 87818 |
| Film | 73293 |
| Animation | 73293 |
| Sports | 67329 |
| Games | 59817 |
| Gadgets | 59817 |
| Blogs | 48890 |
| People | 48890 |
+-------------------+--------------+
-- 需求三:
-- 统计出视频观看数最高的20个视频的所属类别以及类别包含Top20视频的个数
-- 1. 统计出视频观看数最高的20个视频的所属类别
SELECT
videoId,
views ,
category
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 20 ==>t1
+--------------+-----------+---------------------+
| videoid | views | category |
+--------------+-----------+---------------------+
| dMH0bHeiRNg | 42513417 | ["Comedy"] |
| 0XxI-hvPRRA | 20282464 | ["Comedy"] |
| 1dmVU08zVpA | 16087899 | ["Entertainment"] |
| RB-wUgnyGv0 | 15712924 | ["Entertainment"] |
| QjA5faZF1A8 | 15256922 | ["Music"] |
| -_CSo1gOd48 | 13199833 | ["People","Blogs"] |
| 49IDp76kjPw | 11970018 | ["Comedy"] |
| tYnn51C3X_w | 11823701 | ["Music"] |
| pv5zWaTEVkI | 11672017 | ["Music"] |
| D2kJZOfq7zk | 11184051 | ["People","Blogs"] |
| vr3x_RRJdd4 | 10786529 | ["Entertainment"] |
| lsO6D1rwrKc | 10334975 | ["Entertainment"] |
| 5P6UU6m3cqk | 10107491 | ["Comedy"] |
| 8bbTtPL1jRs | 9579911 | ["Music"] |
| _BuRwH59oAo | 9566609 | ["Comedy"] |
| aRNzWyD7C9o | 8825788 | ["UNA"] |
| UMf40daefsI | 7533070 | ["Music"] |
| ixsZy2425eY | 7456875 | ["Entertainment"] |
| MNxwAU_xAMk | 7066676 | ["Comedy"] |
| RUCZJVJ_M8o | 6952767 | ["Entertainment"] |
+--------------+-----------+---------------------+
-- 2. 炸开视频类别
SELECT
t1.videoId,
t1.views,
category_name
FROM
t1
lateral VIEW explode(t1.category) t1_tmp AS category_name ==>t2
-- 组合
SELECT
t1.videoId,
t1.views,
category_name
FROM
(SELECT
videoId,
views ,
category
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 20) t1
lateral VIEW explode(t1.category) t1_tmp AS category_name
+--------------+-----------+----------------+
| t1.videoid | t1.views | category_name |
+--------------+-----------+----------------+
| dMH0bHeiRNg | 42513417 | Comedy |
| 0XxI-hvPRRA | 20282464 | Comedy |
| 1dmVU08zVpA | 16087899 | Entertainment |
| RB-wUgnyGv0 | 15712924 | Entertainment |
| QjA5faZF1A8 | 15256922 | Music |
| -_CSo1gOd48 | 13199833 | People |
| -_CSo1gOd48 | 13199833 | Blogs |
| 49IDp76kjPw | 11970018 | Comedy |
| tYnn51C3X_w | 11823701 | Music |
| pv5zWaTEVkI | 11672017 | Music |
| D2kJZOfq7zk | 11184051 | People |
| D2kJZOfq7zk | 11184051 | Blogs |
| vr3x_RRJdd4 | 10786529 | Entertainment |
| lsO6D1rwrKc | 10334975 | Entertainment |
| 5P6UU6m3cqk | 10107491 | Comedy |
| 8bbTtPL1jRs | 9579911 | Music |
| _BuRwH59oAo | 9566609 | Comedy |
| aRNzWyD7C9o | 8825788 | UNA |
| UMf40daefsI | 7533070 | Music |
| ixsZy2425eY | 7456875 | Entertainment |
| MNxwAU_xAMk | 7066676 | Comedy |
| RUCZJVJ_M8o | 6952767 | Entertainment |
+--------------+-----------+----------------+
-- 3. 按照类别分组,求组内count
SELECT
t2.category_name,
COUNT(t2.videoId) video_count
FROM
t2
GROUP BY t2.category_name
-- 4. 组合
SELECT
t2.category_name,
COUNT(t2.videoId) video_count
FROM
(SELECT
t1.videoId,
t1.views,
category_name
FROM
(SELECT
videoId,
views ,
category
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 20) t1
lateral VIEW explode(t1.category) t1_tmp AS category_name)t2
GROUP BY t2.category_name
+-------------------+--------------+
| t2.category_name | video_count |
+-------------------+--------------+
| Blogs | 2 |
| Comedy | 6 |
| Entertainment | 6 |
| Music | 5 |
| People | 2 |
| UNA | 1 |
+-------------------+--------------+
-- 需求四:
-- 统计视频观看数Top50所关联视频的所属类别Rank
-- 1. 统计视频观看数Top50所关联视频
SELECT
videoId,
relatedId,
views
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 50 ==>t1
-- 2. 炸开关联视频
SELECT
relatedId_id
FROM
t1
lateral VIEW explode(t1.relatedId) t1_tmp AS relatedId_id ==>t2
-- 3. join gulivideo_orc表,求出每个视频的类别
SELECT
t2.relatedId_id,
t3.category
FROM
t2 JOIN gulivideo_orc t3
ON t2.relatedId_id = t3.videoId ==>t4
-- 4. 炸开视频的类别
SELECT
category_name
FROM
t4
lateral VIEW explode(t4.category) t4_tmp AS category_name ==>t5
-- 5. 按照category_name分组, 求每个列别出现多少次
SELECT
t5.category_name,
COUNT(t5.category_name) category_count
FROM
t5
GROUP BY t5.category_name ==>t6
-- 6. 求排名
SELECT
t6.category_name,
t6.category_count,
rank() over(ORDER BY t6.category_count DESC ) category_rk
FROM
t6
-- 组合:
SELECT
t6.category_name,
t6.category_count,
rank() over(ORDER BY t6.category_count DESC ) category_rk
FROM
(SELECT
t5.category_name,
COUNT(t5.category_name) category_count
FROM
(SELECT
category_name
FROM
(SELECT
t2.relatedId_id,
t3.category
FROM
(SELECT
relatedId_id
FROM
(SELECT
videoId,
relatedId,
views
FROM
gulivideo_orc
ORDER BY views DESC
LIMIT 50)t1
lateral VIEW explode(t1.relatedId) t1_tmp AS relatedId_id)t2 JOIN gulivideo_orc t3
ON t2.relatedId_id = t3.videoId)t4
lateral VIEW explode(t4.category) t4_tmp AS category_name)t5
GROUP BY t5.category_name)t6
+-------------------+--------------------+--------------+
| t6.category_name | t6.category_count | category_rk |
+-------------------+--------------------+--------------+
| Comedy | 237 | 1 |
| Entertainment | 216 | 2 |
| Music | 195 | 3 |
| People | 51 | 4 |
| Blogs | 51 | 4 |
| Film | 47 | 6 |
| Animation | 47 | 6 |
| News | 24 | 8 |
| Politics | 24 | 8 |
| Gadgets | 22 | 10 |
| Games | 22 | 10 |
| Sports | 19 | 12 |
| Howto | 14 | 13 |
| DIY | 14 | 13 |
| UNA | 13 | 15 |
| Travel | 12 | 16 |
| Places | 12 | 16 |
| Pets | 11 | 18 |
| Animals | 11 | 18 |
| Autos | 4 | 20 |
| Vehicles | 4 | 20 |
+-------------------+--------------------+--------------+
-- 需求五
-- 统计每个类别中的视频热度Top10,以Music为例
-- 1.炸开每个视频的类别
SELECT
videoId,
views,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name ==>t1
-- 2. 过滤出类别为music的视频,按照views倒叙取10
SELECT
t1.videoId,
t1.views,
t1.category_name
FROM
t1
WHERE t1. category_name = 'Music'
ORDER BY t1.views DESC
LIMIT 10
-- 组合
SELECT
t1.videoId,
t1.views,
t1.category_name
FROM
(SELECT
videoId,
views,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name)t1
WHERE t1. category_name = 'Music'
ORDER BY t1.views DESC
LIMIT 10
+--------------+-----------+-------------------+
| t1.videoid | t1.views | t1.category_name |
+--------------+-----------+-------------------+
| QjA5faZF1A8 | 15256922 | Music |
| tYnn51C3X_w | 11823701 | Music |
| pv5zWaTEVkI | 11672017 | Music |
| 8bbTtPL1jRs | 9579911 | Music |
| UMf40daefsI | 7533070 | Music |
| -xEzGIuY7kw | 6946033 | Music |
| d6C0bNDqf3Y | 6935578 | Music |
| HSoVKUVOnfQ | 6193057 | Music |
| 3URfWTEPmtE | 5581171 | Music |
| thtmaZnxk_0 | 5142238 | Music |
+--------------+-----------+-------------------+
-- 需求六
-- 统计每个类别视频观看数Top10
-- 1. 炸开每个视频的类别
SELECT
videoId,
views,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name ==>t1
-- 2.开窗, 按照类别分区,views倒叙, 做排名
SELECT
t1.videoId,
t1.views,
t1.category_name,
rank() over(PARTITION BY t1.category_name ORDER BY t1.views DESC ) rk
FROM
t1 ==>t2
-- 3.取每个类别视频的top10
SELECT
t2.videoId,
t2.views,
t2.category_name,
t2.rk
FROM
t2
WHERE t2.rk <=10
-- 组合
SELECT
t2.videoId,
t2.views,
t2.category_name,
t2.rk
FROM
(SELECT
t1.videoId,
t1.views,
t1.category_name,
rank() over(PARTITION BY t1.category_name ORDER BY t1.views DESC ) rk
FROM
(SELECT
videoId,
views,
category_name
FROM
gulivideo_orc
lateral VIEW explode(category) gulivideo_orc_tmp AS category_name)t1)t2
WHERE t2.rk <=10
+--------------+-----------+-------------------+--------+
| t2.videoid | t2.views | t2.category_name | t2.rk |
+--------------+-----------+-------------------+--------+
| 1dmVU08zVpA | 16087899 | Entertainment | 1 |
| RB-wUgnyGv0 | 15712924 | Entertainment | 2 |
| vr3x_RRJdd4 | 10786529 | Entertainment | 3 |
| lsO6D1rwrKc | 10334975 | Entertainment | 4 |
| ixsZy2425eY | 7456875 | Entertainment | 5 |
| RUCZJVJ_M8o | 6952767 | Entertainment | 6 |
| tFXLbXyXy6M | 5810013 | Entertainment | 7 |
| 7uwCEnDgd5o | 5280504 | Entertainment | 8 |
| 2KrdBUFeFtY | 4676195 | Entertainment | 9 |
| vD4OnHCRd_4 | 4230610 | Entertainment | 10 |
| hr23tpWX8lM | 4706030 | News | 1 |
| YgW7or1TuFk | 2899397 | News | 2 |
| nda_OSWeyn8 | 2817078 | News | 3 |
| 7SV2sfoPAY8 | 2803520 | News | 4 |
| HBa9wdOANHw | 2348709 | News | 5 |
| xDh_pvv1tUM | 2335060 | News | 6 |
| p_YMigZmUuk | 2326680 | News | 7 |
| QCVxQ_3Ejkg | 2318782 | News | 8 |
| a9WB_PXjTBo | 2310583 | News | 9 |
| qSM_3fyiaxM | 2291369 | News | 10 |
| sdUUx5FdySs | 5840839 | Film | 1 |
| 6B26asyGKDo | 5147533 | Film | 2 |
| H20dhY01Xjk | 3772116 | Film | 3 |
| 55YYaJIrmzo | 3356163 | Film | 4 |
| JzqumbhfxRo | 3230774 | Film | 5 |
| eAhfZUZiwSE | 3114215 | Film | 6 |
| h7svw0m-wO0 | 2866490 | Film | 7 |
| tAq3hWBlalU | 2830024 | Film | 8 |
| AJzU3NjDikY | 2569611 | Film | 9 |
| ElrldD02if0 | 2337238 | Film | 10 |
| aRNzWyD7C9o | 8825788 | UNA | 1 |
| jtExxsiLgPM | 5320895 | UNA | 2 |
| PxNNR4symuE | 4033376 | UNA | 3 |
| 8cjTSvvoddc | 3486368 | UNA | 4 |
| LIhbap3FlGc | 2849832 | UNA | 5 |
| lCSTULqmmYE | 2179562 | UNA | 6 |
| UyTxWvp8upM | 2106933 | UNA | 7 |
| y6oXEWowirI | 1666084 | UNA | 8 |
| _x2-AmY8FI8 | 1403113 | UNA | 9 |
| ICoDFooBXpU | 1376215 | UNA | 10 |
| -_CSo1gOd48 | 13199833 | Blogs | 1 |
| D2kJZOfq7zk | 11184051 | Blogs | 2 |
| pa_7P5AbUww | 5705136 | Blogs | 3 |
| f4B-r8KJhlE | 4937616 | Blogs | 4 |
| LB84A3zcmVo | 4866739 | Blogs | 5 |
| tXNquTYnyg0 | 3613323 | Blogs | 6 |
| EYppbbbSxjc | 2896562 | Blogs | 7 |
| LH7vrLlDZ6U | 2615359 | Blogs | 8 |
| bTV85fQhj0E | 2192656 | Blogs | 9 |
| eVFF98kNg8Q | 1813803 | Blogs | 10 |
| 2GWPOPSXGYI | 3660009 | Animals | 1 |
| xmsV9R8FsDA | 3164582 | Animals | 2 |
| 12PsUW-8ge4 | 3133523 | Animals | 3 |
| OeNggIGSKH8 | 2457750 | Animals | 4 |
| WofFb_eOxxA | 2075728 | Animals | 5 |
| AgEmZ39EtFk | 1999469 | Animals | 6 |
| a-gW3RbJd8U | 1836870 | Animals | 7 |
| 8CL2hetqpfg | 1646808 | Animals | 8 |
| QmroaYVD_so | 1645984 | Animals | 9 |
| Sg9x5mUjbH8 | 1527238 | Animals | 10 |
| RjrEQaG5jPM | 2803140 | Autos | 1 |
| cv157ZIInUk | 2773979 | Autos | 2 |
| Gyg9U1YaVk8 | 1832224 | Autos | 3 |
| 6GNB7xT3rNE | 1412497 | Autos | 4 |
| tth9krDtxII | 1347317 | Autos | 5 |
| 46LQd9dXFRU | 1262173 | Autos | 6 |
| pdiuDXwgrjQ | 1013697 | Autos | 7 |
| kY_cDpENQLE | 956665 | Autos | 8 |
| YtxfbxGz1u4 | 942604 | Autos | 9 |
| aCamHfJwSGU | 847442 | Autos | 10 |
| hut3VRL5XRE | 2684989 | DIY | 1 |
| YYTpb-QXV0k | 2492153 | DIY | 2 |
| Pf3z935R37E | 2096661 | DIY | 3 |
| Yd99gyE4jCk | 1918946 | DIY | 4 |
| koQFjKwVFB0 | 1757071 | DIY | 5 |
| f5Fg6KFcOsU | 1751817 | DIY | 6 |
| STQ3nhXuuEM | 1713974 | DIY | 7 |
| FtKuBKIaVvs | 1520774 | DIY | 8 |
| M0ODskdEPnQ | 1503351 | DIY | 9 |
| uFwCk4UPtlM | 1500110 | DIY | 10 |
| hr23tpWX8lM | 4706030 | Politics | 1 |
| YgW7or1TuFk | 2899397 | Politics | 2 |
| nda_OSWeyn8 | 2817078 | Politics | 3 |
| 7SV2sfoPAY8 | 2803520 | Politics | 4 |
| HBa9wdOANHw | 2348709 | Politics | 5 |
| xDh_pvv1tUM | 2335060 | Politics | 6 |
| p_YMigZmUuk | 2326680 | Politics | 7 |
| QCVxQ_3Ejkg | 2318782 | Politics | 8 |
| a9WB_PXjTBo | 2310583 | Politics | 9 |
| qSM_3fyiaxM | 2291369 | Politics | 10 |
| RjrEQaG5jPM | 2803140 | Vehicles | 1 |
| cv157ZIInUk | 2773979 | Vehicles | 2 |
| Gyg9U1YaVk8 | 1832224 | Vehicles | 3 |
| 6GNB7xT3rNE | 1412497 | Vehicles | 4 |
| tth9krDtxII | 1347317 | Vehicles | 5 |
| 46LQd9dXFRU | 1262173 | Vehicles | 6 |
| pdiuDXwgrjQ | 1013697 | Vehicles | 7 |
| kY_cDpENQLE | 956665 | Vehicles | 8 |
| YtxfbxGz1u4 | 942604 | Vehicles | 9 |
| aCamHfJwSGU | 847442 | Vehicles | 10 |
+--------------+-----------+-------------------+--------+
| t2.videoid | t2.views | t2.category_name | t2.rk |
+--------------+-----------+-------------------+--------+
| pFlcqWQVVuU | 3651600 | Gadgets | 1 |
| bcu8ZdJ2dQo | 2617568 | Gadgets | 2 |
| -G7h626wJwM | 2565170 | Gadgets | 3 |
| oMaTZFCLbq0 | 2554620 | Gadgets | 4 |
| GxSdKF5Fd38 | 2468395 | Gadgets | 5 |
| z1lj87UyvfY | 2373875 | Gadgets | 6 |
| KhCmfX_PQ7E | 1967929 | Gadgets | 7 |
| 2SVMFCZgvNM | 1813794 | Gadgets | 8 |
| gPutYwiiE0o | 1633482 | Gadgets | 9 |
| 7wt5FiZQrgM | 1399531 | Gadgets | 10 |
| QjA5faZF1A8 | 15256922 | Music | 1 |
| tYnn51C3X_w | 11823701 | Music | 2 |
| pv5zWaTEVkI | 11672017 | Music | 3 |
| 8bbTtPL1jRs | 9579911 | Music | 4 |
| UMf40daefsI | 7533070 | Music | 5 |
| -xEzGIuY7kw | 6946033 | Music | 6 |
| d6C0bNDqf3Y | 6935578 | Music | 7 |
| HSoVKUVOnfQ | 6193057 | Music | 8 |
| 3URfWTEPmtE | 5581171 | Music | 9 |
| thtmaZnxk_0 | 5142238 | Music | 10 |
| bNF_P281Uu4 | 5231539 | Travel | 1 |
| s5ipz_0uC_U | 1198840 | Travel | 2 |
| 6jJW7aSNCzU | 1143287 | Travel | 3 |
| dVRUBIyRAYk | 1000309 | Travel | 4 |
| lqbt6X4ZgEI | 921593 | Travel | 5 |
| RIH1I1doUI4 | 879577 | Travel | 6 |
| AlPqL7IUT6M | 845180 | Travel | 7 |
| _5QUdvUhCZc | 819974 | Travel | 8 |
| m9A_vxIOB-I | 677876 | Travel | 9 |
| CL6f3Cyh85w | 611786 | Travel | 10 |
| pFlcqWQVVuU | 3651600 | Games | 1 |
| bcu8ZdJ2dQo | 2617568 | Games | 2 |
| -G7h626wJwM | 2565170 | Games | 3 |
| oMaTZFCLbq0 | 2554620 | Games | 4 |
| GxSdKF5Fd38 | 2468395 | Games | 5 |
| z1lj87UyvfY | 2373875 | Games | 6 |
| KhCmfX_PQ7E | 1967929 | Games | 7 |
| 2SVMFCZgvNM | 1813794 | Games | 8 |
| gPutYwiiE0o | 1633482 | Games | 9 |
| 7wt5FiZQrgM | 1399531 | Games | 10 |
| sdUUx5FdySs | 5840839 | Animation | 1 |
| 6B26asyGKDo | 5147533 | Animation | 2 |
| H20dhY01Xjk | 3772116 | Animation | 3 |
| 55YYaJIrmzo | 3356163 | Animation | 4 |
| JzqumbhfxRo | 3230774 | Animation | 5 |
| eAhfZUZiwSE | 3114215 | Animation | 6 |
| h7svw0m-wO0 | 2866490 | Animation | 7 |
| tAq3hWBlalU | 2830024 | Animation | 8 |
| AJzU3NjDikY | 2569611 | Animation | 9 |
| ElrldD02if0 | 2337238 | Animation | 10 |
| dMH0bHeiRNg | 42513417 | Comedy | 1 |
| 0XxI-hvPRRA | 20282464 | Comedy | 2 |
| 49IDp76kjPw | 11970018 | Comedy | 3 |
| 5P6UU6m3cqk | 10107491 | Comedy | 4 |
| _BuRwH59oAo | 9566609 | Comedy | 5 |
| MNxwAU_xAMk | 7066676 | Comedy | 6 |
| pYak2F1hUYA | 6322117 | Comedy | 7 |
| h0zAlXr1UOs | 5826923 | Comedy | 8 |
| C8rjr4jmWd0 | 5587299 | Comedy | 9 |
| R4cQ3BoHFas | 5508079 | Comedy | 10 |
| Ugrlzm7fySE | 2867888 | Sports | 1 |
| q8t7iSGAKik | 2735003 | Sports | 2 |
| 7vL19q8yL54 | 2527713 | Sports | 3 |
| g3dXfFZ6SH0 | 2295871 | Sports | 4 |
| P-bWsOK-h98 | 2268107 | Sports | 5 |
| HD8f_Qgwc50 | 2165475 | Sports | 6 |
| qjWQNwv-GJ4 | 2132591 | Sports | 7 |
| eN0V-rJQSHE | 2124653 | Sports | 8 |
| fM38G1450Ew | 2052778 | Sports | 9 |
| 3PGzrfE8rJg | 2013466 | Sports | 10 |
| hut3VRL5XRE | 2684989 | Howto | 1 |
| YYTpb-QXV0k | 2492153 | Howto | 2 |
| Pf3z935R37E | 2096661 | Howto | 3 |
| Yd99gyE4jCk | 1918946 | Howto | 4 |
| koQFjKwVFB0 | 1757071 | Howto | 5 |
| f5Fg6KFcOsU | 1751817 | Howto | 6 |
| STQ3nhXuuEM | 1713974 | Howto | 7 |
| FtKuBKIaVvs | 1520774 | Howto | 8 |
| M0ODskdEPnQ | 1503351 | Howto | 9 |
| uFwCk4UPtlM | 1500110 | Howto | 10 |
| bNF_P281Uu4 | 5231539 | Places | 1 |
| s5ipz_0uC_U | 1198840 | Places | 2 |
| 6jJW7aSNCzU | 1143287 | Places | 3 |
| dVRUBIyRAYk | 1000309 | Places | 4 |
| lqbt6X4ZgEI | 921593 | Places | 5 |
| RIH1I1doUI4 | 879577 | Places | 6 |
| AlPqL7IUT6M | 845180 | Places | 7 |
| _5QUdvUhCZc | 819974 | Places | 8 |
| m9A_vxIOB-I | 677876 | Places | 9 |
| CL6f3Cyh85w | 611786 | Places | 10 |
| -_CSo1gOd48 | 13199833 | People | 1 |
| D2kJZOfq7zk | 11184051 | People | 2 |
| pa_7P5AbUww | 5705136 | People | 3 |
| f4B-r8KJhlE | 4937616 | People | 4 |
| LB84A3zcmVo | 4866739 | People | 5 |
| tXNquTYnyg0 | 3613323 | People | 6 |
| EYppbbbSxjc | 2896562 | People | 7 |
| LH7vrLlDZ6U | 2615359 | People | 8 |
| bTV85fQhj0E | 2192656 | People | 9 |
| eVFF98kNg8Q | 1813803 | People | 10 |
+--------------+-----------+-------------------+--------+
| t2.videoid | t2.views | t2.category_name | t2.rk |
+--------------+-----------+-------------------+--------+
| 2GWPOPSXGYI | 3660009 | Pets | 1 |
| xmsV9R8FsDA | 3164582 | Pets | 2 |
| 12PsUW-8ge4 | 3133523 | Pets | 3 |
| OeNggIGSKH8 | 2457750 | Pets | 4 |
| WofFb_eOxxA | 2075728 | Pets | 5 |
| AgEmZ39EtFk | 1999469 | Pets | 6 |
| a-gW3RbJd8U | 1836870 | Pets | 7 |
| 8CL2hetqpfg | 1646808 | Pets | 8 |
| QmroaYVD_so | 1645984 | Pets | 9 |
| Sg9x5mUjbH8 | 1527238 | Pets | 10 |
+--------------+-----------+-------------------+--------+
-- 需求七
-- 统计上传视频最多的用户Top10以及他们上传的视频观看次数在前20的视频
-- 1. 上传视频最多的用户Top10
SELECT
uploader
FROM
gulivideo_user_orc
ORDER BY videos DESC
LIMIT 10 ==>t1
+---------------------+
| uploader |
+---------------------+
| expertvillage |
| TourFactory |
| myHotelVideo |
| AlexanderRodchenko |
| VHTStudios |
| ephemeral8 |
| HSN |
| rattanakorn |
| Ruchaneewan |
| futifu |
+---------------------+
-- 2. 关联gulivideo_orc表,查出10个用户上传的所有的视频,views排序,取20
SELECT
t1.uploader,
t2.videoId,
t2.views
FROM
t1 JOIN gulivideo_orc t2
ON t1.uploader = t2.uploader
ORDER BY t2.views DESC
LIMIT 20
-- 组合
SELECT
t1.uploader,
t2.videoId,
t2.views
FROM
(SELECT
uploader
FROM
gulivideo_user_orc
ORDER BY videos DESC
LIMIT 10)t1 JOIN gulivideo_orc t2
ON t1.uploader = t2.uploader
ORDER BY t2.views DESC
LIMIT 100
+----------------+--------------+-----------+
| t1.uploader | t2.videoid | t2.views |
+----------------+--------------+-----------+
| expertvillage | -IxHBW0YpZw | 39059 |
| expertvillage | BU-fT5XI_8I | 29975 |
| expertvillage | ADOcaBYbMl0 | 26270 |
| expertvillage | yAqsULIDJFE | 25511 |
| expertvillage | vcm-t0TJXNg | 25366 |
| expertvillage | 0KYGFawp14c | 24659 |
| expertvillage | j4DpuPvMLF4 | 22593 |
| expertvillage | Msu4lZb2oeQ | 18822 |
| expertvillage | ZHZVj44rpjE | 16304 |
| expertvillage | foATQY3wovI | 13576 |
| expertvillage | -UnQ8rcBOQs | 13450 |
| expertvillage | crtNd46CDks | 11639 |
| expertvillage | D1leA0JKHhE | 11553 |
| expertvillage | NJu2oG1Wm98 | 11452 |
| expertvillage | CapbXdyv4j4 | 10915 |
| expertvillage | epr5erraEp4 | 10817 |
| expertvillage | IyQoDgaLM7U | 10597 |
| expertvillage | tbZibBnusLQ | 10402 |
| expertvillage | _GnCHodc7mk | 9422 |
| expertvillage | hvEYlSlRitU | 7123 |
+----------------+--------------+-----------+
=====================================================================================
课后练习一:
数据:
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
结果:
用户id 月份 小计 累积
u01 2017-01 11 11
u01 2017-02 12 23
u02 2017-01 12 12
u03 2017-01 8 8
u04 2017-01 3 3
a. 先处理日期格式
SELECT userId,
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION ==>t1
b. 根据用户id和月份分组,求小计
SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM t1
GROUP BY userId,visitDate ==>t2
组合:
SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM (SELECT userId,
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION)t1
GROUP BY userId,visitDate
+------------+---------------+-----+--+
| t1.userid | t1.visitdate | xj |
+------------+---------------+-----+--+
| u01 | 2017-01 | 11 |
| u01 | 2017-02 | 12 |
| u02 | 2017-01 | 12 |
| u03 | 2017-01 | 8 |
| u04 | 2017-01 | 3 |
+------------+---------------+-----+--+
c. 开窗,通过userId分区,visitData排序, 求累计
SELECT t2.userId,t2.visitDate, t2.xj ,
SUM(t2.xj) over(PARTITION BY t2.userId ORDER BY t2.visitDate) lj
FROM t2
组合:
SELECT t2.userId,t2.visitDate, t2.xj ,
SUM(t2.xj) over(PARTITION BY t2.userId ORDER BY t2.visitDate) lj
FROM (SELECT t1.userId, t1.visitDate, SUM(visitCount) xj
FROM (SELECT userId,
DATE_FORMAT(regexp_replace(visitDate,"/","-"),'yyyy-MM') visitDate , visitCount
FROM ACTION)t1
GROUP BY userId,visitDate)t2
+------------+---------------+--------+-----+--+
| t2.userid | t2.visitdate | t2.xj | lj |
+------------+---------------+--------+-----+--+
| u01 | 2017-01 | 11 | 11 |
| u01 | 2017-02 | 12 | 23 |
| u02 | 2017-01 | 12 | 12 |
| u03 | 2017-01 | 8 | 8 |
| u04 | 2017-01 | 3 | 3 |
+------------+---------------+--------+-----+--+
=====================================================================================
课后练习二:
+----------------+-------------+--+
| visit.user_id | visit.shop |
+----------------+-------------+--+
| u1 | a |
| u2 | b |
| u1 | b |
| u1 | a |
| u3 | c |
| u4 | b |
| u1 | a |
| u2 | c |
| u5 | b |
| u4 | b |
| u6 | c |
| u2 | c |
| u1 | b |
| u2 | a |
| u2 | a |
| u3 | a |
| u5 | a |
| u5 | a |
| u5 | a |
+----------------+-------------+--+
需求一: 每个店铺的UV(访客数)
思路一: 按照店铺分组,通过count(DISTINCT) 求UV
SELECT shop ,COUNT(DISTINCT user_id) uv
FROM visit
GROUP BY shop
+-------+-----+--+
| shop | uv |
+-------+-----+--+
| a | 4 |
| b | 4 |
| c | 3 |
+-------+-----+--+
思路二:
a. 按照user_id 和shop分组 ,求出哪个用户访问过哪个店铺
SELECT user_id, shop
FROM visit
GROUP BY user_id, shop ==>t1
+----------+-------+--+
| user_id | shop |
+----------+-------+--+
| u1 | a |
| u1 | b |
| u2 | a |
| u2 | b |
| u2 | c |
| u3 | a |
| u3 | c |
| u4 | b |
| u5 | a |
| u5 | b |
| u6 | c |
+----------+-------+--+
b. 按照shop分组,求uv
SELECT t1.shop ,COUNT(t1.user_id) uv
FROM t1
GROUP BY t1.shop
组合:
SELECT t1.shop ,COUNT(t1.user_id) uv
FROM (SELECT user_id, shop
FROM visit
GROUP BY user_id, shop)t1
GROUP BY t1.shop
+----------+-----+--+
| t1.shop | uv |
+----------+-----+--+
| a | 4 |
| b | 4 |
| c | 3 |
+----------+-----+--+
需求二:每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
a. 通过user_id, shop分组,求每个店铺,每个访客访问的次数
SELECT user_id, shop ,COUNT(user_id) visit_count
FROM visit
GROUP BY user_id, shop ==>t1
+----------+-------+--------------+--+
| user_id | shop | visit_count |
+----------+-------+--------------+--+
| u1 | a | 3 |
| u1 | b | 2 |
| u2 | a | 2 |
| u2 | b | 1 |
| u2 | c | 2 |
| u3 | a | 1 |
| u3 | c | 1 |
| u4 | b | 2 |
| u5 | a | 3 |
| u5 | b | 1 |
| u6 | c | 1 |
+----------+-------+--------------+--+
b.按照shop分区,访问次数排序,求排名
SELECT t1.user_id ,t1.shop ,t1.visit_count,
rank() over(distribute BY t1.shop sort BY t1.visit_count DESC ) rk
FROM t1 ==>t2
组合:
SELECT t1.user_id ,t1.shop ,t1.visit_count,
rank() over(distribute BY t1.shop sort BY t1.visit_count DESC ) rk
FROM (SELECT user_id, shop ,COUNT(user_id) visit_count
FROM visit
GROUP BY user_id, shop )t1
+-------------+----------+-----------------+-----+--+
| t1.user_id | t1.shop | t1.visit_count | rk |
+-------------+----------+-----------------+-----+--+
| u5 | a | 3 | 1 |
| u1 | a | 3 | 1 |
| u2 | a | 2 | 3 |
| u3 | a | 1 | 4 |
| u4 | b | 2 | 1 |
| u1 | b | 2 | 1 |
| u5 | b | 1 | 3 |
| u2 | b | 1 | 3 |
| u2 | c | 2 | 1 |
| u6 | c | 1 | 2 |
| u3 | c | 1 | 2 |
+-------------+----------+-----------------+-----+--+
c. 取top3
SELECT t2.user_id, t2.shop, t2.visit_count
FROM t2
WHERE t2.rk <=3
组合:
SELECT t2.user_id, t2.shop, t2.visit_count
FROM (SELECT t1.user_id ,t1.shop ,t1.visit_count,
rank() over(distribute BY t1.shop sort BY t1.visit_count DESC ) rk
FROM (SELECT user_id, shop ,COUNT(user_id) visit_count
FROM visit
GROUP BY user_id, shop )t1 )t2
WHERE t2.rk <=3
+-------------+----------+-----------------+--+
| t2.user_id | t2.shop | t2.visit_count |
+-------------+----------+-----------------+--+
| u5 | a | 3 |
| u1 | a | 3 |
| u2 | a | 2 |
| u4 | b | 2 |
| u1 | b | 2 |
| u5 | b | 1 |
| u2 | b | 1 |
| u2 | c | 2 |
| u6 | c | 1 |
| u3 | c | 1 |
+-------------+----------+-----------------+--+
=============================================================================
课后练习三:
table_name:user_low_carbon
user_id data_dt low_carbon
用户 日期 减少碳排放(g)
table_name: plant_carbon
plant_id plant_name low_carbon
植物编号 植物名 换购植物所需要的碳
-- 需求一
-- 数据
+--------------------------+--------------------------+-----------------------------+
| user_low_carbon.user_id | user_low_carbon.data_dt | user_low_carbon.low_carbon |
+--------------------------+--------------------------+-----------------------------+
| u_001 | 2017/1/1 | 10 |
| u_001 | 2017/1/2 | 150 |
| u_001 | 2017/1/2 | 110 |
| u_001 | 2017/1/2 | 10 |
| u_001 | 2017/1/4 | 50 |
| u_001 | 2017/1/4 | 10 |
| u_001 | 2017/1/6 | 45 |
| u_001 | 2017/1/6 | 90 |
| u_002 | 2017/1/1 | 10 |
1.蚂蚁森林植物申领统计
问题:假设2017年1月1日开始记录低碳数据(user_low_carbon),
假设2017年10月1日之前满足申领条件的用户都申领了一颗p004-胡杨,
剩余的能量全部用来领取“p002-沙柳” 。
统计在10月1日之前累计申领“p002-沙柳” 排名前10的用户信息;
以及他比后一名多领了几颗沙柳
-- 1. 处理日期格式 2017/1/1 == > 2017-01-01
SELECT
user_id ,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon ==>t1
-- 2. 求出2017年10月1日之前,每个用户的总能量
SELECT
t1.user_id,
SUM(t1.low_carbon) sum_low_carbon
FROM
t1
WHERE t1.data_dt < '2017-10-01'
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC
LIMIT 11 ==>t2
-- 组合
SELECT
t1.user_id,
SUM(t1.low_carbon) sum_low_carbon
FROM
(SELECT
user_id ,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01'
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC
LIMIT 11
+-------------+-----------------+
| t1.user_id | sum_low_carbon |
+-------------+-----------------+
| u_007 | 1470 |
| u_013 | 1430 |
| u_008 | 1240 |
| u_005 | 1100 |
| u_010 | 1080 |
| u_014 | 1060 |
| u_011 | 960 |
| u_009 | 930 |
| u_006 | 830 |
| u_002 | 659 |
| u_004 | 640 |
+-------------+-----------------+
-- 3. 求出领取胡杨和沙柳需要的能量
SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004' ==> t3
SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002' ==> t4
-- 4. 求出每个人领取的沙柳的数量
SELECT
t2.user_id ,
t2.sum_low_carbon,
FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM
t2 ,
t3 ,
t4 ==>t5
-- 组合
SELECT
t2.user_id ,
t2.sum_low_carbon,
FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM
(SELECT
t1.user_id,
SUM(t1.low_carbon) sum_low_carbon
FROM
(SELECT
user_id ,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01'
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC
LIMIT 11)t2 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4
+-------------+--------------------+-----+
| t2.user_id | t2.sum_low_carbon | sl |
+-------------+--------------------+-----+
| u_007 | 1470 | 66 |
| u_013 | 1430 | 63 |
| u_008 | 1240 | 53 |
| u_005 | 1100 | 46 |
| u_010 | 1080 | 45 |
| u_014 | 1060 | 44 |
| u_011 | 960 | 39 |
| u_009 | 930 | 37 |
| u_006 | 830 | 32 |
| u_002 | 659 | 23 |
| u_004 | 640 | 22 |
+-------------+--------------------+-----+
-- 5. 将下一行的数据提取到当前行
SELECT
t5.user_id ,
t5.sum_low_carbon,
t5.sl,
lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
t5
LIMIT 10 ==>t6
-- 组合
SELECT
t5.user_id ,
t5.sum_low_carbon,
t5.sl,
lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
(SELECT
t2.user_id ,
t2.sum_low_carbon,
FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM
(SELECT
t1.user_id,
SUM(t1.low_carbon) sum_low_carbon
FROM
(SELECT
user_id ,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01'
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC
LIMIT 11)t2 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4 )t5
LIMIT 10
+-------------+--------------------+--------+----------+
| t5.user_id | t5.sum_low_carbon | t5.sl | next_sl |
+-------------+--------------------+--------+----------+
| u_007 | 1470 | 66 | 63 |
| u_013 | 1430 | 63 | 53 |
| u_008 | 1240 | 53 | 46 |
| u_005 | 1100 | 46 | 45 |
| u_010 | 1080 | 45 | 44 |
| u_014 | 1060 | 44 | 39 |
| u_011 | 960 | 39 | 37 |
| u_009 | 930 | 37 | 32 |
| u_006 | 830 | 32 | 23 |
| u_002 | 659 | 23 | 22 |
+-------------+--------------------+--------+----------+
-- 计算比下个人多领取多少颗沙柳
SELECT
t6.user_id ,
t6.sum_low_carbon,
t6.sl,
t6.next_sl,
t6.sl-t6.next_sl sl_diff
FROM t6
-- 最终组合 :
SELECT
t6.user_id ,
t6.sum_low_carbon,
t6.sl,
t6.next_sl,
t6.sl-t6.next_sl sl_diff
FROM
(SELECT
t5.user_id ,
t5.sum_low_carbon,
t5.sl,
lead(t5.sl,1) over(ORDER BY t5.sl DESC ) next_sl
FROM
(SELECT
t2.user_id ,
t2.sum_low_carbon,
FLOOR((t2.sum_low_carbon - t3.low_carbon) / t4.low_carbon) sl
FROM
(SELECT
t1.user_id,
SUM(t1.low_carbon) sum_low_carbon
FROM
(SELECT
user_id ,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
WHERE t1.data_dt < '2017-10-01'
GROUP BY t1.user_id
ORDER BY sum_low_carbon DESC
LIMIT 11)t2 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p004')t3 ,
(SELECT low_carbon FROM plant_carbon WHERE plant_id = 'p002')t4 )t5
LIMIT 10)t6
+-------------+--------------------+--------+-------------+----------+
| t6.user_id | t6.sum_low_carbon | t6.sl | t6.next_sl | sl_diff |
+-------------+--------------------+--------+-------------+----------+
| u_007 | 1470 | 66 | 63 | 3 |
| u_013 | 1430 | 63 | 53 | 10 |
| u_008 | 1240 | 53 | 46 | 7 |
| u_005 | 1100 | 46 | 45 | 1 |
| u_010 | 1080 | 45 | 44 | 1 |
| u_014 | 1060 | 44 | 39 | 5 |
| u_011 | 960 | 39 | 37 | 2 |
| u_009 | 930 | 37 | 32 | 5 |
| u_006 | 830 | 32 | 23 | 9 |
| u_002 | 659 | 23 | 22 | 1 |
+-------------+--------------------+--------+-------------+----------+
-- 需求二
2、蚂蚁森林低碳用户排名分析
问题:查询user_low_carbon表中每日流水记录,条件为:
用户在2017年,连续三天(或以上)的天数里,
每天减少碳排放(low_carbon)都超过100g的用户低碳流水。
需要查询返回满足以上条件的user_low_carbon表中的记录流水。
==================================================================
-- 思路一:
-- 1. 先处理日期格式
SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon ==>t1
-- 2.求出每个用户每天的总能量,
SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100 ==>t2
-- 组合
SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100
+-------------+-------------+---------------------+
| t1.user_id | t1.data_dt | day_sum_low_carbon |
+-------------+-------------+---------------------+
| u_001 | 2017-01-02 | 270 |
| u_001 | 2017-01-06 | 135 |
| u_002 | 2017-01-02 | 220 |
| u_002 | 2017-01-03 | 110 |
| u_002 | 2017-01-04 | 150 |
| u_002 | 2017-01-05 | 101 |
| u_003 | 2017-01-02 | 160 |
| u_003 | 2017-01-03 | 160 |
| u_003 | 2017-01-05 | 120 |
| u_003 | 2017-01-07 | 120 |
| u_004 | 2017-01-01 | 110 |
| u_004 | 2017-01-03 | 120 |
| u_004 | 2017-01-06 | 120 |
| u_004 | 2017-01-07 | 130 |
| u_005 | 2017-01-02 | 130 |
| u_005 | 2017-01-03 | 180 |
| u_005 | 2017-01-04 | 190 |
| u_005 | 2017-01-06 | 280 |
| u_005 | 2017-01-07 | 160 |
| u_006 | 2017-01-02 | 180 |
| u_006 | 2017-01-03 | 220 |
| u_006 | 2017-01-07 | 290 |
| u_007 | 2017-01-01 | 130 |
| u_007 | 2017-01-02 | 360 |
| u_007 | 2017-01-04 | 530 |
| u_007 | 2017-01-06 | 230 |
| u_007 | 2017-01-07 | 160 |
| u_008 | 2017-01-01 | 160 |
| u_008 | 2017-01-02 | 120 |
| u_008 | 2017-01-04 | 260 |
| u_008 | 2017-01-05 | 360 |
| u_008 | 2017-01-06 | 160 |
| u_008 | 2017-01-07 | 120 |
| u_009 | 2017-01-02 | 140 |
| u_009 | 2017-01-03 | 170 |
| u_009 | 2017-01-04 | 270 |
| u_009 | 2017-01-07 | 140 |
| u_010 | 2017-01-02 | 180 |
| u_010 | 2017-01-04 | 170 |
| u_010 | 2017-01-05 | 180 |
| u_010 | 2017-01-06 | 190 |
| u_010 | 2017-01-07 | 180 |
| u_011 | 2017-01-01 | 110 |
| u_011 | 2017-01-02 | 200 |
| u_011 | 2017-01-03 | 120 |
| u_011 | 2017-01-04 | 100 |
| u_011 | 2017-01-05 | 100 |
| u_011 | 2017-01-06 | 100 |
| u_011 | 2017-01-07 | 230 |
| u_012 | 2017-01-02 | 130 |
| u_013 | 2017-01-02 | 200 |
| u_013 | 2017-01-03 | 150 |
| u_013 | 2017-01-04 | 550 |
| u_013 | 2017-01-05 | 350 |
| u_014 | 2017-01-01 | 220 |
| u_014 | 2017-01-02 | 140 |
| u_014 | 2017-01-05 | 250 |
| u_014 | 2017-01-06 | 120 |
| u_014 | 2017-01-07 | 290 |
| u_015 | 2017-01-07 | 140 |
+-------------+-------------+---------------------+
-- 3. 连续三天或以上:
对于一条数据如何判断是否满足在连续三天或以上
u_014 | 2017-01-01 | 220
假设把当前的数据当成今天来处理:
需要满足如下条件:
前天 昨天 今天 ==> 今天-昨天=1 AND 今天-前天=2
OR
昨天 今天 明天 ==> 今天-昨天=1 AND 今天-明天=-1
OR
今天 明天 后天 ==> 今天-明天=-1 AND 今天-后天=-2
-- 4. 把每条数据的往前第2条(前天) 往前第1条(昨天) 往后第1条(明天) 往后第2天(后天)
-- 提取到当前行.
SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
lag(t2.data_dt,2,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) qt,
lag(t2.data_dt,1,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) zt,
lead(t2.data_dt,1,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) mt,
lead(t2.data_dt,2,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) ht
FROM
t2 ==>t3
-- 组合:
SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
lag(t2.data_dt,2,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) qt,
lag(t2.data_dt,1,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) zt,
lead(t2.data_dt,1,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) mt,
lead(t2.data_dt,2,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) ht
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
| t2.user_id | t2.data_dt | t2.day_sum_low_carbon | qt | zt | mt | ht |
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
| u_001 | 2017-01-02 | 270 | 1970-01-01 | 1970-01-01 | 2017-01-06 | 9999-01-01 |
| u_001 | 2017-01-06 | 135 | 1970-01-01 | 2017-01-02 | 9999-01-01 | 9999-01-01 |
| u_002 | 2017-01-02 | 220 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-04 |
| u_002 | 2017-01-03 | 110 | 1970-01-01 | 2017-01-02 | 2017-01-04 | 2017-01-05 |
| u_002 | 2017-01-04 | 150 | 2017-01-02 | 2017-01-03 | 2017-01-05 | 9999-01-01 |
| u_002 | 2017-01-05 | 101 | 2017-01-03 | 2017-01-04 | 9999-01-01 | 9999-01-01 |
| u_003 | 2017-01-02 | 160 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-05 |
| u_003 | 2017-01-03 | 160 | 1970-01-01 | 2017-01-02 | 2017-01-05 | 2017-01-07 |
| u_003 | 2017-01-05 | 120 | 2017-01-02 | 2017-01-03 | 2017-01-07 | 9999-01-01 |
| u_003 | 2017-01-07 | 120 | 2017-01-03 | 2017-01-05 | 9999-01-01 | 9999-01-01 |
| u_004 | 2017-01-01 | 110 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-06 |
| u_004 | 2017-01-03 | 120 | 1970-01-01 | 2017-01-01 | 2017-01-06 | 2017-01-07 |
| u_004 | 2017-01-06 | 120 | 2017-01-01 | 2017-01-03 | 2017-01-07 | 9999-01-01 |
| u_004 | 2017-01-07 | 130 | 2017-01-03 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_005 | 2017-01-02 | 130 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-04 |
| u_005 | 2017-01-03 | 180 | 1970-01-01 | 2017-01-02 | 2017-01-04 | 2017-01-06 |
| u_005 | 2017-01-04 | 190 | 2017-01-02 | 2017-01-03 | 2017-01-06 | 2017-01-07 |
| u_005 | 2017-01-06 | 280 | 2017-01-03 | 2017-01-04 | 2017-01-07 | 9999-01-01 |
| u_005 | 2017-01-07 | 160 | 2017-01-04 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_006 | 2017-01-02 | 180 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-07 |
| u_006 | 2017-01-03 | 220 | 1970-01-01 | 2017-01-02 | 2017-01-07 | 9999-01-01 |
| u_006 | 2017-01-07 | 290 | 2017-01-02 | 2017-01-03 | 9999-01-01 | 9999-01-01 |
| u_007 | 2017-01-01 | 130 | 1970-01-01 | 1970-01-01 | 2017-01-02 | 2017-01-04 |
| u_007 | 2017-01-02 | 360 | 1970-01-01 | 2017-01-01 | 2017-01-04 | 2017-01-06 |
| u_007 | 2017-01-04 | 530 | 2017-01-01 | 2017-01-02 | 2017-01-06 | 2017-01-07 |
| u_007 | 2017-01-06 | 230 | 2017-01-02 | 2017-01-04 | 2017-01-07 | 9999-01-01 |
| u_007 | 2017-01-07 | 160 | 2017-01-04 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_008 | 2017-01-01 | 160 | 1970-01-01 | 1970-01-01 | 2017-01-02 | 2017-01-04 |
| u_008 | 2017-01-02 | 120 | 1970-01-01 | 2017-01-01 | 2017-01-04 | 2017-01-05 |
| u_008 | 2017-01-04 | 260 | 2017-01-01 | 2017-01-02 | 2017-01-05 | 2017-01-06 |
| u_008 | 2017-01-05 | 360 | 2017-01-02 | 2017-01-04 | 2017-01-06 | 2017-01-07 |
| u_008 | 2017-01-06 | 160 | 2017-01-04 | 2017-01-05 | 2017-01-07 | 9999-01-01 |
| u_008 | 2017-01-07 | 120 | 2017-01-05 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_009 | 2017-01-02 | 140 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-04 |
| u_009 | 2017-01-03 | 170 | 1970-01-01 | 2017-01-02 | 2017-01-04 | 2017-01-07 |
| u_009 | 2017-01-04 | 270 | 2017-01-02 | 2017-01-03 | 2017-01-07 | 9999-01-01 |
| u_009 | 2017-01-07 | 140 | 2017-01-03 | 2017-01-04 | 9999-01-01 | 9999-01-01 |
| u_010 | 2017-01-02 | 180 | 1970-01-01 | 1970-01-01 | 2017-01-04 | 2017-01-05 |
| u_010 | 2017-01-04 | 170 | 1970-01-01 | 2017-01-02 | 2017-01-05 | 2017-01-06 |
| u_010 | 2017-01-05 | 180 | 2017-01-02 | 2017-01-04 | 2017-01-06 | 2017-01-07 |
| u_010 | 2017-01-06 | 190 | 2017-01-04 | 2017-01-05 | 2017-01-07 | 9999-01-01 |
| u_010 | 2017-01-07 | 180 | 2017-01-05 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_011 | 2017-01-01 | 110 | 1970-01-01 | 1970-01-01 | 2017-01-02 | 2017-01-03 |
| u_011 | 2017-01-02 | 200 | 1970-01-01 | 2017-01-01 | 2017-01-03 | 2017-01-04 |
| u_011 | 2017-01-03 | 120 | 2017-01-01 | 2017-01-02 | 2017-01-04 | 2017-01-05 |
| u_011 | 2017-01-04 | 100 | 2017-01-02 | 2017-01-03 | 2017-01-05 | 2017-01-06 |
| u_011 | 2017-01-05 | 100 | 2017-01-03 | 2017-01-04 | 2017-01-06 | 2017-01-07 |
| u_011 | 2017-01-06 | 100 | 2017-01-04 | 2017-01-05 | 2017-01-07 | 9999-01-01 |
| u_011 | 2017-01-07 | 230 | 2017-01-05 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_012 | 2017-01-02 | 130 | 1970-01-01 | 1970-01-01 | 9999-01-01 | 9999-01-01 |
| u_013 | 2017-01-02 | 200 | 1970-01-01 | 1970-01-01 | 2017-01-03 | 2017-01-04 |
| u_013 | 2017-01-03 | 150 | 1970-01-01 | 2017-01-02 | 2017-01-04 | 2017-01-05 |
| u_013 | 2017-01-04 | 550 | 2017-01-02 | 2017-01-03 | 2017-01-05 | 9999-01-01 |
| u_013 | 2017-01-05 | 350 | 2017-01-03 | 2017-01-04 | 9999-01-01 | 9999-01-01 |
| u_014 | 2017-01-01 | 220 | 1970-01-01 | 1970-01-01 | 2017-01-02 | 2017-01-05 |
| u_014 | 2017-01-02 | 140 | 1970-01-01 | 2017-01-01 | 2017-01-05 | 2017-01-06 |
| u_014 | 2017-01-05 | 250 | 2017-01-01 | 2017-01-02 | 2017-01-06 | 2017-01-07 |
| u_014 | 2017-01-06 | 120 | 2017-01-02 | 2017-01-05 | 2017-01-07 | 9999-01-01 |
| u_014 | 2017-01-07 | 290 | 2017-01-05 | 2017-01-06 | 9999-01-01 | 9999-01-01 |
| u_015 | 2017-01-07 | 140 | 1970-01-01 | 1970-01-01 | 9999-01-01 | 9999-01-01 |
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
-- 5. 计算差值
SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATEDIFF(t3.data_dt,t3.qt) jt_qt_diff,
DATEDIFF(t3.data_dt,t3.zt) jt_zt_diff,
DATEDIFF(t3.data_dt,t3.mt) jt_mt_diff,
DATEDIFF(t3.data_dt,t3.ht) jt_ht_diff
FROM
()t3 ==>t4
-- 组合
SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATEDIFF(t3.data_dt,t3.qt) jt_qt_diff,
DATEDIFF(t3.data_dt,t3.zt) jt_zt_diff,
DATEDIFF(t3.data_dt,t3.mt) jt_mt_diff,
DATEDIFF(t3.data_dt,t3.ht) jt_ht_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
lag(t2.data_dt,2,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) qt,
lag(t2.data_dt,1,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) zt,
lead(t2.data_dt,1,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) mt,
lead(t2.data_dt,2,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) ht
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
| t3.user_id | t3.data_dt | t3.day_sum_low_carbon | jt_qt_diff | jt_zt_diff | jt_mt_diff | jt_ht_diff |
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
| u_001 | 2017-01-02 | 270 | 17168 | 17168 | -4 | -2915364 |
| u_001 | 2017-01-06 | 135 | 17172 | 4 | -2915360 | -2915360 |
| u_002 | 2017-01-02 | 220 | 17168 | 17168 | -1 | -2 |
| u_002 | 2017-01-03 | 110 | 17169 | 1 | -1 | -2 |
| u_002 | 2017-01-04 | 150 | 2 | 1 | -1 | -2915362 |
| u_002 | 2017-01-05 | 101 | 2 | 1 | -2915361 | -2915361 |
| u_003 | 2017-01-02 | 160 | 17168 | 17168 | -1 | -3 |
| u_003 | 2017-01-03 | 160 | 17169 | 1 | -2 | -4 |
| u_003 | 2017-01-05 | 120 | 3 | 2 | -2 | -2915361 |
| u_003 | 2017-01-07 | 120 | 4 | 2 | -2915359 | -2915359 |
| u_004 | 2017-01-01 | 110 | 17167 | 17167 | -2 | -5 |
| u_004 | 2017-01-03 | 120 | 17169 | 2 | -3 | -4 |
| u_004 | 2017-01-06 | 120 | 5 | 3 | -1 | -2915360 |
| u_004 | 2017-01-07 | 130 | 4 | 1 | -2915359 | -2915359 |
| u_005 | 2017-01-02 | 130 | 17168 | 17168 | -1 | -2 |
| u_005 | 2017-01-03 | 180 | 17169 | 1 | -1 | -3 |
| u_005 | 2017-01-04 | 190 | 2 | 1 | -2 | -3 |
| u_005 | 2017-01-06 | 280 | 3 | 2 | -1 | -2915360 |
| u_005 | 2017-01-07 | 160 | 3 | 1 | -2915359 | -2915359 |
| u_006 | 2017-01-02 | 180 | 17168 | 17168 | -1 | -5 |
| u_006 | 2017-01-03 | 220 | 17169 | 1 | -4 | -2915363 |
| u_006 | 2017-01-07 | 290 | 5 | 4 | -2915359 | -2915359 |
| u_007 | 2017-01-01 | 130 | 17167 | 17167 | -1 | -3 |
| u_007 | 2017-01-02 | 360 | 17168 | 1 | -2 | -4 |
| u_007 | 2017-01-04 | 530 | 3 | 2 | -2 | -3 |
| u_007 | 2017-01-06 | 230 | 4 | 2 | -1 | -2915360 |
| u_007 | 2017-01-07 | 160 | 3 | 1 | -2915359 | -2915359 |
| u_008 | 2017-01-01 | 160 | 17167 | 17167 | -1 | -3 |
| u_008 | 2017-01-02 | 120 | 17168 | 1 | -2 | -3 |
| u_008 | 2017-01-04 | 260 | 3 | 2 | -1 | -2 |
| u_008 | 2017-01-05 | 360 | 3 | 1 | -1 | -2 |
| u_008 | 2017-01-06 | 160 | 2 | 1 | -1 | -2915360 |
| u_008 | 2017-01-07 | 120 | 2 | 1 | -2915359 | -2915359 |
| u_009 | 2017-01-02 | 140 | 17168 | 17168 | -1 | -2 |
| u_009 | 2017-01-03 | 170 | 17169 | 1 | -1 | -4 |
| u_009 | 2017-01-04 | 270 | 2 | 1 | -3 | -2915362 |
| u_009 | 2017-01-07 | 140 | 4 | 3 | -2915359 | -2915359 |
| u_010 | 2017-01-02 | 180 | 17168 | 17168 | -2 | -3 |
| u_010 | 2017-01-04 | 170 | 17170 | 2 | -1 | -2 |
| u_010 | 2017-01-05 | 180 | 3 | 1 | -1 | -2 |
| u_010 | 2017-01-06 | 190 | 2 | 1 | -1 | -2915360 |
| u_010 | 2017-01-07 | 180 | 2 | 1 | -2915359 | -2915359 |
| u_011 | 2017-01-01 | 110 | 17167 | 17167 | -1 | -2 |
| u_011 | 2017-01-02 | 200 | 17168 | 1 | -1 | -2 |
| u_011 | 2017-01-03 | 120 | 2 | 1 | -1 | -2 |
| u_011 | 2017-01-04 | 100 | 2 | 1 | -1 | -2 |
| u_011 | 2017-01-05 | 100 | 2 | 1 | -1 | -2 |
| u_011 | 2017-01-06 | 100 | 2 | 1 | -1 | -2915360 |
| u_011 | 2017-01-07 | 230 | 2 | 1 | -2915359 | -2915359 |
| u_012 | 2017-01-02 | 130 | 17168 | 17168 | -2915364 | -2915364 |
| u_013 | 2017-01-02 | 200 | 17168 | 17168 | -1 | -2 |
| u_013 | 2017-01-03 | 150 | 17169 | 1 | -1 | -2 |
| u_013 | 2017-01-04 | 550 | 2 | 1 | -1 | -2915362 |
| u_013 | 2017-01-05 | 350 | 2 | 1 | -2915361 | -2915361 |
| u_014 | 2017-01-01 | 220 | 17167 | 17167 | -1 | -4 |
| u_014 | 2017-01-02 | 140 | 17168 | 1 | -3 | -4 |
| u_014 | 2017-01-05 | 250 | 4 | 3 | -1 | -2 |
| u_014 | 2017-01-06 | 120 | 4 | 1 | -1 | -2915360 |
| u_014 | 2017-01-07 | 290 | 2 | 1 | -2915359 | -2915359 |
| u_015 | 2017-01-07 | 140 | 17173 | 17173 | -2915359 | -2915359 |
+-------------+-------------+------------------------+-------------+-------------+-------------+-------------+
-- 6.过滤满足3天或以上
SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon
FROM
t4
WHERE
t4.jt_qt_diff =2 AND t4.jt_zt_diff =1
OR
t4.jt_zt_diff =1 AND t4.jt_mt_diff =-1
OR
t4.jt_mt_diff =-1 AND t4.jt_ht_diff = -2 ==>t5
-- 组合
SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon
FROM
(SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATEDIFF(t3.data_dt,t3.qt) jt_qt_diff,
DATEDIFF(t3.data_dt,t3.zt) jt_zt_diff,
DATEDIFF(t3.data_dt,t3.mt) jt_mt_diff,
DATEDIFF(t3.data_dt,t3.ht) jt_ht_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
lag(t2.data_dt,2,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) qt,
lag(t2.data_dt,1,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) zt,
lead(t2.data_dt,1,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) mt,
lead(t2.data_dt,2,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) ht
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3)t4
WHERE
t4.jt_qt_diff =2 AND t4.jt_zt_diff =1
OR
t4.jt_zt_diff =1 AND t4.jt_mt_diff =-1
OR
t4.jt_mt_diff =-1 AND t4.jt_ht_diff = -2
+-------------+-------------+------------------------+
| t4.user_id | t4.data_dt | t4.day_sum_low_carbon |
+-------------+-------------+------------------------+
| u_002 | 2017-01-02 | 220 |
| u_002 | 2017-01-03 | 110 |
| u_002 | 2017-01-04 | 150 |
| u_002 | 2017-01-05 | 101 |
| u_005 | 2017-01-02 | 130 |
| u_005 | 2017-01-03 | 180 |
| u_005 | 2017-01-04 | 190 |
| u_008 | 2017-01-04 | 260 |
| u_008 | 2017-01-05 | 360 |
| u_008 | 2017-01-06 | 160 |
| u_008 | 2017-01-07 | 120 |
| u_009 | 2017-01-02 | 140 |
| u_009 | 2017-01-03 | 170 |
| u_009 | 2017-01-04 | 270 |
| u_010 | 2017-01-04 | 170 |
| u_010 | 2017-01-05 | 180 |
| u_010 | 2017-01-06 | 190 |
| u_010 | 2017-01-07 | 180 |
| u_011 | 2017-01-01 | 110 |
| u_011 | 2017-01-02 | 200 |
| u_011 | 2017-01-03 | 120 |
| u_011 | 2017-01-04 | 100 |
| u_011 | 2017-01-05 | 100 |
| u_011 | 2017-01-06 | 100 |
| u_011 | 2017-01-07 | 230 |
| u_013 | 2017-01-02 | 200 |
| u_013 | 2017-01-03 | 150 |
| u_013 | 2017-01-04 | 550 |
| u_013 | 2017-01-05 | 350 |
| u_014 | 2017-01-05 | 250 |
| u_014 | 2017-01-06 | 120 |
| u_014 | 2017-01-07 | 290 |
+-------------+-------------+------------------------+
-- 7. 跟原始表join, 取出满足条件的记录流水
SELECT
t5.user_id,
t5.data_dt,
t6.low_carbon
FROM
(SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon
FROM
(SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATEDIFF(t3.data_dt,t3.qt) jt_qt_diff,
DATEDIFF(t3.data_dt,t3.zt) jt_zt_diff,
DATEDIFF(t3.data_dt,t3.mt) jt_mt_diff,
DATEDIFF(t3.data_dt,t3.ht) jt_ht_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
lag(t2.data_dt,2,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) qt,
lag(t2.data_dt,1,'1970-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) zt,
lead(t2.data_dt,1,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) mt,
lead(t2.data_dt,2,'9999-01-01') over(PARTITION BY t2.user_id ORDER BY t2.data_dt) ht
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3)t4
WHERE
t4.jt_qt_diff =2 AND t4.jt_zt_diff =1
OR
t4.jt_zt_diff =1 AND t4.jt_mt_diff =-1
OR
t4.jt_mt_diff =-1 AND t4.jt_ht_diff = -2)t5
JOIN
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t6
ON t5.user_id = t6.user_id AND t5.data_dt = t6.data_dt
+-------------+-------------+----------------+
| t5.user_id | t5.data_dt | t6.low_carbon |
+-------------+-------------+----------------+
| u_010 | 2017-01-06 | 190 |
| u_011 | 2017-01-02 | 100 |
| u_011 | 2017-01-02 | 100 |
| u_002 | 2017-01-02 | 150 |
| u_002 | 2017-01-02 | 70 |
| u_002 | 2017-01-05 | 101 |
| u_009 | 2017-01-02 | 70 |
| u_009 | 2017-01-02 | 70 |
| u_013 | 2017-01-04 | 550 |
| u_002 | 2017-01-04 | 150 |
| u_008 | 2017-01-06 | 160 |
| u_013 | 2017-01-02 | 150 |
| u_013 | 2017-01-02 | 50 |
| u_014 | 2017-01-07 | 270 |
| u_014 | 2017-01-07 | 20 |
| u_002 | 2017-01-03 | 30 |
| u_002 | 2017-01-03 | 80 |
| u_010 | 2017-01-05 | 90 |
| u_010 | 2017-01-05 | 90 |
| u_011 | 2017-01-04 | 100 |
| u_011 | 2017-01-05 | 100 |
| u_005 | 2017-01-03 | 180 |
| u_011 | 2017-01-01 | 110 |
| u_011 | 2017-01-06 | 100 |
| u_011 | 2017-01-07 | 130 |
| u_011 | 2017-01-07 | 100 |
| u_014 | 2017-01-05 | 250 |
| u_005 | 2017-01-02 | 50 |
| u_005 | 2017-01-02 | 80 |
| u_010 | 2017-01-04 | 90 |
| u_010 | 2017-01-04 | 80 |
| u_008 | 2017-01-04 | 260 |
| u_008 | 2017-01-05 | 360 |
| u_008 | 2017-01-07 | 60 |
| u_008 | 2017-01-07 | 60 |
| u_010 | 2017-01-07 | 90 |
| u_010 | 2017-01-07 | 90 |
| u_011 | 2017-01-03 | 120 |
| u_013 | 2017-01-03 | 150 |
| u_013 | 2017-01-05 | 350 |
| u_005 | 2017-01-04 | 180 |
| u_005 | 2017-01-04 | 10 |
| u_009 | 2017-01-04 | 270 |
| u_009 | 2017-01-03 | 170 |
| u_014 | 2017-01-06 | 120 |
+-------------+-------------+----------------+
=================================================================
-- 求连续7天或7天以上的记录流水?
-- 思路二:
-- 1. 先处理日期格式
SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon ==>t1
-- 2.求出每个用户每天的总能量,
SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100 ==>t2
-- 组合
SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100
+-------------+-------------+---------------------+
| t1.user_id | t1.data_dt | day_sum_low_carbon |
+-------------+-------------+---------------------+
| u_001 | 2017-01-02 | 270 |
| u_001 | 2017-01-06 | 135 |
| u_002 | 2017-01-02 | 220 |
| u_002 | 2017-01-03 | 110 |
| u_002 | 2017-01-04 | 150 |
| u_002 | 2017-01-05 | 101 |
| u_003 | 2017-01-02 | 160 |
| u_003 | 2017-01-03 | 160 |
| u_003 | 2017-01-05 | 120 |
| u_003 | 2017-01-07 | 120 |
| u_004 | 2017-01-01 | 110 |
| u_004 | 2017-01-03 | 120 |
| u_004 | 2017-01-06 | 120 |
| u_004 | 2017-01-07 | 130 |
| u_005 | 2017-01-02 | 130 |
| u_005 | 2017-01-03 | 180 |
| u_005 | 2017-01-04 | 190 |
| u_005 | 2017-01-06 | 280 |
| u_005 | 2017-01-07 | 160 |
| u_006 | 2017-01-02 | 180 |
| u_006 | 2017-01-03 | 220 |
| u_006 | 2017-01-07 | 290 |
| u_007 | 2017-01-01 | 130 |
| u_007 | 2017-01-02 | 360 |
| u_007 | 2017-01-04 | 530 |
| u_007 | 2017-01-06 | 230 |
| u_007 | 2017-01-07 | 160 |
| u_008 | 2017-01-01 | 160 |
| u_008 | 2017-01-02 | 120 |
| u_008 | 2017-01-04 | 260 |
| u_008 | 2017-01-05 | 360 |
| u_008 | 2017-01-06 | 160 |
| u_008 | 2017-01-07 | 120 |
| u_009 | 2017-01-02 | 140 |
| u_009 | 2017-01-03 | 170 |
| u_009 | 2017-01-04 | 270 |
| u_009 | 2017-01-07 | 140 |
| u_010 | 2017-01-02 | 180 |
| u_010 | 2017-01-04 | 170 |
| u_010 | 2017-01-05 | 180 |
| u_010 | 2017-01-06 | 190 |
| u_010 | 2017-01-07 | 180 |
| u_011 | 2017-01-01 | 110 |
| u_011 | 2017-01-02 | 200 |
| u_011 | 2017-01-03 | 120 |
| u_011 | 2017-01-04 | 100 |
| u_011 | 2017-01-05 | 100 |
| u_011 | 2017-01-06 | 100 |
| u_011 | 2017-01-07 | 230 |
| u_012 | 2017-01-02 | 130 |
| u_013 | 2017-01-02 | 200 |
| u_013 | 2017-01-03 | 150 |
| u_013 | 2017-01-04 | 550 |
| u_013 | 2017-01-05 | 350 |
| u_014 | 2017-01-01 | 220 |
| u_014 | 2017-01-02 | 140 |
| u_014 | 2017-01-05 | 250 |
| u_014 | 2017-01-06 | 120 |
| u_014 | 2017-01-07 | 290 |
| u_015 | 2017-01-07 | 140 |
+-------------+-------------+---------------------+
-- 3. 分析 连续3天或以上
| u_008 | 2017-01-01 | 160 1 2016-12-31
| u_008 | 2017-01-02 | 120 2 2016-12-31
| u_008 | 2017-01-04 | 260 3 2017-01-01
| u_008 | 2017-01-05 | 360 4 2017-01-01
| u_008 | 2017-01-06 | 160 5 2017-01-01
-- 4. 按照user_id分区,求row_number()
SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
row_number() over(PARTITION BY t2.user_id ORDER BY t2.data_dt) rn
FROM
t2 ==>t3
-- 组合
SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
row_number() over(PARTITION BY t2.user_id ORDER BY t2.data_dt) rn
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2
+-------------+-------------+------------------------+-----+
| t2.user_id | t2.data_dt | t2.day_sum_low_carbon | rn |
+-------------+-------------+------------------------+-----+
| u_001 | 2017-01-02 | 270 | 1 |
| u_001 | 2017-01-06 | 135 | 2 |
| u_002 | 2017-01-02 | 220 | 1 |
| u_002 | 2017-01-03 | 110 | 2 |
| u_002 | 2017-01-04 | 150 | 3 |
| u_002 | 2017-01-05 | 101 | 4 |
| u_003 | 2017-01-02 | 160 | 1 |
| u_003 | 2017-01-03 | 160 | 2 |
| u_003 | 2017-01-05 | 120 | 3 |
| u_003 | 2017-01-07 | 120 | 4 |
| u_004 | 2017-01-01 | 110 | 1 |
| u_004 | 2017-01-03 | 120 | 2 |
| u_004 | 2017-01-06 | 120 | 3 |
| u_004 | 2017-01-07 | 130 | 4 |
| u_005 | 2017-01-02 | 130 | 1 |
| u_005 | 2017-01-03 | 180 | 2 |
| u_005 | 2017-01-04 | 190 | 3 |
| u_005 | 2017-01-06 | 280 | 4 |
| u_005 | 2017-01-07 | 160 | 5 |
| u_006 | 2017-01-02 | 180 | 1 |
| u_006 | 2017-01-03 | 220 | 2 |
| u_006 | 2017-01-07 | 290 | 3 |
| u_007 | 2017-01-01 | 130 | 1 |
| u_007 | 2017-01-02 | 360 | 2 |
| u_007 | 2017-01-04 | 530 | 3 |
| u_007 | 2017-01-06 | 230 | 4 |
| u_007 | 2017-01-07 | 160 | 5 |
| u_008 | 2017-01-01 | 160 | 1 |
| u_008 | 2017-01-02 | 120 | 2 |
| u_008 | 2017-01-04 | 260 | 3 |
| u_008 | 2017-01-05 | 360 | 4 |
| u_008 | 2017-01-06 | 160 | 5 |
| u_008 | 2017-01-07 | 120 | 6 |
| u_009 | 2017-01-02 | 140 | 1 |
| u_009 | 2017-01-03 | 170 | 2 |
| u_009 | 2017-01-04 | 270 | 3 |
| u_009 | 2017-01-07 | 140 | 4 |
| u_010 | 2017-01-02 | 180 | 1 |
| u_010 | 2017-01-04 | 170 | 2 |
| u_010 | 2017-01-05 | 180 | 3 |
| u_010 | 2017-01-06 | 190 | 4 |
| u_010 | 2017-01-07 | 180 | 5 |
| u_011 | 2017-01-01 | 110 | 1 |
| u_011 | 2017-01-02 | 200 | 2 |
| u_011 | 2017-01-03 | 120 | 3 |
| u_011 | 2017-01-04 | 100 | 4 |
| u_011 | 2017-01-05 | 100 | 5 |
| u_011 | 2017-01-06 | 100 | 6 |
| u_011 | 2017-01-07 | 230 | 7 |
| u_012 | 2017-01-02 | 130 | 1 |
| u_013 | 2017-01-02 | 200 | 1 |
| u_013 | 2017-01-03 | 150 | 2 |
| u_013 | 2017-01-04 | 550 | 3 |
| u_013 | 2017-01-05 | 350 | 4 |
| u_014 | 2017-01-01 | 220 | 1 |
| u_014 | 2017-01-02 | 140 | 2 |
| u_014 | 2017-01-05 | 250 | 3 |
| u_014 | 2017-01-06 | 120 | 4 |
| u_014 | 2017-01-07 | 290 | 5 |
| u_015 | 2017-01-07 | 140 | 1 |
+-------------+-------------+------------------------+-----+
-- 5. 做 data_dt 和 rn的差值
SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATE_SUB(t3.data_dt,t3.rn) data_dt_rn_diff
FROM
t3 ==>t4
-- 组合
SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATE_SUB(t3.data_dt,t3.rn) data_dt_rn_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
row_number() over(PARTITION BY t2.user_id ORDER BY t2.data_dt) rn
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3
+-------------+-------------+------------------------+------------------+
| t3.user_id | t3.data_dt | t3.day_sum_low_carbon | data_dt_rn_diff |
+-------------+-------------+------------------------+------------------+
| u_001 | 2017-01-02 | 270 | 2017-01-01 |
| u_001 | 2017-01-06 | 135 | 2017-01-04 |
| u_002 | 2017-01-02 | 220 | 2017-01-01 |
| u_002 | 2017-01-03 | 110 | 2017-01-01 |
| u_002 | 2017-01-04 | 150 | 2017-01-01 |
| u_002 | 2017-01-05 | 101 | 2017-01-01 |
| u_003 | 2017-01-02 | 160 | 2017-01-01 |
| u_003 | 2017-01-03 | 160 | 2017-01-01 |
| u_003 | 2017-01-05 | 120 | 2017-01-02 |
| u_003 | 2017-01-07 | 120 | 2017-01-03 |
| u_004 | 2017-01-01 | 110 | 2016-12-31 |
| u_004 | 2017-01-03 | 120 | 2017-01-01 |
| u_004 | 2017-01-06 | 120 | 2017-01-03 |
| u_004 | 2017-01-07 | 130 | 2017-01-03 |
| u_005 | 2017-01-02 | 130 | 2017-01-01 |
| u_005 | 2017-01-03 | 180 | 2017-01-01 |
| u_005 | 2017-01-04 | 190 | 2017-01-01 |
| u_005 | 2017-01-06 | 280 | 2017-01-02 |
| u_005 | 2017-01-07 | 160 | 2017-01-02 |
| u_006 | 2017-01-02 | 180 | 2017-01-01 |
| u_006 | 2017-01-03 | 220 | 2017-01-01 |
| u_006 | 2017-01-07 | 290 | 2017-01-04 |
| u_007 | 2017-01-01 | 130 | 2016-12-31 |
| u_007 | 2017-01-02 | 360 | 2016-12-31 |
| u_007 | 2017-01-04 | 530 | 2017-01-01 |
| u_007 | 2017-01-06 | 230 | 2017-01-02 |
| u_007 | 2017-01-07 | 160 | 2017-01-02 |
| u_008 | 2017-01-01 | 160 | 2016-12-31 |
| u_008 | 2017-01-02 | 120 | 2016-12-31 |
| u_008 | 2017-01-04 | 260 | 2017-01-01 |
| u_008 | 2017-01-05 | 360 | 2017-01-01 |
| u_008 | 2017-01-06 | 160 | 2017-01-01 |
| u_008 | 2017-01-07 | 120 | 2017-01-01 |
| u_009 | 2017-01-02 | 140 | 2017-01-01 |
| u_009 | 2017-01-03 | 170 | 2017-01-01 |
| u_009 | 2017-01-04 | 270 | 2017-01-01 |
| u_009 | 2017-01-07 | 140 | 2017-01-03 |
| u_010 | 2017-01-02 | 180 | 2017-01-01 |
| u_010 | 2017-01-04 | 170 | 2017-01-02 |
| u_010 | 2017-01-05 | 180 | 2017-01-02 |
| u_010 | 2017-01-06 | 190 | 2017-01-02 |
| u_010 | 2017-01-07 | 180 | 2017-01-02 |
| u_011 | 2017-01-01 | 110 | 2016-12-31 |
| u_011 | 2017-01-02 | 200 | 2016-12-31 |
| u_011 | 2017-01-03 | 120 | 2016-12-31 |
| u_011 | 2017-01-04 | 100 | 2016-12-31 |
| u_011 | 2017-01-05 | 100 | 2016-12-31 |
| u_011 | 2017-01-06 | 100 | 2016-12-31 |
| u_011 | 2017-01-07 | 230 | 2016-12-31 |
| u_012 | 2017-01-02 | 130 | 2017-01-01 |
| u_013 | 2017-01-02 | 200 | 2017-01-01 |
| u_013 | 2017-01-03 | 150 | 2017-01-01 |
| u_013 | 2017-01-04 | 550 | 2017-01-01 |
| u_013 | 2017-01-05 | 350 | 2017-01-01 |
| u_014 | 2017-01-01 | 220 | 2016-12-31 |
| u_014 | 2017-01-02 | 140 | 2016-12-31 |
| u_014 | 2017-01-05 | 250 | 2017-01-02 |
| u_014 | 2017-01-06 | 120 | 2017-01-02 |
| u_014 | 2017-01-07 | 290 | 2017-01-02 |
| u_015 | 2017-01-07 | 140 | 2017-01-06 |
+-------------+-------------+------------------------+------------------+
-- 6. 按照user_id 和 data_dt_rn_diff 分区,求count()
SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon,
COUNT(t4.user_id) over(PARTITION BY t4.user_id,t4.data_dt_rn_diff) days
FROM
t4 ==>t5
-- 组合
SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon,
COUNT(t4.user_id) over(PARTITION BY t4.user_id,t4.data_dt_rn_diff) days
FROM
(SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATE_SUB(t3.data_dt,t3.rn) data_dt_rn_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
row_number() over(PARTITION BY t2.user_id ORDER BY t2.data_dt) rn
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3)t4
+-------------+-------------+------------------------+-------+
| t4.user_id | t4.data_dt | t4.day_sum_low_carbon | days |
+-------------+-------------+------------------------+-------+
| u_001 | 2017-01-02 | 270 | 1 |
| u_001 | 2017-01-06 | 135 | 1 |
| u_002 | 2017-01-02 | 220 | 4 |
| u_002 | 2017-01-03 | 110 | 4 |
| u_002 | 2017-01-04 | 150 | 4 |
| u_002 | 2017-01-05 | 101 | 4 |
| u_003 | 2017-01-03 | 160 | 2 |
| u_003 | 2017-01-02 | 160 | 2 |
| u_003 | 2017-01-05 | 120 | 1 |
| u_003 | 2017-01-07 | 120 | 1 |
| u_004 | 2017-01-01 | 110 | 1 |
| u_004 | 2017-01-03 | 120 | 1 |
| u_004 | 2017-01-06 | 120 | 2 |
| u_004 | 2017-01-07 | 130 | 2 |
| u_005 | 2017-01-02 | 130 | 3 |
| u_005 | 2017-01-03 | 180 | 3 |
| u_005 | 2017-01-04 | 190 | 3 |
| u_005 | 2017-01-06 | 280 | 2 |
| u_005 | 2017-01-07 | 160 | 2 |
| u_006 | 2017-01-02 | 180 | 2 |
| u_006 | 2017-01-03 | 220 | 2 |
| u_006 | 2017-01-07 | 290 | 1 |
| u_007 | 2017-01-01 | 130 | 2 |
| u_007 | 2017-01-02 | 360 | 2 |
| u_007 | 2017-01-04 | 530 | 1 |
| u_007 | 2017-01-06 | 230 | 2 |
| u_007 | 2017-01-07 | 160 | 2 |
| u_008 | 2017-01-01 | 160 | 2 |
| u_008 | 2017-01-02 | 120 | 2 |
| u_008 | 2017-01-05 | 360 | 4 |
| u_008 | 2017-01-04 | 260 | 4 |
| u_008 | 2017-01-06 | 160 | 4 |
| u_008 | 2017-01-07 | 120 | 4 |
| u_009 | 2017-01-03 | 170 | 3 |
| u_009 | 2017-01-04 | 270 | 3 |
| u_009 | 2017-01-02 | 140 | 3 |
| u_009 | 2017-01-07 | 140 | 1 |
| u_010 | 2017-01-02 | 180 | 1 |
| u_010 | 2017-01-04 | 170 | 4 |
| u_010 | 2017-01-05 | 180 | 4 |
| u_010 | 2017-01-06 | 190 | 4 |
| u_010 | 2017-01-07 | 180 | 4 |
| u_011 | 2017-01-05 | 100 | 7 |
| u_011 | 2017-01-01 | 110 | 7 |
| u_011 | 2017-01-02 | 200 | 7 |
| u_011 | 2017-01-03 | 120 | 7 |
| u_011 | 2017-01-04 | 100 | 7 |
| u_011 | 2017-01-06 | 100 | 7 |
| u_011 | 2017-01-07 | 230 | 7 |
| u_012 | 2017-01-02 | 130 | 1 |
| u_013 | 2017-01-02 | 200 | 4 |
| u_013 | 2017-01-03 | 150 | 4 |
| u_013 | 2017-01-04 | 550 | 4 |
| u_013 | 2017-01-05 | 350 | 4 |
| u_014 | 2017-01-01 | 220 | 2 |
| u_014 | 2017-01-02 | 140 | 2 |
| u_014 | 2017-01-05 | 250 | 3 |
| u_014 | 2017-01-06 | 120 | 3 |
| u_014 | 2017-01-07 | 290 | 3 |
| u_015 | 2017-01-07 | 140 | 1 |
+-------------+-------------+------------------------+-------+
-- 取连续3天或以上
SELECT
t5.user_id,
t5.data_dt,
t5.day_sum_low_carbon,
t5.days
FROM
t5
WHERE t5.days >=3
-- 组合
SELECT
t5.user_id,
t5.data_dt,
t5.day_sum_low_carbon,
t5.days
FROM
(SELECT
t4.user_id,
t4.data_dt,
t4.day_sum_low_carbon,
COUNT(t4.user_id) over(PARTITION BY t4.user_id,t4.data_dt_rn_diff) days
FROM
(SELECT
t3.user_id,
t3.data_dt,
t3.day_sum_low_carbon,
DATE_SUB(t3.data_dt,t3.rn) data_dt_rn_diff
FROM
(SELECT
t2.user_id,
t2.data_dt,
t2.day_sum_low_carbon,
row_number() over(PARTITION BY t2.user_id ORDER BY t2.data_dt) rn
FROM
(SELECT
t1.user_id,
t1.data_dt,
SUM(t1.low_carbon) day_sum_low_carbon
FROM
(SELECT
user_id,
DATE_FORMAT(regexp_replace(data_dt,"/","-"),"yyyy-MM-dd") data_dt ,
low_carbon
FROM
user_low_carbon)t1
GROUP BY
t1.user_id,
t1.data_dt
HAVING day_sum_low_carbon >=100)t2)t3)t4)t5
WHERE t5.days >=7
-- join
网友评论