美文网首页数仓实战1(阿里架构)
数仓项目03:DWS公共汇总粒度事实层

数仓项目03:DWS公共汇总粒度事实层

作者: 勇于自信 | 来源:发表于2020-06-24 11:51 被阅读0次

2.2 DWS公共汇总粒度事实层

Hive数据库建库建表:
创建Hive库并进入:

create database if not exists dws_nshop;
use dws_nshop;
2.2.1 用户主题
2.2.2.1 用户启动【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
launch_count int COMMENT '启动次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_launch/';

通过对dwd层的用户启动日志表做一个count聚合即可,sql如下:

insert overwrite table dws_nshop.dws_nshop_ulog_launch partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as launch_count
from 
dwd_nshop.dwd_nshop_actlog_launch
where
bdp_day="20200618"

用户启动7days【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_launch_7d (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
launch_count int COMMENT '启动次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_launch_7d/';

入库:和上面逻辑一样,只是加一周时间条件限制:

insert overwrite table dws_nshop.dws_nshop_ulog_launch_7d partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as launch_count
from 
dwd_nshop.dwd_nshop_actlog_launch
where
bdp_day between '20200611' and '20200618'
2.2.2.2 用户浏览表

建表:

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_view (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
view_count INT COMMENT '浏览次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_view/';

数据从DWD层的用户产品浏览表聚合得来:

insert overwrite table dws_nshop.dws_nshop_ulog_view partition(bdp_day='20200618')
select
user_id ,
device_num ,
device_type,
os  ,
os_version ,
manufacturer,
carrier ,
network_type,
area_code,
count(device_num) over(partition by device_num)as view_count
from dwd_nshop.dwd_nshop_actlog_pdtview
where
bdp_day='20200618'

用户查询【DWS】
建表:

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_search (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
search_count int COMMENT '查询次数'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_search/'

对查询主题表进行count聚合即可,如下:

insert overwrite table dws_nshop.dws_nshop_ulog_search partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(device_num) over(partition by device_num) as search_count
from 
dwd_nshop.dwd_nshop_actlog_pdtsearch
where
bdp_day="20200618"
2.2.2.3 用户关注【DWS】
CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_ulog_comment (
user_id string COMMENT '用户id',
device_num string COMMENT '设备号',
device_type string COMMENT '设备类型',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
comment_count int COMMENT '评论次数',
comment_target_count int COMMENT '类别评论次数',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_ulog_comment/';

聚合用户对产品的关注,使用count聚合出关注次数

insert overwrite table dws_nshop.dws_nshop_ulog_comment partition(bdp_day='20200618')
select
user_id,
device_num,
device_type,
os,
os_version,
manufacturer,
carrier,
network_type,
area_code,
count(target_id) over(partition by comment_count) as comment_count,
count(distinct target_id) over(partition by target_id) as comment_target_count,
bdp_day
from 
dwd_nshop.dwd_actlog_product_comment
where
bdp_day="20200618"

用户交易宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_orders (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
orders_count INT COMMENT '订单数量',
orders_pay DECIMAL (10, 1) COMMENT '订单金额',
orders_shipping DECIMAL (10, 1) COMMENT '订单运费金额',
orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_orders/'

用户交易宽表的数据主要是从DWD层交易订单明细流水表中抽取出来,再结合用户表查出用户相关信息:

with tborder as(
select
    o.order_id,
    o.district_money,
    o.shipping_money,
    o.payment_money,
    c.customer_id,
    c.customer_natives
    from dwd_nshop.dwd_nshop_orders_details o
    join ods_nshop.ods_02_customer c
    on o.customer_id=c.customer_id
    where
    bdp_day='20200618'
)
insert overwrite table dws_nshop.dws_nshop_user_orders partition(bdp_day='20200618')
select
customer_id,
customer_natives,
count(order_id) over(partition by customer_id) as orders_count,
sum(payment_money) over(partition by customer_id)as orders_pay,
sum(shipping_money) over(partition by customer_id)as orders_shipping,
sum(district_money) over(partition by customer_id)as orders_district,
current_timestamp() as ct
from tborder

用户交易宽表7day【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_orders_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
orders_count INT COMMENT '订单数量',
orders_pay DECIMAL (10, 1) COMMENT '订单金额',
orders_shipping DECIMAL (10, 1) COMMENT '订单运费金额',
orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_orders_7d/'

用户投诉订单宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_complainant (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
compl_orders_count INT COMMENT '订单数量',
compl_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
compl_supplier_count INT COMMENT '商家数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_complainant/'

用户投诉订单宽表7day【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_complainant_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
compl_orders_count INT COMMENT '订单数量',
compl_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
compl_supplier_count INT COMMENT '商家数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_complainant_7d/'

用户营销活动宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_release (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
source_count INT COMMENT '投放来源数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_release/'

用户营销活动宽表7day【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_user_release_7d (
user_id string COMMENT '用户id',
os string COMMENT '手机系统',
os_version string COMMENT '手机系统版本',
manufacturer string COMMENT '手机制造商',
carrier string COMMENT '电信运营商',
network_type string COMMENT '网络类型',
area_code string COMMENT '地区编码',
source_count INT COMMENT '投放来源数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/user/dws_nshop_user_release_7d/'

商家用户交互记录宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_user (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
view_count INT COMMENT '浏览次数',
comment_users INT COMMENT '关注人数',
comment_area_code INT COMMENT '关注地区数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_user/'

商家用户交互记录的数据主要统计:
1.商家维度下用户的浏览次数
所以要结合DWD层的用户产品浏览表dwd_nshop_actlog_pdtview结合页面布局维度表、产品维度表和店铺维度表查出相关信息
2.商家维度下关注人数、关注地区数
所以要结合DWD层的用户产品关注表dwd_actlog_product_comment 结合页面布局维度表,产品维度表和店铺维度表查出相关信息
整合sql如下:

-- 统计商家维度下用户的浏览次数
with pgview as(
select
    su.supplier_code,
    su.supplier_type,
    count(*) as view_count
    from dwd_nshop.dwd_nshop_actlog_pdtview pv
    join
    ods_nshop.dim_pub_page pp
    on
    pp.page_type='4'
    and pp.page_code=pv.target_id
    join
    ods_nshop.dim_pub_product pr
    on
    pr.product_code=pp.page_code
    join
    ods_nshop.dim_pub_supplier su
    on
    su.supplier_code=pr.supplier_code
    where 
    pv.bdp_day='20200618'
    group by
    su.supplier_code,
    su.supplier_type
),
-- 统计商家维度下关注人数、关注地区数
prcomment as(
select
    su.supplier_code,
    su.supplier_type,
    count(distinct pc.user_id) as comment_users,
    count(distinct pc.area_code)as comment_area_code
    from
    dwd_nshop.dwd_actlog_product_comment pc
    join
    ods_nshop.dim_pub_page pp
    on
    pp.page_type='4'
    and pp.page_code=pc.target_id
    join
    ods_nshop.dim_pub_product pr
    on
    pr.product_code=pp.page_code
    join
    ods_nshop.dim_pub_supplier su
    on
    su.supplier_code=pr.supplier_code
    where 
    pc.bdp_day='20200618'
    group by
    su.supplier_code,
    su.supplier_type
)
-- 整合指标到DWS表
insert overwrite table dws_nshop.dws_nshop_supplier_user partition(bdp_day='20200618')
select
pgview.supplier_code,
pgview.supplier_type,
pgview.view_count,
prcomment.comment_users,
prcomment.comment_area_code,
current_timestamp() as ct
from pgview 
join 
prcomment
on
pgview.supplier_code=prcomment.supplier_code
and
pgview.supplier_type=prcomment.supplier_type

商家用户交互记录宽表7day【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_user_7d (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
view_count INT COMMENT '浏览次数',
comment_users INT COMMENT '关注人数',
comment_area_code INT COMMENT '关注地区数量',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_user_7d/'

商家日流水宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
sales_users INT COMMENT '购物人数',
sales_users_area INT COMMENT '购物地区数量',
sales_orders INT COMMENT '购物订单数',
salaes_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
salaes_orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_sales/'

商家日流水宽表的数据主要是从订单明细流水表里抽取,再结合用户信息表和产品信息表、店铺表查出相关信息:

insert overwrite table dws_nshop.dws_nshop_supplier_sales partition(bdp_day='20200321')
select
od.supplier_code,
su.supplier_type,
count(distinct od.customer_id) sales_users,
count(distinct oc.customer_natives) sales_users_area,
count(distinct od.order_id) sales_orders,
sum(pr.product_price * od.product_cnt) salaes_orders_pay,
sum(od.district_money) salaes_orders_district,
current_timestamp() as ct
from dwd_nshop.dwd_nshop_orders_details od
join
ods_nshop.ods_02_customer oc
on
od.customer_id=oc.custxomer_id
join 
ods_nshop.dim_pub_product pr
on 
od.supplier_code=pr.supplier_code
join
ods_nshop.dim_pub_supplier su
on
su.supplier_code=od.supplier_code
where
od.bdp_day='20200321'
group by 
od.supplier_code,
su.supplier_type

商家日流水宽表7day【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_supplier_sales_7d (
supplier_id string COMMENT '商家id',
supplier_type INT COMMENT '供应商类型:1.自营,2.官方 3其他',
sales_users INT COMMENT '购物人数',
sales_users_area INT COMMENT '购物地区数量',
sales_orders INT COMMENT '购物订单数',
salaes_orders_pay DECIMAL (10, 1) COMMENT '订单金额',
salaes_orders_district DECIMAL (10, 1) COMMENT '订单优惠金额',
ct BIGINT COMMENT '产生时间'
) partitioned BY (bdp_week string) stored AS parquet location '/data/nshop/dws/supplier/dws_nshop_supplier_sales_7d/'
2.3 营销活动主题

广告投放用户宽表【DWS】

CREATE external TABLE
IF NOT EXISTS dws_nshop.dws_nshop_release_user (
release_sources string COMMENT '投放渠道',
release_category string COMMENT '投放浏览产品分类',
release_users INT COMMENT '投放浏览用户数',
release_product_page INT COMMENT '投放浏览产品页面数',
ct BIGINT COMMENT '创建时间'
) partitioned BY (bdp_day string) stored AS parquet location '/data/nshop/dws/release/dws_nshop_release_user/'

对用户数和产品数进行聚合即可:

insert overwrite table dws_nshop.dws_nshop_release_user partition(bdp_day='20200321')
select 
release_sources,
release_category,
count(distinct customer_id) release_users,
count(1) release_product_page,
current_timestamp() ct
from 
dwd_nshop.dwd_nshop_releasedatas
where
bdp_day='20200321'
group by
release_sources,
release_category

相关文章

  • 数仓项目03:DWS公共汇总粒度事实层

    2.2 DWS公共汇总粒度事实层 Hive数据库建库建表:创建Hive库并进入: 2.2.1 用户主题 2.2.2...

  • 用户行为数仓搭建

    # 数仓分层概念 1.数仓分层 ods 存放原始数据 dwd 数据清洗 dws 数据汇总 ads 为统计报表提供数...

  • 数仓项目02:DWD明细粒度事实层

    2. 第二层DW主题层 2.1 DWD明细层 2.1.1 用户主题 DWD明细粒度事实层Hive数据库建库建表:创...

  • 数仓实战05:数仓搭建-DWS层

    1.业务术语 1)用户用户以设备为判断标准,在移动统计中,每个独立设备认为是一个独立用户。Android 系统根据...

  • DWS层销售主题宽表实现

    DWS的功能 面向主题,进行主题需求分析,轻度汇总,细粒度汇总 基本步骤 step01:需求解读 如果不明白的指标...

  • 该如何设计数仓的汇总层(DWS)

    关于数据仓库的分层,似乎大家都有一个共同的认识。但涉及到每一层该如何去建模,可能每个人都有自己的理解。数据建模,毫...

  • 数仓实战03:数仓搭建-ODS 层

    1.创建数据库 1)启动hive[atguigu@hadoop102 hive]$ nohup bin/hive ...

  • 数仓建设规范

    本文将全面讲解数仓建设规范,从数据模型规范,到数仓公共规范,数仓各层规范,最后到数仓命名规范,包括表命名,指标字段...

  • 数仓项目1:ODS贴源层

    1. 第一层ODS层 1 ODS层存放您从业务系统获取的最原始的数据,是其他上层数据的源数据。2 本项目中使用的O...

  • 数仓实战04:数仓搭建-DWD层

    1)对用户行为数据解析2)对核心数据进行判空过滤。3)对业务数据采用维度模型重新建模,即维度退化。 1.用户行为启...

网友评论

    本文标题:数仓项目03:DWS公共汇总粒度事实层

    本文链接:https://www.haomeiwen.com/subject/vskrfktx.html