数据分析与SQL统计
名词解释:
OP,全称Operation,指用户在每张表格上的最小颗粒度的一次操作,比如:新增一行、修改了一次单元格内容、隐藏、筛选、删除一行、新建一列等等...
1、统计近30天活跃度数据
可以统计所有空间站过去30天的活跃情况
SELECT date(d.created_at) AS "创建时间", count(*) AS "总数"
FROM vika_datasheet_changeset d
where d.created_at > DATE_ADD(CURRENT_DATE(), INTERVAL -30 DAY) and d.created_at < CURRENT_DATE()
GROUP BY date(d.created_at)
ORDER BY date(d.created_at) ASC;
2、每周各活跃度总数趋势(最近一年)
可以统计所有空间站每周的活跃情况
SELECT
dst_op_count as "总OP",
api_count as "总API-OP",
(dst_op_count - api_count ) as "非API-OP",
cst_op.weeks
FROM
(
SELECT
CONCAT(YEAR(c.created_at), '-', WEEK(c.created_at) + 10) AS weeks,
COUNT(*) as dst_op_count
FROM
vika_datasheet_changeset c
WHERE
c.created_at > DATE_SUB(CURDATE(), INTERVAL 365 DAY)
GROUP BY
weeks
ORDER BY
weeks
) AS cst_op
LEFT JOIN (
SELECT
CONCAT(YEAR(c.created_at), '-', WEEK(c.created_at) + 10) AS weeks,
COUNT(*) as api_count
FROM
vika_datasheet_changeset_source c
WHERE
c.created_at > DATE_SUB(CURDATE(), INTERVAL 365 DAY)
AND c.SOURCE_TYPE = 1
GROUP BY
weeks
ORDER BY
weeks
) AS api_op ON api_op.weeks = cst_op.weeks
ORDER by cst_op.weeks;
3、空间站30天活跃度排行榜
可以清晰地看到,哪个空间站比较活跃,排名第一的是最活跃的空间站,
select s.name as "空间站名字", count(c.id) as "OP数"
from VIKA_SPACE s
join VIKA_DATASHEET d on s.space_id = d.space_id
join VIKA_DATASHEET_CHANGESET c on d.dst_id = c.dst_id
where c.created_at >DATE_ADD(CURRENT_DATE(), INTERVAL -30 DAY) and d.created_at < CURRENT_DATE()
group by s.name,s.space_id
order by count(c.id) desc
4、所有空间站30天活跃度与新增表格数
所有空间站近30天op数和表格数
SELECT d.space_id,count(*),'op' AS mark
FROM vika_datasheet_changeset AS c
LEFT JOIN vika_datasheet AS d ON d.dst_id = c.dst_id
WHERE date(c.created_at)< DATE_ADD(CURRENT_DATE(), INTERVAL -30 DAY)
GROUP BY d.space_id
UNION
SELECT d.space_id,count(DISTINCT d.dst_id),'dst' AS mark
FROM vika_datasheet as d
WHERE date(d.created_at)< DATE_ADD(CURRENT_DATE(), INTERVAL -30 DAY)
GROUP BY d.space_id;
5、所有空间站活跃度和新增表格数
所有空间站op数排行+表格数
SELECT d.space_id,count(*),'op' AS mark
FROM vika_datasheet_changeset AS c
LEFT JOIN vika_datasheet AS d ON d.dst_id = c.dst_id
GROUP BY d.space_id
UNION
SELECT d.space_id,count(DISTINCT d.dst_id),'dst' AS mark
FROM vika_datasheet as d
GROUP BY d.space_id
6、[用户]活跃度排行榜-按用户
可以查看哪些用户比较活跃,查询维度包括:用户昵称、手机号、Op数、空间站名称,活跃度高的靠前
select u.NICK_NAME,s.NAME,u.email,count(*)
from vika_datasheet_changeset as c
left join vika_datasheet as d on d.DST_ID = c.DST_ID
left join vika_user as u on u.ID = c.CREATED_BY
left join vika_space as s on s.SPACE_ID = d.SPACE_ID
group by u.NICK_NAME,u.MOBILE_PHONE,s.NAME
order by count(*) desc;7、活跃度排行榜-按表格
查询活跃度哪些表格比较活跃,对应哪个空间站
SELECT d.dst_id,dst_name,count(*),count(*) as total
FROM vika_datasheet_changeset AS c
LEFT JOIN vika_datasheet AS d on d.dst_id = c.dst_id
group by d.dst_id
order by total desc
8、指定空间站,统计已创建的表格数
查询未删除且不在回收站中的数量
SELECT space_id,COUNT(distinct node_id)
FROM vika_node
WHERE space_id = 'spcidxxxxxxxx'
AND is_rubbish = 0
AND is_deleted = 0
AND type = '2'9、指定空间站,统计已创建的文件夹(场景)数
查询未删除且不在回收站中的数量
SELECT space_id,COUNT(distinct node_id)
FROM vika_node
WHERE space_id = 'spcixxxxxxxx'
AND is_rubbish = 0
AND is_deleted = 0
AND type = '1'
GROUP BY space_id
10、统计公开API总调用次数
SELECT COUNT(*) FROM vika_api_usage
11、近30天公开API调用次数趋势
可清晰地查询出近30天,每一天调用API的数量,查看趋势变化情况
SELECT date(a.created_at) as "创建时间", count(*) as "调用次数"
FROM vika_api_usage a
where a.created_at > date(a.created_at)< DATE_ADD(CURRENT_DATE(), INTERVAL -30 DAY)
GROUP BY date(a.created_at)
12、查询空间站活跃度排行榜
可清晰地查询到所有空间站的活跃度,越活跃的空间站排名靠前
select s.space_id as "空间站ID", s.name as "空间站名字", count(c.id) as "OP数(活跃度)"
from vika_space s
join vika_datasheet d on s.space_id = d.space_id
join vika_datasheet_changeset c on d.dst_id = c.dst_id
group by s.name,s.space_id
order by count(c.id) desc
13、[用户]每个月新增用户数
按月份查看,每个月新增的用户数,可以清晰的看到用户增长趋势
select date_format(u.created_at,"%Y/%m") as '月份', count(*) as '新增用户数'
from vika_user u
where u.remark is null and u.created_at >= '2023-01-01'
group by date_format(u.created_at,'%Y-%m')
order by date_format(u.created_at,'%Y-%m') asc
14、指定空间站,统计活跃表格前100
查询指定空间站中比较活跃表格,按OP 操作数排序,前100
SELECT d.dst_id,d.dst_name,count(*) as op
FROM vika_datasheet_changeset AS c
LEFT JOIN vika_datasheet AS d on d.dst_id = c.dst_id
where d.space_id='spcidxxxxxxxx'
group by d.dst_id
order by op desc
limit 100
15、查询现有空间站数量
select count(s.id)
from vika_space s
where s.is_deleted = false
16、查询空间站人数排行榜
统计所有空间站,哪些空间站人数最多,每个空间站的人数多少,管理员是谁
select s.name as '空间站名字', count(m.id) as '空间站人数', m.member_name as '主管理员', m.mobile as '主管理员手机号',s.space_id
from vika_space s
join vika_unit_member m
on s.space_id = m.space_id
where m.is_active = true
and m.is_deleted = false
and m.user_id is not null
group by s.space_id
order by count(m.id) desc
17、指定空间站查询活跃度
按时间排序,查询某个空间站每天的活跃度
SELECT date(d.created_at) AS "创建时间", count(*) AS "总数"
FROM vika_datasheet_changeset d
where d.dst_id in (select dst_id from vika_datasheet d where space_id='spcxxxxx')
GROUP BY date(d.created_at)
ORDER BY date(d.created_at) ASC18、指定空间站查询活跃人数
按月份排序,查询某个空间站每个月的活跃人数
SELECT DATE_FORMAT(c.created_at, '%Y-%m') AS month, COUNT(DISTINCT c.member_id) AS '活跃人数'
FROM vika_datasheet_changeset c
where c.dst_id in (select dst_id from vika_datasheet d where space_id='spcxxxx')
GROUP BY month
ORDER BY month;19、查看表活跃度
按天排序,查询某张表每天的活跃人数
SELECT
DATE_FORMAT( c.created_at, '%Y-%m-%d' ) AS op_day,
count(*) AS total
FROM
vika_datasheet_changeset as c
WHERE
dst_id = "dstxxxxxx"
GROUP BY
op_day
ORDER BY
op_day desc;20、查看记录数最多空间站
SELECT vd.space_id as space_id,vs.name as space_name ,SUM(JSON_LENGTH(vdm.meta_data -> '$.views[0].rows')) as total_row
FROM vika_datasheet_meta vdm, vika_datasheet vd , vika_space vs
WHERE vdm.dst_id = vd.dst_id
AND vdm.is_deleted = 0
AND vd.is_deleted = 0
And vd.space_id = vs.space_id
GROUP BY space_id
order by total_row desc limit 2021、指定空间站内最近登录用户
空间站
spcxxxxx最近30天内有登录的用户总数
SELECT
count(*)
FROM
vika_unit_member vum
JOIN vika_user vu ON vum.user_id = vu.id
WHERE
vu.last_login_time > DATE_ADD(CURRENT_DATE(), INTERVAL - 30 DAY)
AND vum.space_id = 'spcxxxxx' and vum.is_deleted = 0