WITH base AS(
SELECT
DISTINCT
user_id,
event_name,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
user_pseudo_id
FROM advanced.app_logs
), first_week_and_diff AS(
SELECT
*,
DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
FROM (
SELECT
DISTINCT
user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
), user_counts AS(
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
)
SELECT
*,
ROUND(SAFE_DIVIDE(user_cnt, first_week_user_cnt), 3) AS retention_ratio
FROM (
SELECT
diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week ASC) AS first_week_user_cnt
FROM user_counts
)
일단 이 앱로그가 배달 주문하는 앱이라는 점을 가장 먼저 고려해야한다.
기준 일자를 정해야 하는데 이때 휴먼 유저 ( 앱을 사용하지 않는다고 판단되는 유저 )의 비중을 30~40%을 잡아야된다고 판단했습니다.
휴먼 유저 비중이 30~40가 되는 날짜가 “45일” 가장 좋다고 판단해서 45일을 기준으로 User Group을 나누었습니다
User Group | 이름 | 정의 |
---|---|---|
New_User | 신규 유저 | 현재 날짜 기준 45일 이내 최초 접속자 |
Current_User | 사용 유저 | 이전 접속일자 와 현재 날짜 기준 모두 45일 이내 접속자 |
Resurrected_User | 복귀 유저 | 이전 접속일자 45일 초과 현재 날짜 기준 45일 이내 접속자 |
Dormant_User | 휴먼 유저 | 현재날짜 기준 45일 초과 접속자 |
WITH base AS(
SELECT
DISTINCT
user_pseudo_id,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
FROM advanced.app_logs
), previous_date AS(
SELECT
*,
LAG(event_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date) AS previous_event_date
FROM base
), previous_date_diff AS(
SELECT
*,
DATE_DIFF(event_date, previous_event_date, DAY) AS days_since_last_date,
FROM previous_date
), last_date_diff AS(
SELECT
DISTINCT
user_pseudo_id,
LAST_VALUE(days_since_last_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_days_since_last_date,
LAST_VALUE(event_date) OVER(PARTITION BY user_pseudo_id ORDER BY event_date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS last_event_date
FROM previous_date_diff
), final_diff AS (
SELECT
user_pseudo_id,
last_days_since_last_date,
DATE_DIFF(DATE '2023-01-20', last_event_date, DAY) AS final_diff_day
FROM last_date_diff
), user_group AS (
SELECT
user_pseudo_id,
last_days_since_last_date,
final_diff_day,
CASE WHEN last_days_since_last_date IS NULL AND final_diff_day <= 45 THEN "New_User"
WHEN last_days_since_last_date <= 45 AND final_diff_day <= 45 THEN "Current_User"
WHEN last_days_since_last_date > 45 AND final_diff_day <= 45 THEN "Resurrected_User"
WHEN final_diff_day > 45 THEN "Dormant_User"
END AS user_group_name
FROM final_diff
)
SELECT
user_group_name,
COUNT(*) AS group_cnt
FROM user_group
GROUP BY user_group_name;
WITH base AS(
SELECT
DISTINCT
user_id,
event_name,
DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul') AS event_datetime,
DATE(DATETIME(TIMESTAMP_MICROS(event_timestamp), 'Asia/Seoul')) AS event_date,
user_pseudo_id
FROM advanced.app_logs
WHERE
event_name = "click_payment"
), first_week_and_diff AS(
SELECT
*,
DATE_DIFF(event_week, first_week, WEEK) AS diff_of_week
FROM (
SELECT
DISTINCT
user_pseudo_id,
DATE_TRUNC(MIN(event_date) OVER(PARTITION BY user_pseudo_id), WEEK(MONDAY)) AS first_week,
DATE_TRUNC(event_date, WEEK(MONDAY)) AS event_week
FROM base
)
), user_counts AS(
SELECT
diff_of_week,
COUNT(DISTINCT user_pseudo_id) AS user_cnt
FROM first_week_and_diff
GROUP BY diff_of_week
)
SELECT
*,
ROUND(SAFE_DIVIDE(user_cnt, first_week_user_cnt), 3) AS retention_ratio
FROM (
SELECT
diff_of_week,
user_cnt,
FIRST_VALUE(user_cnt) OVER(ORDER BY diff_of_week ASC) AS first_week_user_cnt
FROM user_counts
)