Skip to content

Commit

Permalink
Version 2.1.0 (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
agregori97 authored Apr 15, 2024
1 parent 216678b commit a15c5a8
Show file tree
Hide file tree
Showing 13 changed files with 246 additions and 166 deletions.
1 change: 1 addition & 0 deletions views/attribution_sources/others.view.lkml
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ view: others {
derived_table: {
datagroup_trigger:attribution_channel
sql: select '(direct)' as medium, 'Direct' as source UNION ALL
select '(direct)(none)' as medium, 'Direct' as source UNION ALL
select 'referral' as medium, 'Referral' as source UNION ALL
select 'audio' as medium, 'Audio' as source UNION ALL
select 'sms' as medium, 'SMS' as source UNION ALL
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
include: "/views/sessions/*.view.lkml"
view: future_input {
derived_table: {
datagroup_trigger: bqml_datagroup
sql_trigger_value: ${testing_input.SQL_TABLE_NAME} ;;
# partition_keys: ["session_date"]
# cluster_keys: ["session_date"]
#cluster_keys: ["session_date"]
sql:
select * from
(WITH
Expand All @@ -23,9 +24,9 @@ view: future_input {
THEN 1
ELSE 0 END) AS label
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
GROUP BY
user_pseudo_id
),
Expand All @@ -36,11 +37,11 @@ view: future_input {
MAX(geo.region) AS region,
MAX(geo.country) AS country
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand All @@ -59,11 +60,11 @@ view: future_input {
END)
AS pages_viewed
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -101,11 +102,11 @@ view: future_input {
-- SUM(IF(event_name = 'my custom event', 1, 0)) AS cnt_my_custom_event
-- Don't forget to add a comma after 'cnt_session_start' when adding a new field.
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -139,11 +140,11 @@ view: future_input {
WHERE key = 'engagement_time_msec'
)) AS engagement_time_msec
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -188,7 +189,7 @@ view: future_input {
-- IFNULL(MAX(Event_counts.cnt_my_custom_event), 0) AS cnt_my_custom_event
-- Don't forget to add a comma after 'cnt_session_start' when adding a new field.
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
LEFT JOIN engagement AS Engagement
Expand All @@ -200,7 +201,7 @@ view: future_input {
LEFT JOIN event_cnts AS Event_counts
ON GA.user_pseudo_id = Event_counts.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
include: "/explores/sessions.explore.lkml"
include: "/views/*/*.view.lkml"
include: "/views/bqml/*/*.view.lkml"
include: "/views/*.view.lkml"

explore: pred_history {}
view: pred_history {
Expand Down Expand Up @@ -49,10 +52,11 @@ view: pred_history {
# }
}

explore: incremental_prediction {}
explore: incremental_prediction {hidden:yes}
view: incremental_prediction {
derived_table: {
datagroup_trigger: bqml_datagroup
#datagroup_trigger: bqml_datagroup
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
create_process: {
sql_step:
CREATE TABLE IF NOT EXISTS ${SQL_TABLE_NAME} (
Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
######################## TRAINING/TESTING INPUTS #############################
include: "/views/bqml/future_input.view"
include: "/views/bqml/training_input.view"
include: "/views/bqml/testing_input.view"
include: "/views/bqml/purchase_propensity/*.view"
include: "/views/sessions/*.view"

######################## MODEL #############################

view: future_purchase_model {
derived_table: {
datagroup_trigger: bqml_datagroup
sql_trigger_value: ${future_input.SQL_TABLE_NAME} ;;
sql_create:
CREATE OR REPLACE MODEL ${SQL_TABLE_NAME}
OPTIONS(
Expand All @@ -33,15 +32,16 @@ view: future_purchase_model {
}

######################## TRAINING INFORMATION #############################
explore: future_purchase_model_evaluation {}
explore: future_purchase_model_training_info {}
explore: roc_curve {}
explore: confusion_matrix {}
explore: feature_importance {}
explore: future_purchase_model_evaluation {hidden:yes}
explore: future_purchase_model_training_info {hidden:yes}
explore: roc_curve {hidden:yes}
explore: confusion_matrix {hidden:yes}
explore: feature_importance {hidden:yes}

# VIEWS:
view: future_purchase_model_evaluation {
derived_table: {
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: SELECT * FROM ml.EVALUATE(
MODEL ${future_purchase_model.SQL_TABLE_NAME},
(SELECT * FROM ${testing_input.SQL_TABLE_NAME}));;
Expand All @@ -55,6 +55,7 @@ view: future_purchase_model_evaluation {

view: roc_curve {
derived_table: {
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: SELECT * FROM ml.ROC_CURVE(
MODEL ${future_purchase_model.SQL_TABLE_NAME},
(SELECT * FROM ${testing_input.SQL_TABLE_NAME}));;
Expand Down Expand Up @@ -100,6 +101,7 @@ view: roc_curve {

view: confusion_matrix {
derived_table: {
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: SELECT Expected_label,_0 as Predicted_0,_1 as Predicted_1 FROM ml.confusion_matrix(
MODEL ${future_purchase_model.SQL_TABLE_NAME},
(SELECT * FROM ${testing_input.SQL_TABLE_NAME}));;
Expand All @@ -111,6 +113,7 @@ view: confusion_matrix {

view: future_purchase_model_training_info {
derived_table: {
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: SELECT * FROM ml.TRAINING_INFO(MODEL ${future_purchase_model.SQL_TABLE_NAME});;
}
dimension: training_run {type: number}
Expand Down Expand Up @@ -143,6 +146,7 @@ view: future_purchase_model_training_info {

view: feature_importance {
derived_table: {
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: SELECT
*
FROM
Expand All @@ -153,10 +157,10 @@ view: feature_importance {
}

########################################## PREDICT FUTURE ############################
explore: future_purchase_prediction {}
explore: future_purchase_prediction {hidden:yes}
view: future_purchase_prediction {
derived_table: {
datagroup_trigger: bqml_datagroup
sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;;
sql: select
pred.*,
predicted_will_purchase_in_future_probs_unnest.prob as pred_probability from
Expand All @@ -179,23 +183,61 @@ view: future_purchase_prediction {
sql: ${TABLE}.pred_probability ;;
drill_fields: [user_pseudo_id]
}
dimension: pred_prob_perc {
type: number
sql:APPROX_QUANTILES(${TABLE}.pred_probability,100);;
}
dimension: pred_prob_perc_10 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(10)] ;;
}
dimension: pred_prob_perc_20 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(20)] ;;
}
dimension: pred_prob_perc_30 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(30)];;
}
dimension: pred_prob_perc_40 {
type: number
hidden: yes
sql:${pred_prob_perc}[OFFSET(40)];;
}
dimension: pred_prob_perc_50 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(50)] ;;
}
dimension: pred_prob_perc_60 {
type: number
hidden: yes
sql:${pred_prob_perc}[OFFSET(60)] ;;
}
dimension: pred_prob_perc_70 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(70)] ;;
}
dimension: pred_prob_perc_80 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(80)];;
}
dimension: pred_prob_perc_90 {
type: number
hidden: yes
sql: ${pred_prob_perc}[OFFSET(90)] ;;
}

dimension: pred_probability_bucket {
case: {
when: {
sql: ${TABLE}.pred_probability <= 0.25;;
label: "Low"
}
when: {
sql: ${TABLE}.pred_probability > 0.25 AND ${TABLE}.pred_probability <= 0.75;;
label: "Medium"
}
when: {
sql: ${TABLE}.pred_probability > 0.75;;
label: "High"
}
else:"Unknown"
}
type: tier
tiers: [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9]
style: relational
sql: ${pred_probability} ;;
drill_fields: [user_pseudo_id]
}
measure: count {
Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
include: "/views/sessions/*.view.lkml"
view: testing_input {
derived_table: {
datagroup_trigger: bqml_datagroup
sql_trigger_value: ${training_input.SQL_TABLE_NAME} ;;
sql:
select * from
(WITH
Expand All @@ -21,9 +22,9 @@ view: testing_input {
THEN 1
ELSE 0 END) AS label
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
GROUP BY
user_pseudo_id
),
Expand All @@ -34,11 +35,11 @@ view: testing_input {
MAX(geo.region) AS region,
MAX(geo.country) AS country
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand All @@ -57,11 +58,11 @@ view: testing_input {
END)
AS pages_viewed
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -99,11 +100,11 @@ view: testing_input {
-- SUM(IF(event_name = 'my custom event', 1, 0)) AS cnt_my_custom_event
-- Don't forget to add a comma after 'cnt_session_start' when adding a new field.
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -137,11 +138,11 @@ view: testing_input {
WHERE key = 'engagement_time_msec'
)) AS engagement_time_msec
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down Expand Up @@ -186,7 +187,7 @@ view: testing_input {
-- IFNULL(MAX(Event_counts.cnt_my_custom_event), 0) AS cnt_my_custom_event
-- Don't forget to add a comma after 'cnt_session_start' when adding a new field.
FROM
`@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA
${session_list_with_event_history.SQL_TABLE_NAME} AS GA
LEFT JOIN visitors_labeled AS Labels
ON GA.user_pseudo_id = Labels.user_pseudo_id
LEFT JOIN engagement AS Engagement
Expand All @@ -198,7 +199,7 @@ view: testing_input {
LEFT JOIN event_cnts AS Event_counts
ON GA.user_pseudo_id = Event_counts.user_pseudo_id
WHERE
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH))
DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)
AND (
GA.event_timestamp < IFNULL(event_session, 0)
OR event_session IS NULL)
Expand Down
Loading

0 comments on commit a15c5a8

Please sign in to comment.