diff --git a/views/attribution_sources/others.view.lkml b/views/attribution_sources/others.view.lkml index 8701a26..c0d29e1 100644 --- a/views/attribution_sources/others.view.lkml +++ b/views/attribution_sources/others.view.lkml @@ -2,6 +2,7 @@ view: others { derived_table: { datagroup_trigger:attribution_channel sql: select '(direct)' as medium, 'Direct' as source UNION ALL +select '(direct)(none)' as medium, 'Direct' as source UNION ALL select 'referral' as medium, 'Referral' as source UNION ALL select 'audio' as medium, 'Audio' as source UNION ALL select 'sms' as medium, 'SMS' as source UNION ALL diff --git a/views/bqml/future_input.view.lkml b/views/bqml/purchase_propensity/future_input.view.lkml similarity index 86% rename from views/bqml/future_input.view.lkml rename to views/bqml/purchase_propensity/future_input.view.lkml index bc0d3f3..0653a66 100644 --- a/views/bqml/future_input.view.lkml +++ b/views/bqml/purchase_propensity/future_input.view.lkml @@ -1,8 +1,9 @@ +include: "/views/sessions/*.view.lkml" view: future_input { derived_table: { - datagroup_trigger: bqml_datagroup + sql_trigger_value: ${testing_input.SQL_TABLE_NAME} ;; # partition_keys: ["session_date"] - # cluster_keys: ["session_date"] + #cluster_keys: ["session_date"] sql: select * from (WITH @@ -23,9 +24,9 @@ view: future_input { THEN 1 ELSE 0 END) AS label FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) GROUP BY user_pseudo_id ), @@ -36,11 +37,11 @@ view: future_input { MAX(geo.region) AS region, MAX(geo.country) AS country FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - 
_TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -59,11 +60,11 @@ view: future_input { END) AS pages_viewed FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -101,11 +102,11 @@ view: future_input { -- SUM(IF(event_name = 'my custom event', 1, 0)) AS cnt_my_custom_event -- Don't forget to add a comma after 'cnt_session_start' when adding a new field. 
FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -139,11 +140,11 @@ view: future_input { WHERE key = 'engagement_time_msec' )) AS engagement_time_msec FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -188,7 +189,7 @@ view: future_input { -- IFNULL(MAX(Event_counts.cnt_my_custom_event), 0) AS cnt_my_custom_event -- Don't forget to add a comma after 'cnt_session_start' when adding a new field. 
FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id LEFT JOIN engagement AS Engagement @@ -200,7 +201,7 @@ view: future_input { LEFT JOIN event_cnts AS Event_counts ON GA.user_pseudo_id = Event_counts.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_future_synth_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) diff --git a/views/bqml/incremental_prediction.view.lkml b/views/bqml/purchase_propensity/incremental_prediction.view.lkml similarity index 91% rename from views/bqml/incremental_prediction.view.lkml rename to views/bqml/purchase_propensity/incremental_prediction.view.lkml index 63234fd..5ebf769 100644 --- a/views/bqml/incremental_prediction.view.lkml +++ b/views/bqml/purchase_propensity/incremental_prediction.view.lkml @@ -1,4 +1,7 @@ include: "/explores/sessions.explore.lkml" +include: "/views/*/*.view.lkml" +include: "/views/bqml/*/*.view.lkml" +include: "/views/*.view.lkml" explore: pred_history {} view: pred_history { @@ -49,10 +52,11 @@ view: pred_history { # } } -explore: incremental_prediction {} +explore: incremental_prediction {hidden:yes} view: incremental_prediction { derived_table: { - datagroup_trigger: bqml_datagroup + #datagroup_trigger: bqml_datagroup + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; create_process: { sql_step: CREATE TABLE IF NOT EXISTS ${SQL_TABLE_NAME} ( diff --git a/views/bqml/predictions.view.lkml b/views/bqml/purchase_propensity/predictions.view.lkml similarity index 73% rename from views/bqml/predictions.view.lkml rename to views/bqml/purchase_propensity/predictions.view.lkml index e215a8d..05af958 100644 --- a/views/bqml/predictions.view.lkml +++ 
b/views/bqml/purchase_propensity/predictions.view.lkml @@ -1,13 +1,12 @@ ######################## TRAINING/TESTING INPUTS ############################# -include: "/views/bqml/future_input.view" -include: "/views/bqml/training_input.view" -include: "/views/bqml/testing_input.view" +include: "/views/bqml/purchase_propensity/*.view" +include: "/views/sessions/*.view" ######################## MODEL ############################# view: future_purchase_model { derived_table: { - datagroup_trigger: bqml_datagroup + sql_trigger_value: ${future_input.SQL_TABLE_NAME} ;; sql_create: CREATE OR REPLACE MODEL ${SQL_TABLE_NAME} OPTIONS( @@ -33,15 +32,16 @@ view: future_purchase_model { } ######################## TRAINING INFORMATION ############################# -explore: future_purchase_model_evaluation {} -explore: future_purchase_model_training_info {} -explore: roc_curve {} -explore: confusion_matrix {} -explore: feature_importance {} +explore: future_purchase_model_evaluation {hidden:yes} +explore: future_purchase_model_training_info {hidden:yes} +explore: roc_curve {hidden:yes} +explore: confusion_matrix {hidden:yes} +explore: feature_importance {hidden:yes} # VIEWS: view: future_purchase_model_evaluation { derived_table: { + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: SELECT * FROM ml.EVALUATE( MODEL ${future_purchase_model.SQL_TABLE_NAME}, (SELECT * FROM ${testing_input.SQL_TABLE_NAME}));; @@ -55,6 +55,7 @@ view: future_purchase_model_evaluation { view: roc_curve { derived_table: { + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: SELECT * FROM ml.ROC_CURVE( MODEL ${future_purchase_model.SQL_TABLE_NAME}, (SELECT * FROM ${testing_input.SQL_TABLE_NAME}));; @@ -100,6 +101,7 @@ view: roc_curve { view: confusion_matrix { derived_table: { + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: SELECT Expected_label,_0 as Predicted_0,_1 as Predicted_1 FROM ml.confusion_matrix( MODEL ${future_purchase_model.SQL_TABLE_NAME}, 
(SELECT * FROM ${testing_input.SQL_TABLE_NAME}));; @@ -111,6 +113,7 @@ view: confusion_matrix { view: future_purchase_model_training_info { derived_table: { + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: SELECT * FROM ml.TRAINING_INFO(MODEL ${future_purchase_model.SQL_TABLE_NAME});; } dimension: training_run {type: number} @@ -143,6 +146,7 @@ view: future_purchase_model_training_info { view: feature_importance { derived_table: { + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: SELECT * FROM @@ -153,10 +157,10 @@ view: feature_importance { } ########################################## PREDICT FUTURE ############################ -explore: future_purchase_prediction {} +explore: future_purchase_prediction {hidden:yes} view: future_purchase_prediction { derived_table: { - datagroup_trigger: bqml_datagroup + sql_trigger_value: ${future_purchase_model.SQL_TABLE_NAME} ;; sql: select pred.*, predicted_will_purchase_in_future_probs_unnest.prob as pred_probability from @@ -179,23 +183,61 @@ view: future_purchase_prediction { sql: ${TABLE}.pred_probability ;; drill_fields: [user_pseudo_id] } + measure: pred_prob_perc { + type: number + sql:APPROX_QUANTILES(${TABLE}.pred_probability,100);; + } + measure: pred_prob_perc_10 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(10)] ;; + } + measure: pred_prob_perc_20 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(20)] ;; + } + measure: pred_prob_perc_30 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(30)];; + } + measure: pred_prob_perc_40 { + type: number + hidden: yes + sql:${pred_prob_perc}[OFFSET(40)];; + } + measure: pred_prob_perc_50 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(50)] ;; + } + measure: pred_prob_perc_60 { + type: number + hidden: yes + sql:${pred_prob_perc}[OFFSET(60)] ;; + } + measure: pred_prob_perc_70 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(70)] ;; + } + measure: 
pred_prob_perc_80 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(80)];; + } + measure: pred_prob_perc_90 { + type: number + hidden: yes + sql: ${pred_prob_perc}[OFFSET(90)] ;; + } dimension: pred_probability_bucket { - case: { - when: { - sql: ${TABLE}.pred_probability <= 0.25;; - label: "Low" - } - when: { - sql: ${TABLE}.pred_probability > 0.25 AND ${TABLE}.pred_probability <= 0.75;; - label: "Medium" - } - when: { - sql: ${TABLE}.pred_probability > 0.75;; - label: "High" - } - else:"Unknown" - } + type: tier + tiers: [0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9] + style: relational + sql: ${pred_probability} ;; drill_fields: [user_pseudo_id] } measure: count { diff --git a/views/bqml/testing_input.view.lkml b/views/bqml/purchase_propensity/testing_input.view.lkml similarity index 87% rename from views/bqml/testing_input.view.lkml rename to views/bqml/purchase_propensity/testing_input.view.lkml index 9478126..f96d87b 100644 --- a/views/bqml/testing_input.view.lkml +++ b/views/bqml/purchase_propensity/testing_input.view.lkml @@ -1,6 +1,7 @@ +include: "/views/sessions/*.view.lkml" view: testing_input { derived_table: { - datagroup_trigger: bqml_datagroup + sql_trigger_value: ${training_input.SQL_TABLE_NAME} ;; sql: select * from (WITH @@ -21,9 +22,9 @@ view: testing_input { THEN 1 ELSE 0 END) AS label FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) GROUP BY user_pseudo_id ), @@ -34,11 +35,11 @@ view: testing_input { MAX(geo.region) AS region, MAX(geo.country) AS country FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > 
FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -57,11 +58,11 @@ view: testing_input { END) AS pages_viewed FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -99,11 +100,11 @@ view: testing_input { -- SUM(IF(event_name = 'my custom event', 1, 0)) AS cnt_my_custom_event -- Don't forget to add a comma after 'cnt_session_start' when adding a new field. FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -137,11 +138,11 @@ view: testing_input { WHERE key = 'engagement_time_msec' )) AS engagement_time_msec FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 
0) OR event_session IS NULL) @@ -186,7 +187,7 @@ view: testing_input { -- IFNULL(MAX(Event_counts.cnt_my_custom_event), 0) AS cnt_my_custom_event -- Don't forget to add a comma after 'cnt_session_start' when adding a new field. FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id LEFT JOIN engagement AS Engagement @@ -198,7 +199,7 @@ view: testing_input { LEFT JOIN event_cnts AS Event_counts ON GA.user_pseudo_id = Event_counts.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) diff --git a/views/bqml/training_input.view.lkml b/views/bqml/purchase_propensity/training_input.view.lkml similarity index 79% rename from views/bqml/training_input.view.lkml rename to views/bqml/purchase_propensity/training_input.view.lkml index 70a2de5..f3fe331 100644 --- a/views/bqml/training_input.view.lkml +++ b/views/bqml/purchase_propensity/training_input.view.lkml @@ -1,9 +1,11 @@ +include: "/views/sessions/*.view.lkml" +include: "/views/*/*.view.lkml" view: training_input { derived_table: { - datagroup_trigger: bqml_datagroup + #datagroup_trigger: bqml_datagroup + sql_trigger_value: ${sessions.SQL_TABLE_NAME} ;; sql: - select * from - (WITH + WITH visitors_labeled AS ( SELECT user_pseudo_id, @@ -21,9 +23,9 @@ view: training_input { THEN 1 ELSE 0 END) AS label FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) 
+ DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) GROUP BY user_pseudo_id ), @@ -34,11 +36,11 @@ view: training_input { MAX(geo.region) AS region, MAX(geo.country) AS country FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -57,11 +59,11 @@ view: training_input { END) AS pages_viewed FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -99,11 +101,11 @@ view: training_input { -- SUM(IF(event_name = 'my custom event', 1, 0)) AS cnt_my_custom_event -- Don't forget to add a comma after 
'cnt_session_start' when adding a new field. FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -137,11 +139,11 @@ view: training_input { WHERE key = 'engagement_time_msec' )) AS engagement_time_msec FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -186,7 +188,7 @@ view: training_input { -- IFNULL(MAX(Event_counts.cnt_my_custom_event), 0) AS cnt_my_custom_event -- Don't forget to add a comma after 'cnt_session_start' when adding a new field. 
FROM - `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` AS GA + ${session_list_with_event_history.SQL_TABLE_NAME} AS GA LEFT JOIN visitors_labeled AS Labels ON GA.user_pseudo_id = Labels.user_pseudo_id LEFT JOIN engagement AS Engagement @@ -198,7 +200,7 @@ view: training_input { LEFT JOIN event_cnts AS Event_counts ON GA.user_pseudo_id = Event_counts.user_pseudo_id WHERE - _TABLE_SUFFIX > FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH)) AND _TABLE_SUFFIX < FORMAT_DATE('%Y%m%d', DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH)) + DATE(session_date) > DATE_SUB(CURRENT_DATE(), INTERVAL (@{GA4_BQML_train_months} + @{GA4_BQML_test_months}) MONTH) AND DATE(session_date) < DATE_SUB(CURRENT_DATE(), INTERVAL @{GA4_BQML_test_months} MONTH) AND ( GA.event_timestamp < IFNULL(event_session, 0) OR event_session IS NULL) @@ -209,6 +211,6 @@ view: training_input { SELECT * FROM - user_model);; + user_model;; } } diff --git a/views/events.view.lkml b/views/events.view.lkml index 04eb308..40ef97b 100644 --- a/views/events.view.lkml +++ b/views/events.view.lkml @@ -53,11 +53,22 @@ view: events { dimension_group: event_time { type: time - timeframes: [date,day_of_month,day_of_week,day_of_week_index,day_of_year,month,month_name,month_num,fiscal_quarter,fiscal_quarter_of_year,year,time,hour,hour_of_day] + timeframes: [date,day_of_month,day_of_week,day_of_week_index,day_of_year,month,month_name,month_num,fiscal_quarter,fiscal_quarter_of_year,year,time,hour,hour_of_day,minute,second] label: "Event" sql: TIMESTAMP_MICROS(${TABLE}.event_timestamp) ;; description: "Event Date/Time from Event Timestamp." 
} + parameter: event_time_window { + allowed_value: { + label: "Hourly window" + value: "hour" + } + allowed_value: { + label: "Minute window" + value: "minute" + } + default_value: "hour" + } dimension: event_timestamp { hidden: yes sql: ${TABLE}.event_timestamp ;; } diff --git a/views/sessions.view.lkml b/views/sessions.view.lkml index 3431b39..d8c5bbf 100644 --- a/views/sessions.view.lkml +++ b/views/sessions.view.lkml @@ -1,20 +1,21 @@ include: "/views/event_data_dimensions/event_funnel.view" include: "/views/event_data_dimensions/page_funnel.view" include: "/views/sessions/*.view" +include: "/views/bqml/*/*.view" view: sessions { derived_table: { - datagroup_trigger: ga4_default_datagroup + sql_trigger_value: ${device_geo.SQL_TABLE_NAME} ;; partition_keys: ["session_date"] cluster_keys: ["session_date"] increment_key: "session_date" increment_offset: 3 sql: -- Final Select Statement: -select se.session_date session_date - , se.ga_session_id ga_session_id - , se.ga_session_number ga_session_number - , se.user_pseudo_id user_pseudo_id +select se.session_date as session_date + , se.ga_session_id as ga_session_id + , se.ga_session_number as ga_session_number + , se.user_pseudo_id as user_pseudo_id , se.sl_key -- packing session-level data into structs by category , (SELECT AS STRUCT coalesce(sa.medium,'(none)') medium -- sessions missing last-non-direct are direct @@ -53,14 +54,14 @@ select se.session_date session_date , d.geo__sub_continent , d.geo__region) geo_data , se.event_data event_data -from ${session_event_packing.SQL_TABLE_NAME} AS se -left join ${session_tags.SQL_TABLE_NAME} sa +from ${session_event_packing.SQL_TABLE_NAME} as se +left join ${session_tags.SQL_TABLE_NAME} as sa on se.sl_key = sa.sl_key -left join ${session_facts.SQL_TABLE_NAME} sf +left join ${session_facts.SQL_TABLE_NAME} as sf on se.sl_key = sf.sl_key -left join ${device_geo.SQL_TABLE_NAME} d +left join ${device_geo.SQL_TABLE_NAME} as d on se.sl_key = d.sl_key -where {% 
incrementcondition %} session_date {% endincrementcondition %} +where {% incrementcondition %} se.session_date {% endincrementcondition %} ;; } @@ -186,7 +187,7 @@ extends: [event_funnel, page_funnel] } dimension: session_data_session_event_count { type: number - sql: ${session_data}.session_event_count ;; + sql:${session_data}.session_event_count;; label: "Session Event Count" } dimension: session_data_engaged_events { @@ -566,6 +567,12 @@ extends: [event_funnel, page_funnel] sql: ${user_pseudo_id} ;; value_format_name: formatted_number } + #measure: total_event_count { + # type: sum + # description: "Total times an event occured on a specific date" + # sql: ${session_data_session_event_count} ;; + # value_format_name: formatted_number + #} measure: total_new_users { view_label: "Audience" diff --git a/views/sessions/device_geo.view.lkml b/views/sessions/device_geo.view.lkml index 0c5c973..0c83e55 100644 --- a/views/sessions/device_geo.view.lkml +++ b/views/sessions/device_geo.view.lkml @@ -1,7 +1,7 @@ -include: "/views/sessions/session_list_with_event_history.view.lkml" +include: "/views/sessions/*.view.lkml" view: device_geo{ derived_table:{ - sql_trigger_value:${session_list_with_event_history.SQL_TABLE_NAME};; + sql_trigger_value: ${session_event_packing.SQL_TABLE_NAME} ;; sql:select sl.sl_key , sl.device.category device__category , sl.device.mobile_brand_name device__mobile_brand_name diff --git a/views/sessions/session_event_packing.view.lkml b/views/sessions/session_event_packing.view.lkml index a2f3da2..f4d2b3f 100644 --- a/views/sessions/session_event_packing.view.lkml +++ b/views/sessions/session_event_packing.view.lkml @@ -1,12 +1,12 @@ -include: "/views/sessions/session_list_with_event_history.view.lkml" +include: "/views/sessions/*.view.lkml" view: session_event_packing { derived_table:{ - datagroup_trigger: ga4_default_datagroup + sql_trigger_value: ${session_facts.SQL_TABLE_NAME} ;; partition_keys: ["session_date"] - cluster_keys: ["session_date"] + 
cluster_keys: ["sl_key","user_pseudo_id","session_date"] increment_key: "session_date" - increment_offset: 10 - sql: SELECT sl.session_date session_date + increment_offset: 0 + sql:select sl.session_date session_date , sl.ga_session_id ga_session_id , sl.ga_session_number ga_session_number , sl.user_pseudo_id user_pseudo_id @@ -39,17 +39,15 @@ view: session_event_packing { , sl.event_dimensions , sl.ecommerce , sl.items)) event_data - FROM ${session_list_with_event_history.SQL_TABLE_NAME} AS sl - WHERE {% incrementcondition %} session_date {% endincrementcondition %} - AND sl.sl_key IN ( - SELECT sl_key FROM ${session_facts.SQL_TABLE_NAME} - WHERE CASE WHEN "@{EVENT_COUNT}" = "" THEN 1=1 - ELSE session_event_count = ((TIMESTAMP_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY), INTERVAL -15 DAY))) - --and timestamp(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+'))) <= ((TIMESTAMP_ADD(TIMESTAMP_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY), INTERVAL -15 DAY), INTERVAL 16 DAY))) - ;; -} -dimension: session_date{ - type: date - hidden: yes -} -} + select timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'(\d{8})'))) session_date + , (select value.int_value from UNNEST(events.event_params) where key = "ga_session_id") ga_session_id + , (select value.int_value from UNNEST(events.event_params) where key = "ga_session_number") ga_session_number + , events.user_pseudo_id + -- unique key for session: + , timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_number")||events.user_pseudo_id as sl_key + , row_number() over (partition by (timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where 
key = "ga_session_number")||events.user_pseudo_id) order by events.event_timestamp) event_rank + , (TIMESTAMP_DIFF(TIMESTAMP_MICROS(LEAD(events.event_timestamp) OVER (PARTITION BY timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_number")||events.user_pseudo_id ORDER BY events.event_timestamp asc)) + ,TIMESTAMP_MICROS(events.event_timestamp),second)/86400.0) time_to_next_event + , case when events.event_name = 'page_view' then row_number() over (partition by (timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_number")||events.user_pseudo_id), case when events.event_name = 'page_view' then true else false end order by events.event_timestamp) + else 0 end as page_view_rank + , case when events.event_name = 'page_view' then row_number() over (partition by (timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_number")||events.user_pseudo_id), case when events.event_name = 'page_view' then true else false end order by events.event_timestamp desc) + else 0 end as page_view_reverse_rank + , case when events.event_name = 'page_view' then (TIMESTAMP_DIFF(TIMESTAMP_MICROS(LEAD(events.event_timestamp) OVER (PARTITION BY timestamp(SAFE.PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+')))||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_id")||(select value.int_value from UNNEST(events.event_params) where key = "ga_session_number")||events.user_pseudo_id , case 
when events.event_name = 'page_view' then true else false end ORDER BY events.event_timestamp asc)) + ,TIMESTAMP_MICROS(events.event_timestamp),second)/86400.0) else null end as time_to_next_page -- this window function yields 0 duration results when session page_view count = 1. + -- raw event data: + , events.event_date + , events.event_timestamp + , events.event_name + , events.event_params + , events.event_previous_timestamp + , events.event_value_in_usd + , events.event_bundle_sequence_id + , events.event_server_timestamp_offset + , events.user_id + -- , events.user_pseudo_id + , events.user_properties + , events.user_first_touch_timestamp + , events.user_ltv + , events.device + , events.geo + , events.app_info + , events.traffic_source + , events.stream_id + , events.platform + , events.event_dimensions + , events.ecommerce + , ARRAY(select as STRUCT it.* EXCEPT(item_params) from unnest(events.items) as it) as items + from `@{GA4_SCHEMA}.@{GA4_TABLE_VARIABLE}` events + where {% incrementcondition %} timestamp(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+'))) {% endincrementcondition %} + --where timestamp(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+'))) >= ((TIMESTAMP_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY), INTERVAL -15 DAY))) + --and timestamp(PARSE_DATE('%Y%m%d', REGEXP_EXTRACT(_TABLE_SUFFIX,r'[0-9]+'))) <= ((TIMESTAMP_ADD(TIMESTAMP_ADD(TIMESTAMP_TRUNC(CURRENT_TIMESTAMP(), DAY), INTERVAL -15 DAY), INTERVAL 16 DAY))) + ;; + } + dimension: session_date { + type: date + hidden: yes + sql: ${TABLE}.session_date;; + } + } diff --git a/views/sessions/session_tags.view.lkml b/views/sessions/session_tags.view.lkml index 9851522..8940171 100644 --- a/views/sessions/session_tags.view.lkml +++ b/views/sessions/session_tags.view.lkml @@ -1,15 +1,26 @@ -include: "/views/sessions/session_list_with_event_history.view.lkml" +include: "/views/sessions/*.view.lkml" view: session_tags{ derived_table:{ - datagroup_trigger: ga4_default_datagroup 
- sql: select distinct sl.sl_key - , first_value((select value.string_value from unnest(sl.event_params) where key = 'medium')) over (partition by sl.sl_key order by sl.event_timestamp desc) medium - , first_value((select value.string_value from unnest(sl.event_params) where key = 'source')) over (partition by sl.sl_key order by sl.event_timestamp desc) source - , first_value((select value.string_value from unnest(sl.event_params) where key = 'campaign')) over (partition by sl.sl_key order by sl.event_timestamp desc) campaign - , first_value((select value.string_value from unnest(sl.event_params) where key = 'page_referrer')) over (partition by sl.sl_key order by sl.event_timestamp desc) page_referrer - from ${session_list_with_event_history.SQL_TABLE_NAME} AS sl - where sl.event_name in ('page_view') - and (select value.string_value from unnest(sl.event_params) where key = 'medium') is not null -- NULL medium is direct, filtering out nulls to ensure last non-direct. + increment_key: "session_date" + partition_keys: ["session_date"] + cluster_keys: ["sl_key","session_date"] + #datagroup_trigger: ga4_default_datagroup + sql_trigger_value: ${session_list_with_event_history.SQL_TABLE_NAME} ;; + sql:select distinct sl.sl_key, sl.session_date as session_date + , first_value(case when ep.key = 'medium' then ep.value.string_value end ignore nulls) over (partition by sl.sl_key order by sl.event_timestamp desc rows between unbounded preceding and unbounded following) medium + , first_value(case when ep.key = 'source' then ep.value.string_value end ignore nulls) over (partition by sl.sl_key order by sl.event_timestamp desc rows between unbounded preceding and unbounded following) source + , first_value(case when ep.key = 'campaign' then ep.value.string_value end ignore nulls) over (partition by sl.sl_key order by sl.event_timestamp desc rows between unbounded preceding and unbounded following) campaign + , first_value(case when ep.key = 'page_referrer' then ep.value.string_value end ignore nulls) over (partition by sl.sl_key order by sl.event_timestamp desc rows between unbounded preceding and unbounded following) page_referrer +from ${session_list_with_event_history.SQL_TABLE_NAME} AS sl + , UNNEST(sl.event_params) AS ep +where sl.event_name in 
('page_view') +and {% incrementcondition %} session_date {% endincrementcondition %} +-- NULL medium is direct; IGNORE NULLS over the full session frame skips null tags so each column keeps the last non-direct value. ;; } + dimension: session_date { + type: date + hidden: yes + sql: ${TABLE}.session_date ;; + } }