|
-- Collects the distinct initiator eTLDs reachable from `rootPage` in `data`.
--
-- Args:
--   rootPage: page (registrable domain) the walk starts from.
--   data:     rows of (root_page, third_party, initiator_etld); only root_page
--             and initiator_etld are read by this function.
-- Returns:
--   Distinct initiator_etld values in first-discovery order: first every
--   initiator on rows whose root_page equals `rootPage`, then, for each of
--   those in turn, whatever the same lookup yields when that initiator is
--   used as the page. An empty array is returned for NULL or empty `data`.
--
-- NOTE(review): the lookup matches rows on root_page. When every row in `data`
-- shares a single root_page, the recursive step cannot find anything new and
-- the result is simply the distinct initiators in input order. Confirm whether
-- following third_party -> initiator links was the intent.
CREATE TEMP FUNCTION findAllInitiators(rootPage STRING, data ARRAY<STRUCT<root_page STRING, third_party STRING, initiator_etld STRING>>)
RETURNS ARRAY<STRING>
LANGUAGE js AS """
  // Tolerate a NULL array instead of throwing on property access.
  const rows = data || [];

  // A Set gives O(1) membership checks and keeps insertion order, so it serves
  // as both the visited list and the de-duplicated result.
  const seen = new Set();

  // Records every not-yet-seen initiator of `page`, then expands each of them.
  function expand(page) {
    const fresh = [];
    for (const row of rows) {
      if (row.root_page === page && !seen.has(row.initiator_etld)) {
        seen.add(row.initiator_etld);
        fresh.push(row.initiator_etld);
      }
    }

    // All siblings are recorded before any of them is expanded, which keeps
    // the discovery order stable; `seen` guarantees termination on cycles.
    for (const initiator of fresh) {
      expand(initiator);
    }
  }

  expand(rootPage);
  return Array.from(seen);
""";
| 26 | + |
| 27 | + |
| 28 | + |
-- Finds the elements that directly follow "googletagmanager.com" in
-- `input_array` and reports how deep in the array they sit.
--
-- Args:
--   input_array: ordered list of domains to scan.
-- Returns:
--   STRUCT with
--     mean_depth:    average 1-based position in `input_array` of the elements
--                    that immediately follow a "googletagmanager.com" entry;
--                    NULL when there is no such element.
--     next_elements: those following elements, in array order (empty when
--                    none were found).
--   A "googletagmanager.com" entry in the last position has no follower and is
--   ignored. A NULL `input_array` is treated as empty.
CREATE TEMP FUNCTION mean_depth_and_next_element_after_gtm(input_array ARRAY<STRING>)
RETURNS STRUCT<mean_depth FLOAT64, next_elements ARRAY<STRING>>
LANGUAGE js AS """
  const GTM_DOMAIN = 'googletagmanager.com';

  // Tolerate a NULL array instead of throwing on property access.
  const items = input_array || [];

  const nextElements = [];
  let depthTotal = 0;

  // Stop one short of the end: the last element has nothing after it.
  for (let i = 0; i + 1 < items.length; i++) {
    if (items[i] === GTM_DOMAIN) {
      nextElements.push(items[i + 1]);
      // The follower is at index i + 1, i.e. 1-based position i + 2.
      depthTotal += i + 2;
    }
  }

  // Nothing follows googletagmanager.com: mean_depth is NULL, list is empty.
  if (nextElements.length === 0) {
    return { mean_depth: null, next_elements: [] };
  }

  return {
    mean_depth: depthTotal / nextElements.length,
    next_elements: nextElements
  };
""";
| 55 | + |
| 56 | + |
-- Counts, per client, how often each list of "elements following
-- googletagmanager.com" occurs across root pages in the 2024-06-01 crawl.
--
-- Output columns:
--   client                  - client value from httparchive.all.requests
--   next_elements_after_gtm - ARRAY<STRING> returned by
--                             mean_depth_and_next_element_after_gtm
--   c                       - number of (root_page, client) groups that
--                             produced that array
WITH third_party_requests AS (
  -- Requests whose registrable domain differs from the root page's.
  SELECT
    client,
    NET.REG_DOMAIN(root_page) AS root_page,
    NET.REG_DOMAIN(url) AS third_party,
    NET.REG_DOMAIN(JSON_VALUE(payload, '$._initiator')) AS initiator_etld
  FROM
    `httparchive.all.requests`
  WHERE
    date = '2024-06-01' AND
    NET.REG_DOMAIN(root_page) != NET.REG_DOMAIN(url)
),

data AS (
  -- Third parties interacting with other third parties: keep one row per
  -- distinct combination where the initiator is neither the request's own
  -- domain nor the root page. Rows with a NULL domain drop out because the
  -- inequality comparisons evaluate to NULL.
  SELECT DISTINCT
    client,
    root_page,
    third_party,
    initiator_etld
  FROM
    third_party_requests
  WHERE
    third_party != initiator_etld AND
    root_page != initiator_etld
),

initiators_per_page AS (
  -- One initiator list per (root_page, client).
  -- NOTE(review): ARRAY_AGG has no ORDER BY, so the element order handed to
  -- findAllInitiators is not deterministic, while the downstream UDF is
  -- position-sensitive. Confirm whether an explicit ordering is needed.
  SELECT
    root_page,
    client,
    findAllInitiators(root_page, ARRAY_AGG(STRUCT(root_page, third_party, initiator_etld))) AS all_initiators
  FROM
    data
  GROUP BY
    root_page,
    client
),

gtm_followers AS (
  -- Wrapping the UDF call in a one-element array and unnesting it exposes the
  -- returned struct as a single row alias, so both fields come from one call.
  SELECT
    client,
    result.next_elements AS next_elements_after_gtm
  FROM
    initiators_per_page
  CROSS JOIN
    UNNEST([mean_depth_and_next_element_after_gtm(all_initiators)]) AS result
  WHERE
    result.mean_depth IS NOT NULL
)

SELECT
  client,
  next_elements_after_gtm,
  COUNT(0) AS c
FROM
  gtm_followers
GROUP BY
  client,
  next_elements_after_gtm
ORDER BY
  c;
0 commit comments