a tool for shared writing and social publishing
/**
 * Tinybird Definitions
 *
 * Defines the datasource that mirrors the Vercel Web Analytics drain schema,
 * the endpoint pipes that serve publication analytics, and the typed client
 * that bundles them.
 *
 * Column names use camelCase to match the JSON keys sent by
 * Vercel's analytics drain (NDJSON format).
 */
10
11import {
12 defineDatasource,
13 defineEndpoint,
14 Tinybird,
15 node,
16 t,
17 p,
18 engine,
19 type InferRow,
20 type InferParams,
21 type InferOutputRow,
22 TokenDefinition,
23} from "@tinybirdco/sdk";
24
/**
 * Token definition with the single scope "READ". The publication endpoints
 * in this file list it in their `tokens` array.
 */
const PROD_READ_TOKEN = {
  name: "prod_read_token_v1",
  scopes: ["READ"],
};
26
27// ============================================================================
28// Datasources
29// ============================================================================
30
/**
 * Vercel Web Analytics drain events.
 * Column names match the Vercel drain JSON keys exactly.
 * `timestamp` is stored as UInt64 (Unix millis) as sent by Vercel.
 *
 * Only `timestamp`, `eventType`, `sessionId`, `deviceId`, `origin` and
 * `path` are declared without a default; every other column falls back to
 * the empty string.
 */
export const analyticsEvents = defineDatasource("analytics_events", {
  description: "Vercel Web Analytics drain events",
  schema: {
    // Columns declared without a default are interleaved with defaulted ones;
    // the order below is the declared column order, so keep it stable.
    timestamp: t.uint64(),
    // The endpoints in this file only read rows where eventType = 'pageview'.
    eventType: t.string().lowCardinality(),
    eventName: t.string().default(""),
    eventData: t.string().default(""),
    sessionId: t.uint64(),
    // publication_traffic reports uniq(deviceId) as `visitors`.
    deviceId: t.uint64(),
    // The endpoints filter on domain(origin), i.e. the host part of this value.
    origin: t.string(),
    path: t.string(),
    referrer: t.string().default(""),
    queryParams: t.string().default(""),
    route: t.string().default(""),
    country: t.string().lowCardinality().default(""),
    region: t.string().default(""),
    city: t.string().default(""),
    osName: t.string().lowCardinality().default(""),
    osVersion: t.string().default(""),
    clientName: t.string().lowCardinality().default(""),
    clientType: t.string().lowCardinality().default(""),
    clientVersion: t.string().default(""),
    deviceType: t.string().lowCardinality().default(""),
    deviceBrand: t.string().default(""),
    deviceModel: t.string().default(""),
    browserEngine: t.string().default(""),
    browserEngineVersion: t.string().default(""),
    sdkVersion: t.string().default(""),
    sdkName: t.string().default(""),
    sdkVersionFull: t.string().default(""),
    vercelEnvironment: t.string().lowCardinality().default(""),
    vercelUrl: t.string().default(""),
    flags: t.string().default(""),
    deployment: t.string().default(""),
    schema: t.string().default(""),
    projectId: t.string().default(""),
    ownerId: t.string().default(""),
    dataSourceName: t.string().default(""),
  },
  // MergeTree ordered by origin, then timestamp. Partitions are monthly:
  // the millisecond timestamp is converted to a DateTime and bucketed with
  // toYYYYMM.
  engine: engine.mergeTree({
    sortingKey: ["origin", "timestamp"],
    partitionKey: "toYYYYMM(fromUnixTimestamp64Milli(timestamp))",
  }),
});

/** Row type inferred from the `analyticsEvents` datasource definition. */
export type AnalyticsEventsRow = InferRow<typeof analyticsEvents>;
82
83// ============================================================================
84// Endpoints
85// ============================================================================
86
/**
 * publication_traffic – daily pageview time series for a publication domain.
 *
 * Selects `pageview` events whose `origin` host is one of `domains`, groups
 * them by day and returns the days in ascending order.
 *
 * Params:
 * - `domains`: comma-separated list of hosts. The SQL splits it on "," and
 *   does no trimming, so entries must not carry surrounding whitespace.
 * - `date_from` / `date_to`: optional; each is parsed with
 *   parseDateTimeBestEffort and applied as an inclusive bound (>= / <=).
 * - `path`: optional exact match against the `path` column.
 *
 * Output per day: `pageviews` is count(), `visitors` is uniq(deviceId)
 * (ClickHouse documents `uniq` as an approximate distinct count).
 *
 * NOTE(review): if callers pass a date-only `date_to`, the `<=` bound is
 * presumably midnight at the start of that day, which would drop the rest of
 * the day — confirm what format callers send.
 */
export const publicationTraffic = defineEndpoint("publication_traffic", {
  description: "Daily pageview time series for a publication domain",
  params: {
    // Required; the three filters below are optional and only applied when
    // the template sees them as defined.
    domains: p.string(),
    date_from: p.string().optional(),
    date_to: p.string().optional(),
    path: p.string().optional(),
  },
  tokens: [PROD_READ_TOKEN],
  nodes: [
    node({
      name: "endpoint",
      // The template text is runtime SQL: `{{...}}` placeholders are filled
      // from the params above and `{% if defined(x) %}` blocks are emitted
      // only when that param is supplied.
      sql: `
 SELECT
 toDate(fromUnixTimestamp64Milli(timestamp)) AS day,
 count() AS pageviews,
 uniq(deviceId) AS visitors
 FROM analytics_events
 WHERE eventType = 'pageview'
 AND domain(origin) IN splitByChar(',', {{String(domains)}})
 {% if defined(date_from) %}
 AND fromUnixTimestamp64Milli(timestamp) >= parseDateTimeBestEffort({{String(date_from)}})
 {% end %}
 {% if defined(date_to) %}
 AND fromUnixTimestamp64Milli(timestamp) <= parseDateTimeBestEffort({{String(date_to)}})
 {% end %}
 {% if defined(path) %}
 AND path = {{String(path)}}
 {% end %}
 GROUP BY day
 ORDER BY day ASC
 `,
    }),
  ],
  // Declared result columns; names match the aliases in the SELECT above.
  output: {
    day: t.date(),
    pageviews: t.uint64(),
    visitors: t.uint64(),
  },
});

/** Parameter type inferred from the `publicationTraffic` endpoint. */
export type PublicationTrafficParams = InferParams<typeof publicationTraffic>;
/** Output row type inferred from the `publicationTraffic` endpoint. */
export type PublicationTrafficOutput = InferOutputRow<
  typeof publicationTraffic
>;
135
/**
 * publication_top_referrers – top referring domains for a publication.
 *
 * Selects `pageview` events whose `origin` host is one of `domains`, keeps
 * rows with a non-empty `referrer` whose host is NOT one of `domains`
 * (so the publication's own hosts are excluded), and ranks referrer hosts by
 * pageview count, highest first.
 *
 * Params:
 * - `domains`: comma-separated list of hosts, split on "," with no trimming.
 * - `date_from` / `date_to`: optional inclusive bounds, parsed with
 *   parseDateTimeBestEffort.
 * - `path`: optional exact match against the `path` column.
 * - `limit`: maximum number of rows; defaults to 10.
 *
 * NOTE(review): the emptiness check is on the raw `referrer` column, not on
 * domain(referrer). If domain() yields an empty host for some non-empty
 * referrer, those rows would be grouped under '' — confirm whether that can
 * occur in the ingested data.
 */
export const publicationTopReferrers = defineEndpoint(
  "publication_top_referrers",
  {
    tokens: [PROD_READ_TOKEN],
    description: "Top referrers for a publication domain",
    params: {
      domains: p.string(),
      date_from: p.string().optional(),
      date_to: p.string().optional(),
      path: p.string().optional(),
      // The default of 10 is repeated in the SQL template's Int32(limit, 10);
      // keep the two in sync.
      limit: p.int32().optional(10),
    },
    nodes: [
      node({
        name: "endpoint",
        // The template text is runtime SQL: `{{...}}` placeholders are filled
        // from the params above and `{% if defined(x) %}` blocks are emitted
        // only when that param is supplied.
        sql: `
 SELECT
 domain(referrer) AS referrer_host,
 count() AS pageviews
 FROM analytics_events
 WHERE eventType = 'pageview'
 AND domain(origin) IN splitByChar(',', {{String(domains)}})
 AND referrer != ''
 AND domain(referrer) NOT IN splitByChar(',', {{String(domains)}})
 {% if defined(date_from) %}
 AND fromUnixTimestamp64Milli(timestamp) >= parseDateTimeBestEffort({{String(date_from)}})
 {% end %}
 {% if defined(date_to) %}
 AND fromUnixTimestamp64Milli(timestamp) <= parseDateTimeBestEffort({{String(date_to)}})
 {% end %}
 {% if defined(path) %}
 AND path = {{String(path)}}
 {% end %}
 GROUP BY referrer_host
 ORDER BY pageviews DESC
 LIMIT {{Int32(limit, 10)}}
 `,
      }),
    ],
    // Declared result columns; names match the aliases in the SELECT above.
    output: {
      referrer_host: t.string(),
      pageviews: t.uint64(),
    },
  },
);

/** Parameter type inferred from the `publicationTopReferrers` endpoint. */
export type PublicationTopReferrersParams = InferParams<
  typeof publicationTopReferrers
>;
/** Output row type inferred from the `publicationTopReferrers` endpoint. */
export type PublicationTopReferrersOutput = InferOutputRow<
  typeof publicationTopReferrers
>;
191
/**
 * publication_top_pages – top pages by pageviews for a publication.
 *
 * Selects `pageview` events whose `origin` host is one of `domains`, groups
 * them by `path` and returns the paths with the most pageviews first.
 *
 * Params:
 * - `domains`: comma-separated list of hosts, split on "," with no trimming.
 * - `date_from` / `date_to`: optional inclusive bounds, parsed with
 *   parseDateTimeBestEffort.
 * - `limit`: maximum number of rows; defaults to 10.
 *
 * Unlike the traffic and referrer endpoints in this file, there is no `path`
 * filter here because `path` is the grouping key.
 */
export const publicationTopPages = defineEndpoint("publication_top_pages", {
  description: "Top pages for a publication domain",
  tokens: [PROD_READ_TOKEN],
  params: {
    domains: p.string(),
    date_from: p.string().optional(),
    date_to: p.string().optional(),
    // The default of 10 is repeated in the SQL template's Int32(limit, 10);
    // keep the two in sync.
    limit: p.int32().optional(10),
  },
  nodes: [
    node({
      name: "endpoint",
      // The template text is runtime SQL: `{{...}}` placeholders are filled
      // from the params above and `{% if defined(x) %}` blocks are emitted
      // only when that param is supplied.
      sql: `
 SELECT
 path,
 count() AS pageviews
 FROM analytics_events
 WHERE eventType = 'pageview'
 AND domain(origin) IN splitByChar(',', {{String(domains)}})
 {% if defined(date_from) %}
 AND fromUnixTimestamp64Milli(timestamp) >= parseDateTimeBestEffort({{String(date_from)}})
 {% end %}
 {% if defined(date_to) %}
 AND fromUnixTimestamp64Milli(timestamp) <= parseDateTimeBestEffort({{String(date_to)}})
 {% end %}
 GROUP BY path
 ORDER BY pageviews DESC
 LIMIT {{Int32(limit, 10)}}
 `,
    }),
  ],
  // Declared result columns; names match the SELECT list above.
  output: {
    path: t.string(),
    pageviews: t.uint64(),
  },
});

/** Parameter type inferred from the `publicationTopPages` endpoint. */
export type PublicationTopPagesParams = InferParams<typeof publicationTopPages>;
/** Output row type inferred from the `publicationTopPages` endpoint. */
export type PublicationTopPagesOutput = InferOutputRow<
  typeof publicationTopPages
>;
236
237// ============================================================================
238// Client
239// ============================================================================
240
/**
 * Tinybird client instance registering the `analyticsEvents` datasource and
 * the three publication pipes under their camelCase keys, constructed with
 * `devMode` switched off.
 */
export const tinybird = new Tinybird({
  datasources: {
    analyticsEvents: analyticsEvents,
  },
  pipes: {
    publicationTraffic: publicationTraffic,
    publicationTopReferrers: publicationTopReferrers,
    publicationTopPages: publicationTopPages,
  },
  devMode: false,
});