+36
config.go
+36
config.go
···
1
+
package main
2
+
3
+
import (
4
+
"fmt"
5
+
"os"
6
+
7
+
"gopkg.in/yaml.v3"
8
+
)
9
+
10
+
type Config struct {
11
+
Server struct {
12
+
Port int `yaml:"port"`
13
+
RefreshInterval int `yaml:"refresh_interval"`
14
+
} `yaml:"server"`
15
+
Stations []Station `yaml:"stations"`
16
+
}
17
+
18
+
// Station identifies one station entry of the configuration.
type Station struct {
	// Name is read from the "name" key.
	Name string `yaml:"name"`
	// Code is read from the "code" key; FetchAllStationsData passes it to
	// FetchStationDepartures to build the request URL.
	Code string `yaml:"code"`
}
22
+
23
+
func LoadConfig(filename string) (*Config, error) {
24
+
data, err := os.ReadFile(filename)
25
+
if err != nil {
26
+
return nil, fmt.Errorf("failed to read config file: %w", err)
27
+
}
28
+
29
+
var config Config
30
+
err = yaml.Unmarshal(data, &config)
31
+
if err != nil {
32
+
return nil, fmt.Errorf("failed to parse config file: %w", err)
33
+
}
34
+
35
+
return &config, nil
36
+
}
+9
config.yaml
+9
config.yaml
+60
departure.go
+60
departure.go
···
1
+
package main
2
+
3
+
import (
4
+
"regexp"
5
+
"strconv"
6
+
"strings"
7
+
)
8
+
9
+
// Departure is the flat, JSON-serialisable description of one departing
// service. Field order is significant: it determines the key order of the
// encoded JSON.
type Departure struct {
	// ScheduledTime is the timetabled departure, formatted "HH:MM".
	ScheduledTime string `json:"scheduled_time"`
	// ExpectedTime is the revised departure time, when one is known.
	ExpectedTime string `json:"expected_time,omitempty"`
	// Status is a display string such as "On time", "Expected", "Delayed", etc.
	Status string `json:"status"`
	// Destination is the name of the service's destination.
	Destination string `json:"destination"`
	// Via is the optional routing text; omitted from JSON when empty.
	Via string `json:"via,omitempty"`
	// Platform is kept as text rather than a number.
	Platform string `json:"platform"`
	// Duration is a display string such as "30 minutes".
	Duration string `json:"duration"`
	// Stops is the number of stops reported for the journey.
	Stops int `json:"stops"`
	// Operator is the name of the operating company.
	Operator string `json:"operator"`
	// DelayReason is the optional explanation for a delay.
	DelayReason string `json:"delay_reason,omitempty"`
	// ServiceID is the optional identifier of the service.
	ServiceID string `json:"service_id,omitempty"`
}
22
+
23
+
// ParseAriaLabel extracts departure information from the aria-label attribute
24
+
func ParseAriaLabel(ariaLabel string) (*Departure, error) {
25
+
departure := &Departure{}
26
+
27
+
// Handle delayed trains first
28
+
delayPattern := regexp.MustCompile(`^(.*?), (\d{2}:\d{2}), Expected (\d{2}:\d{2}), service for ([^,]+)(?:, via ([^,]+))?, calling at [^,]+, from platform (\d+), duration (\d+) minutes, (\d+) stops, operated by (.+)$`)
29
+
if matches := delayPattern.FindStringSubmatch(ariaLabel); matches != nil {
30
+
departure.DelayReason = matches[1]
31
+
departure.ScheduledTime = matches[2]
32
+
departure.ExpectedTime = matches[3]
33
+
departure.Status = "Expected " + matches[3]
34
+
departure.Destination = strings.TrimSpace(matches[4])
35
+
departure.Via = strings.TrimSpace(matches[5])
36
+
departure.Platform = matches[6]
37
+
departure.Duration = matches[7] + " minutes"
38
+
stops, _ := strconv.Atoi(matches[8])
39
+
departure.Stops = stops
40
+
departure.Operator = strings.TrimSpace(matches[9])
41
+
return departure, nil
42
+
}
43
+
44
+
// Handle normal trains
45
+
normalPattern := regexp.MustCompile(`^(\d{2}:\d{2}), (On time|Cancelled), service for ([^,]+)(?:, via ([^,]+))?, calling at [^,]+, from platform (\d+), duration (\d+) minutes, (\d+) stops, operated by (.+)$`)
46
+
if matches := normalPattern.FindStringSubmatch(ariaLabel); matches != nil {
47
+
departure.ScheduledTime = matches[1]
48
+
departure.Status = matches[2]
49
+
departure.Destination = strings.TrimSpace(matches[3])
50
+
departure.Via = strings.TrimSpace(matches[4])
51
+
departure.Platform = matches[5]
52
+
departure.Duration = matches[6] + " minutes"
53
+
stops, _ := strconv.Atoi(matches[7])
54
+
departure.Stops = stops
55
+
departure.Operator = strings.TrimSpace(matches[8])
56
+
return departure, nil
57
+
}
58
+
59
+
return nil, nil // Unable to parse
60
+
}
+79
fetcher.go
+79
fetcher.go
···
1
+
package main
2
+
3
+
import (
4
+
"fmt"
5
+
"io"
6
+
"net/http"
7
+
"time"
8
+
)
9
+
10
+
// BaseURL is the prefix of the departures page URL; FetchStationDepartures
// appends the station code and a trailing slash to it.
const BaseURL = "https://www.nationalrail.co.uk/live-trains/departures/"

// FetchStationDepartures fetches the HTML content for a station's departures.
//
// It issues a GET request for BaseURL + stationCode + "/" with browser-like
// User-Agent and Accept headers and a 30 second client timeout, and returns
// the response body as a string. An error is returned when the request
// cannot be built or sent, when the status code is not 200, or when the body
// cannot be read.
func FetchStationDepartures(stationCode string) (string, error) {
	target := BaseURL + stationCode + "/"

	request, err := http.NewRequest(http.MethodGet, target, nil)
	if err != nil {
		return "", fmt.Errorf("failed to create request: %w", err)
	}

	// Add headers to mimic a browser request.
	browserHeaders := [][2]string{
		{"User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"},
		{"Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
	}
	for _, header := range browserHeaders {
		request.Header.Set(header[0], header[1])
	}

	httpClient := &http.Client{Timeout: 30 * time.Second}
	response, err := httpClient.Do(request)
	if err != nil {
		return "", fmt.Errorf("failed to fetch data: %w", err)
	}
	defer response.Body.Close()

	if code := response.StatusCode; code != http.StatusOK {
		return "", fmt.Errorf("unexpected status code: %d", code)
	}

	payload, err := io.ReadAll(response.Body)
	if err != nil {
		return "", fmt.Errorf("failed to read response body: %w", err)
	}
	return string(payload), nil
}
45
+
46
+
type StationData struct {
47
+
Station Station `json:"station"`
48
+
Departures []Departure `json:"departures"`
49
+
LastUpdate time.Time `json:"last_update"`
50
+
Error string `json:"error,omitempty"`
51
+
}
52
+
53
+
// FetchAllStationsData fetches departure data for all configured stations
54
+
func FetchAllStationsData(stations []Station) []StationData {
55
+
results := make([]StationData, len(stations))
56
+
57
+
for i, station := range stations {
58
+
results[i] = StationData{
59
+
Station: station,
60
+
LastUpdate: time.Now(),
61
+
}
62
+
63
+
htmlContent, err := FetchStationDepartures(station.Code)
64
+
if err != nil {
65
+
results[i].Error = err.Error()
66
+
continue
67
+
}
68
+
69
+
departures, err := ParseDeparturesFromHTML(htmlContent)
70
+
if err != nil {
71
+
results[i].Error = err.Error()
72
+
continue
73
+
}
74
+
75
+
results[i].Departures = departures
76
+
}
77
+
78
+
return results
79
+
}
+202
json_parser.go
+202
json_parser.go
···
1
+
package main
2
+
3
+
import (
4
+
"encoding/json"
5
+
"fmt"
6
+
"regexp"
7
+
"strings"
8
+
"time"
9
+
10
+
"github.com/PuerkitoBio/goquery"
11
+
)
12
+
13
+
// NextData represents the structure of the __NEXT_DATA__ JSON
14
+
type NextData struct {
15
+
Props struct {
16
+
PageProps struct {
17
+
LiveTrainsState struct {
18
+
Queries []struct {
19
+
State struct {
20
+
Data struct {
21
+
Pages []struct {
22
+
Services []JSONService `json:"services"`
23
+
} `json:"pages"`
24
+
} `json:"data"`
25
+
} `json:"state"`
26
+
} `json:"queries"`
27
+
} `json:"liveTrainsState"`
28
+
} `json:"pageProps"`
29
+
} `json:"props"`
30
+
}
31
+
32
+
// JSONService mirrors one element of a "services" array in the embedded
// page JSON. Pointer-typed string fields distinguish a JSON null (or missing
// key) from a present value.
type JSONService struct {
	// RID is read from "rid"; convertJSONServiceToDeparture uses it as the
	// departure's ServiceID.
	RID string `json:"rid"`
	// TrainUID is read from "trainUid".
	TrainUID string `json:"trainUid"`

	// Origin is the "origin" array of locations.
	Origin []struct {
		LocationName string  `json:"locationName"`
		CRS          string  `json:"crs"`
		Via          *string `json:"via"`
	} `json:"origin"`

	// Destination is the "destination" array of locations.
	Destination []struct {
		LocationName string  `json:"locationName"`
		CRS          string  `json:"crs"`
		Via          *string `json:"via"`
	} `json:"destination"`

	// JourneyDetails is the "journeyDetails" object.
	JourneyDetails struct {
		From struct {
			LocationName string `json:"locationName"`
			CRS          string `json:"crs"`
		} `json:"from"`
		To struct {
			LocationName string `json:"locationName"`
			CRS          string `json:"crs"`
		} `json:"to"`
		Stops int `json:"stops"`
		// DepartureInfo carries timestamps as strings; the converter in this
		// file parses them as RFC 3339.
		DepartureInfo struct {
			Scheduled string  `json:"scheduled"`
			Estimated *string `json:"estimated"`
			Actual    *string `json:"actual"`
		} `json:"departureInfo"`
		// ArrivalInfo has the same layout as DepartureInfo.
		ArrivalInfo struct {
			Scheduled string  `json:"scheduled"`
			Estimated *string `json:"estimated"`
			Actual    *string `json:"actual"`
		} `json:"arrivalInfo"`
	} `json:"journeyDetails"`

	// Operator is the "operator" object.
	Operator struct {
		Name string `json:"name"`
		Code string `json:"code"`
	} `json:"operator"`

	// Status is the "status" object.
	Status struct {
		Status       string  `json:"status"`
		DelayReason  *string `json:"delayReason"`
		CancelReason *string `json:"cancelReason"`
	} `json:"status"`

	// Platform is read from "platform" and kept as text.
	Platform string `json:"platform"`
}
77
+
78
+
// ParseDeparturesFromJSONHTML extracts departure information from HTML containing __NEXT_DATA__ JSON
79
+
func ParseDeparturesFromJSONHTML(htmlContent string) ([]Departure, error) {
80
+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
81
+
if err != nil {
82
+
return nil, fmt.Errorf("failed to parse HTML: %w", err)
83
+
}
84
+
85
+
// Find the __NEXT_DATA__ script tag
86
+
var jsonContent string
87
+
doc.Find("script#__NEXT_DATA__").Each(func(i int, s *goquery.Selection) {
88
+
rawContent := s.Text()
89
+
jsonContent = cleanJSONString(rawContent)
90
+
})
91
+
92
+
if jsonContent == "" {
93
+
return nil, fmt.Errorf("no __NEXT_DATA__ script tag found")
94
+
}
95
+
96
+
// Parse the JSON
97
+
var nextData NextData
98
+
err = json.Unmarshal([]byte(jsonContent), &nextData)
99
+
if err != nil {
100
+
return nil, fmt.Errorf("failed to parse JSON: %w", err)
101
+
}
102
+
103
+
// Extract services from the nested structure
104
+
var departures []Departure
105
+
if len(nextData.Props.PageProps.LiveTrainsState.Queries) > 0 &&
106
+
len(nextData.Props.PageProps.LiveTrainsState.Queries[0].State.Data.Pages) > 0 {
107
+
108
+
services := nextData.Props.PageProps.LiveTrainsState.Queries[0].State.Data.Pages[0].Services
109
+
110
+
for _, service := range services {
111
+
departure := convertJSONServiceToDeparture(service)
112
+
departures = append(departures, departure)
113
+
}
114
+
}
115
+
116
+
return departures, nil
117
+
}
118
+
119
+
func convertJSONServiceToDeparture(service JSONService) Departure {
120
+
departure := Departure{
121
+
ServiceID: service.RID,
122
+
Platform: service.Platform,
123
+
Stops: service.JourneyDetails.Stops,
124
+
Operator: service.Operator.Name,
125
+
}
126
+
127
+
// Extract destination
128
+
if len(service.Destination) > 0 {
129
+
departure.Destination = service.Destination[0].LocationName
130
+
if service.Destination[0].Via != nil && *service.Destination[0].Via != "" {
131
+
departure.Via = *service.Destination[0].Via
132
+
}
133
+
}
134
+
135
+
// Parse scheduled time
136
+
if scheduledTime, err := time.Parse(time.RFC3339, service.JourneyDetails.DepartureInfo.Scheduled); err == nil {
137
+
departure.ScheduledTime = scheduledTime.Format("15:04")
138
+
}
139
+
140
+
// Parse estimated/actual time and set status
141
+
if service.JourneyDetails.DepartureInfo.Actual != nil {
142
+
// Train has already departed
143
+
if actualTime, err := time.Parse(time.RFC3339, *service.JourneyDetails.DepartureInfo.Actual); err == nil {
144
+
departure.ExpectedTime = actualTime.Format("15:04")
145
+
departure.Status = "Departed " + departure.ExpectedTime
146
+
}
147
+
} else if service.JourneyDetails.DepartureInfo.Estimated != nil {
148
+
// Train has estimated departure time
149
+
if estimatedTime, err := time.Parse(time.RFC3339, *service.JourneyDetails.DepartureInfo.Estimated); err == nil {
150
+
departure.ExpectedTime = estimatedTime.Format("15:04")
151
+
if departure.ExpectedTime == departure.ScheduledTime {
152
+
departure.Status = "On time"
153
+
} else {
154
+
departure.Status = "Expected " + departure.ExpectedTime
155
+
}
156
+
}
157
+
} else {
158
+
// Use service status
159
+
switch service.Status.Status {
160
+
case "OnTime":
161
+
departure.Status = "On time"
162
+
case "Late":
163
+
departure.Status = "Late"
164
+
case "Cancelled":
165
+
departure.Status = "Cancelled"
166
+
default:
167
+
departure.Status = service.Status.Status
168
+
}
169
+
}
170
+
171
+
// Calculate duration (rough estimate)
172
+
if scheduledDep, err1 := time.Parse(time.RFC3339, service.JourneyDetails.DepartureInfo.Scheduled); err1 == nil {
173
+
if scheduledArr, err2 := time.Parse(time.RFC3339, service.JourneyDetails.ArrivalInfo.Scheduled); err2 == nil {
174
+
duration := scheduledArr.Sub(scheduledDep)
175
+
departure.Duration = fmt.Sprintf("%d minutes", int(duration.Minutes()))
176
+
}
177
+
}
178
+
179
+
// Add delay reason if available
180
+
if service.Status.DelayReason != nil && *service.Status.DelayReason != "" {
181
+
departure.DelayReason = *service.Status.DelayReason
182
+
}
183
+
184
+
return departure
185
+
}
186
+
187
+
// Patterns used by cleanJSONString, compiled once at package initialisation.
var (
	// jsonStringLiteralPattern matches one complete double-quoted string
	// literal. A backslash and the character after it are consumed as a
	// unit, so an escaped quote does not terminate the literal; (?s) lets
	// that escaped character be a newline as well.
	jsonStringLiteralPattern = regexp.MustCompile(`(?s)"(?:[^"\\]|\\.)*"`)

	// newlineRunPattern matches a line break together with any whitespace
	// directly around it.
	newlineRunPattern = regexp.MustCompile(`\s*\n\s*`)
)

// cleanJSONString fixes common JSON issues from HTML extraction.
//
// The input is trimmed of surrounding whitespace. Then, inside every
// double-quoted string literal, each line break together with the whitespace
// around it is collapsed into a single space, because a raw newline inside a
// JSON string makes the document invalid. Text outside string literals is
// left untouched.
//
// Literals are matched whole and in order, so a newline between two tokens
// can never be mistaken for the inside of a string and shift the pairing of
// opening and closing quotes.
func cleanJSONString(s string) string {
	s = strings.TrimSpace(s)

	return jsonStringLiteralPattern.ReplaceAllStringFunc(s, func(literal string) string {
		if !strings.Contains(literal, "\n") {
			return literal
		}
		// The surrounding quotes are not whitespace, so the replacement can
		// safely run over the whole literal.
		return newlineRunPattern.ReplaceAllString(literal, " ")
	})
}
+72
main.go
+72
main.go
···
1
+
package main
2
+
3
+
import (
4
+
"encoding/json"
5
+
"fmt"
6
+
"log"
7
+
"os"
8
+
)
9
+
10
+
func main() {
11
+
if len(os.Args) < 2 {
12
+
fmt.Println("Sundial - Live Train Departures")
13
+
fmt.Println("Usage:")
14
+
fmt.Println(" go run . server - Start web server")
15
+
fmt.Println(" go run . cli - Start interactive CLI")
16
+
fmt.Println(" go run . parse <html_file> - Parse HTML file")
17
+
fmt.Println("Examples:")
18
+
fmt.Println(" go run . server")
19
+
fmt.Println(" go run . cli")
20
+
fmt.Println(" go run . parse sample.html")
21
+
os.Exit(1)
22
+
}
23
+
24
+
command := os.Args[1]
25
+
26
+
switch command {
27
+
case "server":
28
+
runServer()
29
+
case "cli":
30
+
err := runCLI()
31
+
if err != nil {
32
+
log.Fatalf("CLI error: %v", err)
33
+
}
34
+
case "parse":
35
+
if len(os.Args) < 3 {
36
+
fmt.Println("Usage: go run . parse <html_file>")
37
+
os.Exit(1)
38
+
}
39
+
parseFile(os.Args[2])
40
+
default:
41
+
// Backward compatibility - treat first arg as filename
42
+
parseFile(command)
43
+
}
44
+
}
45
+
46
+
func runServer() {
47
+
config, err := LoadConfig("config.yaml")
48
+
if err != nil {
49
+
log.Fatalf("Error loading config: %v", err)
50
+
}
51
+
52
+
server := NewServer(config)
53
+
log.Fatal(server.Start())
54
+
}
55
+
56
+
func parseFile(filename string) {
57
+
departures, err := ParseDeparturesFromFile(filename)
58
+
if err != nil {
59
+
log.Fatalf("Error parsing departures: %v", err)
60
+
}
61
+
62
+
fmt.Printf("Found %d departures:\n\n", len(departures))
63
+
64
+
// Print as formatted JSON
65
+
jsonData, err := json.MarshalIndent(departures, "", " ")
66
+
if err != nil {
67
+
log.Fatalf("Error marshaling to JSON: %v", err)
68
+
}
69
+
70
+
fmt.Println(string(jsonData))
71
+
}
72
+
+80
parser.go
+80
parser.go
···
1
+
package main
2
+
3
+
import (
4
+
"fmt"
5
+
"os"
6
+
"strings"
7
+
8
+
"github.com/PuerkitoBio/goquery"
9
+
)
10
+
11
+
// ParseDeparturesFromHTML extracts departure information from the National Rail HTML
12
+
func ParseDeparturesFromHTML(htmlContent string) ([]Departure, error) {
13
+
// Try JSON approach first (for new React-based site)
14
+
if departures, err := ParseDeparturesFromJSONHTML(htmlContent); err == nil && len(departures) > 0 {
15
+
return departures, nil
16
+
}
17
+
18
+
// Fallback to old HTML parsing approach
19
+
return parseFromAriaLabels(htmlContent)
20
+
}
21
+
22
+
func parseFromAriaLabels(htmlContent string) ([]Departure, error) {
23
+
doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent))
24
+
if err != nil {
25
+
return nil, fmt.Errorf("failed to parse HTML: %w", err)
26
+
}
27
+
28
+
seenServices := make(map[string]bool)
29
+
var departures []Departure
30
+
31
+
// Find all anchor tags with aria-label containing service information
32
+
doc.Find("a[aria-label*='service for']").Each(func(i int, s *goquery.Selection) {
33
+
ariaLabel, exists := s.Attr("aria-label")
34
+
if !exists {
35
+
return
36
+
}
37
+
38
+
// Extract service ID from href if available
39
+
href, _ := s.Attr("href")
40
+
serviceID := extractServiceID(href)
41
+
42
+
// Skip if we've already processed this service
43
+
if seenServices[serviceID] {
44
+
return
45
+
}
46
+
seenServices[serviceID] = true
47
+
48
+
departure, err := ParseAriaLabel(ariaLabel)
49
+
if err != nil || departure == nil {
50
+
return
51
+
}
52
+
53
+
departure.ServiceID = serviceID
54
+
departures = append(departures, *departure)
55
+
})
56
+
57
+
return departures, nil
58
+
}
59
+
60
+
// extractServiceID extracts the service ID from the href URL.
//
// It returns the value of the first "sid=" parameter, ending at the next
// '&' or '#', or "" when there is none. An occurrence of "sid=" that is
// merely the tail of a longer parameter name (for example "rsid=") is
// skipped. The value is returned as written in the URL; it is not
// percent-decoded.
func extractServiceID(href string) string {
	const key = "sid="

	// continuesName reports whether c could be part of a parameter name, in
	// which case a following "sid=" belongs to a longer name.
	continuesName := func(c byte) bool {
		switch {
		case 'a' <= c && c <= 'z', 'A' <= c && c <= 'Z', '0' <= c && c <= '9':
			return true
		case c == '_', c == '-', c == '.':
			return true
		}
		return false
	}

	for searchFrom := 0; searchFrom < len(href); {
		rel := strings.Index(href[searchFrom:], key)
		if rel < 0 {
			return ""
		}
		at := searchFrom + rel

		if at > 0 && continuesName(href[at-1]) {
			searchFrom = at + len(key)
			continue
		}

		value := href[at+len(key):]
		if end := strings.IndexAny(value, "&#"); end >= 0 {
			value = value[:end]
		}
		return value
	}

	return ""
}
71
+
72
+
// ParseDeparturesFromFile reads an HTML file and extracts departure information
73
+
func ParseDeparturesFromFile(filename string) ([]Departure, error) {
74
+
content, err := os.ReadFile(filename)
75
+
if err != nil {
76
+
return nil, fmt.Errorf("failed to read file %s: %w", filename, err)
77
+
}
78
+
79
+
return ParseDeparturesFromHTML(string(content))
80
+
}