Dashboard of National Rail train times
at main 8.2 kB view raw
1package main 2 3import ( 4 "encoding/json" 5 "fmt" 6 "os" 7 "regexp" 8 "strings" 9 "time" 10 11 "github.com/PuerkitoBio/goquery" 12) 13 14// ParseDeparturesFromHTML extracts departure information from the National Rail HTML 15func ParseDeparturesFromHTML(htmlContent string) ([]Departure, error) { 16 // Try JSON approach first (for new React-based site) 17 if departures, err := ParseDeparturesFromJSONHTML(htmlContent); err == nil && len(departures) > 0 { 18 return departures, nil 19 } 20 21 // Fallback to old HTML parsing approach 22 return parseFromAriaLabels(htmlContent) 23} 24 25func parseFromAriaLabels(htmlContent string) ([]Departure, error) { 26 doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent)) 27 if err != nil { 28 return nil, fmt.Errorf("failed to parse HTML: %w", err) 29 } 30 31 seenServices := make(map[string]bool) 32 var departures []Departure 33 34 // Find all anchor tags with aria-label containing service information 35 doc.Find("a[aria-label*='service for']").Each(func(i int, s *goquery.Selection) { 36 ariaLabel, exists := s.Attr("aria-label") 37 if !exists { 38 return 39 } 40 41 // Extract service ID from href if available 42 href, _ := s.Attr("href") 43 serviceID := extractServiceID(href) 44 45 // Skip if we've already processed this service 46 if seenServices[serviceID] { 47 return 48 } 49 seenServices[serviceID] = true 50 51 departure, err := ParseAriaLabel(ariaLabel) 52 if err != nil || departure == nil { 53 return 54 } 55 56 departure.ServiceID = serviceID 57 departures = append(departures, *departure) 58 }) 59 60 return departures, nil 61} 62 63// extractServiceID extracts the service ID from the href URL 64func extractServiceID(href string) string { 65 if strings.Contains(href, "sid=") { 66 parts := strings.Split(href, "sid=") 67 if len(parts) > 1 { 68 sidPart := strings.Split(parts[1], "&")[0] 69 return sidPart 70 } 71 } 72 return "" 73} 74 75// ParseDeparturesFromFile reads an HTML file and extracts departure 
information 76func ParseDeparturesFromFile(filename string) ([]Departure, error) { 77 content, err := os.ReadFile(filename) 78 if err != nil { 79 return nil, fmt.Errorf("failed to read file %s: %w", filename, err) 80 } 81 82 return ParseDeparturesFromHTML(string(content)) 83} 84 85// NextData represents the structure of the __NEXT_DATA__ JSON 86type NextData struct { 87 Props struct { 88 PageProps struct { 89 LiveTrainsState struct { 90 Queries []struct { 91 State struct { 92 Data struct { 93 Pages []struct { 94 Services []JSONService `json:"services"` 95 } `json:"pages"` 96 } `json:"data"` 97 } `json:"state"` 98 } `json:"queries"` 99 } `json:"liveTrainsState"` 100 } `json:"pageProps"` 101 } `json:"props"` 102} 103 104type JSONService struct { 105 RID string `json:"rid"` 106 TrainUID string `json:"trainUid"` 107 Origin []struct { 108 LocationName string `json:"locationName"` 109 CRS string `json:"crs"` 110 Via *string `json:"via"` 111 } `json:"origin"` 112 Destination []struct { 113 LocationName string `json:"locationName"` 114 CRS string `json:"crs"` 115 Via *string `json:"via"` 116 } `json:"destination"` 117 JourneyDetails struct { 118 From struct { 119 LocationName string `json:"locationName"` 120 CRS string `json:"crs"` 121 } `json:"from"` 122 To struct { 123 LocationName string `json:"locationName"` 124 CRS string `json:"crs"` 125 } `json:"to"` 126 Stops int `json:"stops"` 127 DepartureInfo struct { 128 Scheduled string `json:"scheduled"` 129 Estimated *string `json:"estimated"` 130 Actual *string `json:"actual"` 131 } `json:"departureInfo"` 132 ArrivalInfo struct { 133 Scheduled string `json:"scheduled"` 134 Estimated *string `json:"estimated"` 135 Actual *string `json:"actual"` 136 } `json:"arrivalInfo"` 137 } `json:"journeyDetails"` 138 Operator struct { 139 Name string `json:"name"` 140 Code string `json:"code"` 141 } `json:"operator"` 142 Status struct { 143 Status string `json:"status"` 144 DelayReason *string `json:"delayReason"` 145 CancelReason 
*string `json:"cancelReason"` 146 } `json:"status"` 147 Platform string `json:"platform"` 148} 149 150// ParseDeparturesFromJSONHTML extracts departure information from HTML containing __NEXT_DATA__ JSON 151func ParseDeparturesFromJSONHTML(htmlContent string) ([]Departure, error) { 152 doc, err := goquery.NewDocumentFromReader(strings.NewReader(htmlContent)) 153 if err != nil { 154 return nil, fmt.Errorf("failed to parse HTML: %w", err) 155 } 156 157 // Find the __NEXT_DATA__ script tag 158 var jsonContent string 159 doc.Find("script#__NEXT_DATA__").Each(func(i int, s *goquery.Selection) { 160 rawContent := s.Text() 161 jsonContent = cleanJSONString(rawContent) 162 }) 163 164 if jsonContent == "" { 165 return nil, fmt.Errorf("no __NEXT_DATA__ script tag found") 166 } 167 168 // Parse the JSON 169 var nextData NextData 170 err = json.Unmarshal([]byte(jsonContent), &nextData) 171 if err != nil { 172 return nil, fmt.Errorf("failed to parse JSON: %w", err) 173 } 174 175 // Extract services from the nested structure 176 var departures []Departure 177 if len(nextData.Props.PageProps.LiveTrainsState.Queries) > 0 && 178 len(nextData.Props.PageProps.LiveTrainsState.Queries[0].State.Data.Pages) > 0 { 179 180 services := nextData.Props.PageProps.LiveTrainsState.Queries[0].State.Data.Pages[0].Services 181 182 for _, service := range services { 183 departure := convertJSONServiceToDeparture(service) 184 departures = append(departures, departure) 185 } 186 } 187 188 return departures, nil 189} 190 191func convertJSONServiceToDeparture(service JSONService) Departure { 192 departure := Departure{ 193 ServiceID: service.RID, 194 Platform: service.Platform, 195 Stops: service.JourneyDetails.Stops, 196 Operator: service.Operator.Name, 197 } 198 199 // Extract destination 200 if len(service.Destination) > 0 { 201 departure.Destination = service.Destination[0].LocationName 202 if service.Destination[0].Via != nil && *service.Destination[0].Via != "" { 203 departure.Via = 
*service.Destination[0].Via 204 } 205 } 206 207 // Parse scheduled time 208 if scheduledTime, err := time.Parse(time.RFC3339, service.JourneyDetails.DepartureInfo.Scheduled); err == nil { 209 departure.ScheduledTime = scheduledTime.Format("15:04") 210 } 211 212 // Parse estimated/actual time and set status 213 if service.JourneyDetails.DepartureInfo.Actual != nil { 214 // Train has already departed 215 if actualTime, err := time.Parse(time.RFC3339, *service.JourneyDetails.DepartureInfo.Actual); err == nil { 216 departure.ExpectedTime = actualTime.Format("15:04") 217 departure.Status = "Departed " + departure.ExpectedTime 218 } 219 } else if service.JourneyDetails.DepartureInfo.Estimated != nil { 220 // Train has estimated departure time 221 if estimatedTime, err := time.Parse(time.RFC3339, *service.JourneyDetails.DepartureInfo.Estimated); err == nil { 222 departure.ExpectedTime = estimatedTime.Format("15:04") 223 if departure.ExpectedTime == departure.ScheduledTime { 224 departure.Status = "On time" 225 } else { 226 departure.Status = "Expected " + departure.ExpectedTime 227 } 228 } 229 } else { 230 // Use service status 231 switch service.Status.Status { 232 case "OnTime": 233 departure.Status = "On time" 234 case "Late": 235 departure.Status = "Late" 236 case "Cancelled": 237 departure.Status = "Cancelled" 238 default: 239 departure.Status = service.Status.Status 240 } 241 } 242 243 // Calculate duration (rough estimate) 244 if scheduledDep, err1 := time.Parse(time.RFC3339, service.JourneyDetails.DepartureInfo.Scheduled); err1 == nil { 245 if scheduledArr, err2 := time.Parse(time.RFC3339, service.JourneyDetails.ArrivalInfo.Scheduled); err2 == nil { 246 duration := scheduledArr.Sub(scheduledDep) 247 departure.Duration = fmt.Sprintf("%d minutes", int(duration.Minutes())) 248 } 249 } 250 251 // Add delay reason if available 252 if service.Status.DelayReason != nil && *service.Status.DelayReason != "" { 253 departure.DelayReason = *service.Status.DelayReason 254 } 
255 256 return departure 257} 258 259// cleanJSONString fixes common JSON issues from HTML extraction 260func cleanJSONString(s string) string { 261 s = strings.TrimSpace(s) 262 263 // Fix newlines within string literals 264 // This is a simplified approach - we replace newlines within quoted strings 265 re := regexp.MustCompile(`"([^"]*\n[^"]*)"`) 266 s = re.ReplaceAllStringFunc(s, func(match string) string { 267 // Remove quotes, replace newlines with spaces, add quotes back 268 inner := match[1 : len(match)-1] // Remove surrounding quotes 269 inner = regexp.MustCompile(`\s*\n\s*`).ReplaceAllString(inner, " ") 270 return `"` + inner + `"` 271 }) 272 273 return s 274}