porting all github actions from bluesky-social/indigo to tangled CI

Move all regexp.MustCompile calls out of function bodies to package scope (#510)

CPU profiles showed a lot of time spent recompiling the same regexps on every call

authored by Jaz and committed by GitHub 71e21e5d 05273c62

Changed files
+16 -7
atproto
search
+3 -1
atproto/syntax/cid.go
··· 13 13 // Always use [ParseCID] instead of wrapping strings directly, especially when working with network input. 14 14 type CID string 15 15 16 + var cidRegex = regexp.MustCompile(`^[a-zA-Z0-9+=]{8,256}$`) 17 + 16 18 func ParseCID(raw string) (CID, error) { 17 19 if len(raw) > 256 { 18 20 return "", fmt.Errorf("CID is too long (256 chars max)") ··· 20 22 if len(raw) < 8 { 21 23 return "", fmt.Errorf("CID is too short (8 chars min)") 22 24 } 23 - var cidRegex = regexp.MustCompile(`^[a-zA-Z0-9+=]{8,256}$`) 25 + 24 26 if !cidRegex.MatchString(raw) { 25 27 return "", fmt.Errorf("CID syntax didn't validate via regex") 26 28 }
+5 -2
atproto/syntax/datetime.go
··· 21 21 // Syntax is specified at: https://atproto.com/specs/lexicon#datetime 22 22 type Datetime string 23 23 24 + var datetimeRegex = regexp.MustCompile(`^[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9](.[0-9]{1,20})?(Z|([+-][0-2][0-9]:[0-5][0-9]))$`) 25 + 24 26 func ParseDatetime(raw string) (Datetime, error) { 25 27 if len(raw) > 64 { 26 28 return "", fmt.Errorf("Datetime too long (max 64 chars)") 27 29 } 28 - var datetimeRegex = regexp.MustCompile(`^[0-9]{4}-[01][0-9]-[0-3][0-9]T[0-2][0-9]:[0-6][0-9]:[0-6][0-9](.[0-9]{1,20})?(Z|([+-][0-2][0-9]:[0-5][0-9]))$`) 30 + 29 31 if !datetimeRegex.MatchString(raw) { 30 32 return "", fmt.Errorf("Datetime syntax didn't validate via regex") 31 33 } ··· 53 55 // Similar to ParseDatetime, but more flexible about some parsing. 54 56 // 55 57 // Note that this may mutate the internal string, so a round-trip will fail. This is intended for working with legacy/broken records, not to be used in an ongoing way. 58 + var hasTimezoneRegex = regexp.MustCompile(`^.*(([+-]\d\d:?\d\d)|[a-zA-Z])$`) 59 + 56 60 func ParseDatetimeLenient(raw string) (Datetime, error) { 57 61 // fast path: it is a valid overall datetime 58 62 valid, err := ParseDatetime(raw) ··· 71 75 } 72 76 73 77 // try adding timezone if it is missing 74 - var hasTimezoneRegex = regexp.MustCompile(`^.*(([+-]\d\d:?\d\d)|[a-zA-Z])$`) 75 78 if !hasTimezoneRegex.MatchString(raw) { 76 79 withTZ, err := ParseDatetime(raw + "Z") 77 80 if nil == err {
+2 -1
atproto/syntax/did.go
··· 13 13 // Syntax specification: https://atproto.com/specs/did 14 14 type DID string 15 15 16 + var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`) 17 + 16 18 func ParseDID(raw string) (DID, error) { 17 19 if len(raw) > 2*1024 { 18 20 return "", fmt.Errorf("DID is too long (2048 chars max)") 19 21 } 20 - var didRegex = regexp.MustCompile(`^did:[a-z]+:[a-zA-Z0-9._:%-]*[a-zA-Z0-9._-]$`) 21 22 if !didRegex.MatchString(raw) { 22 23 return "", fmt.Errorf("DID syntax didn't validate via regex") 23 24 }
+2 -1
atproto/syntax/language.go
··· 12 12 // The syntax is BCP-47. This is a partial/naive parsing implementation, designed for fast validation and exact-string passthrough with no normalization. For actually working with BCP-47 language specifiers in atproto code bases, we recommend the golang.org/x/text/language package. 13 13 type Language string 14 14 15 + var langRegex = regexp.MustCompile(`^(i|[a-z]{2,3})(-[a-zA-Z0-9]+)*$`) 16 + 15 17 func ParseLanguage(raw string) (Language, error) { 16 18 if len(raw) > 128 { 17 19 return "", fmt.Errorf("Language is too long (128 chars max)") 18 20 } 19 - var langRegex = regexp.MustCompile(`^(i|[a-z]{2,3})(-[a-zA-Z0-9]+)*$`) 20 21 if !langRegex.MatchString(raw) { 21 22 return "", fmt.Errorf("Language syntax didn't validate via regex") 22 23 }
+2 -1
atproto/syntax/tid.go
··· 24 24 // Syntax specification: https://atproto.com/specs/record-key 25 25 type TID string 26 26 27 + var tidRegex = regexp.MustCompile(`^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$`) 28 + 27 29 func ParseTID(raw string) (TID, error) { 28 30 if len(raw) != 13 { 29 31 return "", fmt.Errorf("TID is wrong length (expected 13 chars)") 30 32 } 31 - var tidRegex = regexp.MustCompile(`^[234567abcdefghij][234567abcdefghijklmnopqrstuvwxyz]{12}$`) 32 33 if !tidRegex.MatchString(raw) { 33 34 return "", fmt.Errorf("TID syntax didn't validate via regex") 34 35 }
+2 -1
search/indexing.go
··· 48 48 return nil 49 49 } 50 50 51 + var tidRegex = regexp.MustCompile(`^[234567abcdefghijklmnopqrstuvwxyz]{13}$`) 52 + 51 53 func (s *Server) indexPost(ctx context.Context, ident *identity.Identity, rec *appbsky.FeedPost, path string, rcid cid.Cid) error { 52 54 ctx, span := tracer.Start(ctx, "indexPost") 53 55 defer span.End() ··· 56 58 log := s.logger.With("repo", ident.DID, "path", path, "op", "indexPost") 57 59 parts := strings.SplitN(path, "/", 3) 58 60 // TODO: replace with an atproto/syntax package type for TID 59 - var tidRegex = regexp.MustCompile(`^[234567abcdefghijklmnopqrstuvwxyz]{13}$`) 60 61 if len(parts) != 2 || !tidRegex.MatchString(parts[1]) { 61 62 log.Warn("skipping index post record with weird path/TID", "did", ident.DID, "path", path) 62 63 return nil