fork of indigo with slightly nicer lexgen
at main 1.2 kB view raw
1package search 2 3import ( 4 "net/url" 5 6 "github.com/PuerkitoBio/purell" 7) 8 9var trackingParams = []string{ 10 "__s", 11 "_ga", 12 "campaign_id", 13 "ceid", 14 "emci", 15 "emdi", 16 "fbclid", 17 "gclid", 18 "hootPostID", 19 "mc_eid", 20 "mkclid", 21 "mkt_tok", 22 "msclkid", 23 "pk_campaign", 24 "pk_kwd", 25 "sessionid", 26 "sourceid", 27 "utm_campaign", 28 "utm_content", 29 "utm_id", 30 "utm_medium", 31 "utm_source", 32 "utm_term", 33 "xpid", 34} 35 36// aggressively normalizes URL, for search indexing and matching. it is possible the URL won't be directly functional after this normalization 37func NormalizeLossyURL(raw string) string { 38 clean, err := purell.NormalizeURLString(raw, purell.FlagsUsuallySafeGreedy|purell.FlagRemoveDirectoryIndex|purell.FlagRemoveFragment|purell.FlagRemoveDuplicateSlashes|purell.FlagRemoveWWW|purell.FlagSortQuery) 39 if err != nil { 40 return raw 41 } 42 43 // remove tracking params 44 u, err := url.Parse(clean) 45 if err != nil { 46 return clean 47 } 48 if u.RawQuery == "" { 49 return clean 50 } 51 params := u.Query() 52 53 // there is probably a more efficient way to do this 54 for _, p := range trackingParams { 55 if params.Has(p) { 56 params.Del(p) 57 } 58 } 59 u.RawQuery = params.Encode() 60 return u.String() 61}