My agentic slop goes here. Not intended for anyone else!
0
fork

Configure Feed

Select the types of activity you want to include in your feed.

vendor river

+907
+52
stack/river/.github/workflows/ci.yml
··· 1 + name: CI 2 + 3 + on: 4 + push: 5 + branches: [ main ] 6 + pull_request: 7 + branches: [ main ] 8 + 9 + jobs: 10 + build-and-test: 11 + strategy: 12 + fail-fast: false 13 + 14 + matrix: 15 + os: 16 + - macos-latest 17 + - ubuntu-latest 18 + - windows-latest 19 + 20 + ocaml-compiler: 21 + - 4.12.x 22 + 23 + runs-on: ${{ matrix.os }} 24 + 25 + steps: 26 + 27 + - name: Checkout code 28 + uses: actions/checkout@v2 29 + 30 + - name: Use OCaml ${{ matrix.ocaml-compiler }} 31 + uses: ocaml/setup-ocaml@v2 32 + with: 33 + ocaml-compiler: ${{ matrix.ocaml-compiler }} 34 + dune-cache: ${{ matrix.os != 'macos-latest' }} 35 + 36 + - name: Install ocamlformat 37 + run: opam install ocamlformat.0.18.0 38 + if: ${{ matrix.os == 'ubuntu-latest' }} 39 + 40 + - name: Install opam packages 41 + run: opam install . --with-test 42 + 43 + - name: Check formatting 44 + run: make fmt 45 + if: ${{ matrix.os == 'ubuntu-latest' && always() }} 46 + 47 + - name: Run build 48 + run: make build 49 + 50 + - name: Run the unit tests 51 + run: make test 52 + timeout-minutes: 1
+9
stack/river/.gitignore
··· 1 + # Dune generated files 2 + _build/ 3 + *.install 4 + 5 + # Merlin configuring file for Vim and Emacs 6 + .merlin 7 + 8 + # Local OPAM switch 9 + _opam/
+4
stack/river/.ocamlformat
··· 1 + version = 0.20.1 2 + profile = conventional 3 + parse-docstrings = true 4 + wrap-comments = true
+29
stack/river/CHANGES.md
··· 1 + # 0.4 - 2024-11-08 2 + 3 + - Replace ocamlnet HTML parser with Lambda Soup (#15, @aantron) 4 + 5 + # 0.3 - 2023-11-21 6 + 7 + - Fall back to entry id if entry links doesn't exist (#11, @sabine) 8 + 9 + # 0.2 - 2022-04-14 10 + 11 + - Build with dune. 12 + - Make the types abstract and add accessor functions. 13 + - Support fetching meta description and SEO image from the posts links. 14 + 15 + # 0.1.3 - 2015-07-28 16 + 17 + - Make river compatible with the latest syndic API 18 + 19 + # 0.1.2 - 2015-03-24 20 + 21 + - Refactoring modules. 22 + 23 + # 0.1.1 - 2015-03-19 24 + 25 + - Upgrading version number. 26 + 27 + # 0.1 - 2015-03-15 28 + 29 + - Initial release
+13
stack/river/LICENSE
··· 1 + Copyright (c) 2015, KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 2 + 3 + Permission to use, copy, modify, and/or distribute this software for any 4 + purpose with or without fee is hereby granted, provided that the above 5 + copyright notice and this permission notice appear in all copies. 6 + 7 + THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 + WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 + MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 10 + ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 + WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 12 + ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 13 + OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+43
stack/river/Makefile
··· 1 + .DEFAULT_GOAL := all 2 + 3 + .PHONY: all 4 + all: 5 + opam exec -- dune build --root . @install 6 + 7 + .PHONY: deps 8 + deps: ## Install development dependencies 9 + opam install -y dune-release ocamlformat utop ocaml-lsp-server 10 + opam install --deps-only --with-test --with-doc -y . 11 + 12 + .PHONY: create_switch 13 + create_switch: ## Create an opam switch without any dependency 14 + opam switch create . --no-install -y 15 + 16 + .PHONY: switch 17 + switch: ## Create an opam switch and install development dependencies 18 + opam install . --deps-only --with-doc --with-test 19 + opam install -y dune-release ocamlformat utop ocaml-lsp-server 20 + 21 + .PHONY: build 22 + build: ## Build the project, including non installable libraries and executables 23 + opam exec -- dune build --root . 24 + 25 + .PHONY: test 26 + test: ## Run the unit tests 27 + opam exec -- dune runtest --root . 28 + 29 + .PHONY: clean 30 + clean: ## Clean build artifacts and other generated files 31 + opam exec -- dune clean --root . 32 + 33 + .PHONY: doc 34 + doc: ## Generate odoc documentation 35 + opam exec -- dune build --root . @doc 36 + 37 + .PHONY: fmt 38 + fmt: ## Format the codebase with ocamlformat 39 + opam exec -- dune build --root . --auto-promote @fmt 40 + 41 + .PHONY: watch 42 + watch: ## Watch for the filesystem and rebuild on every change 43 + opam exec -- dune build --root . --watch
+58
stack/river/README.md
··· 1 + # River 2 + 3 + [![Actions Status](https://github.com/kayceesrk/river/workflows/CI/badge.svg)](https://github.com/kayceesrk/river/actions) 4 + 5 + RSS2 and Atom feed aggregator for OCaml 6 + 7 + 8 + ## Features 9 + 10 + - Performs deduplication. 11 + - Supports pagination and generating well-formed html prefix snippets. 12 + - Support for generating aggregate feeds. 13 + - Sorts the posts from most recent to oldest. 14 + - Depends on Lambda Soup for html parsing. 15 + 16 + ## Installation 17 + 18 + ```bash 19 + opam install river 20 + ``` 21 + 22 + ## Usage 23 + 24 + Here's an example program that aggregates the feeds from different sources: 25 + 26 + ```ocaml 27 + let sources = 28 + River. 29 + [ 30 + { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" }; 31 + { 32 + name = "Amir Chaudhry"; 33 + url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml"; 34 + }; 35 + ] 36 + 37 + let () = 38 + let feeds = List.map River.fetch sources in 39 + let posts = River.posts feeds in 40 + let entries = River.create_atom_entries posts in 41 + let feed = 42 + let authors = [ Syndic.Atom.author "OCaml Blog" ] in 43 + let id = Uri.of_string "https://ocaml.org/atom.xml" in 44 + let links = [ Syndic.Atom.link ~rel:Self id ] in 45 + let title : Syndic.Atom.text_construct = 46 + Text "OCaml Blog: Read the latest OCaml news from the community." 47 + in 48 + let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in 49 + Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries 50 + in 51 + let out_channel = open_out "example/atom.xml" in 52 + Syndic.Atom.output feed (`Channel out_channel); 53 + close_out out_channel 54 + ``` 55 + 56 + ## Contributing 57 + 58 + Take a look at our [Contributing Guide](CONTRIBUTING.md).
+38
stack/river/dune-project
··· 1 + (lang dune 3.0) 2 + 3 + (name river) 4 + 5 + (documentation "https://kayceesrk.github.io/river/") 6 + 7 + (source 8 + (github kayceesrk/river)) 9 + 10 + (license MIT) 11 + 12 + (authors "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>") 13 + 14 + (maintainers "KC Sivaramakrishnan <sk826@cl.cam.ac.uk>") 15 + 16 + (generate_opam_files true) 17 + 18 + (package 19 + (name river) 20 + (synopsis "RSS2 and Atom feed aggregator for OCaml") 21 + (description "RSS2 and Atom feed aggregator for OCaml") 22 + (depends 23 + (ocaml 24 + (>= 4.08.0)) 25 + dune 26 + (syndic 27 + (>= 1.5)) 28 + (cohttp 29 + (>= 5.0.0)) 30 + (cohttp-lwt 31 + (>= 5.0.0)) 32 + (cohttp-lwt-unix 33 + (>= 5.0.0)) 34 + ptime 35 + lwt 36 + ocamlnet 37 + lambdasoup 38 + (odoc :with-doc)))
+31
stack/river/example/aggregate_feeds.ml
··· 1 + let sources = 2 + River. 3 + [ 4 + { name = "KC Sivaramakrishnan"; url = "http://kcsrk.info/atom-ocaml.xml" }; 5 + { 6 + name = "Amir Chaudhry"; 7 + url = "http://amirchaudhry.com/tags/ocamllabs-atom.xml"; 8 + }; 9 + ] 10 + 11 + let main () = 12 + let feeds = List.map River.fetch sources in 13 + let posts = River.posts feeds in 14 + let entries = River.create_atom_entries posts in 15 + let feed = 16 + let authors = [ Syndic.Atom.author "OCaml Blog" ] in 17 + let id = Uri.of_string "https://ocaml.org/atom.xml" in 18 + let links = [ Syndic.Atom.link ~rel:Self id ] in 19 + let title : Syndic.Atom.text_construct = 20 + Text "OCaml Blog: Read the latest OCaml news from the community." 21 + in 22 + let updated = Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get in 23 + Syndic.Atom.feed ~authors ~links ~id ~title ~updated entries 24 + in 25 + let out_channel = open_out "example/atom.xml" in 26 + Syndic.Atom.output feed (`Channel out_channel); 27 + close_out out_channel 28 + 29 + let () = 30 + Printexc.record_backtrace true; 31 + main ()
+3
stack/river/example/dune
··· 1 + (executable 2 + (name aggregate_feeds) 3 + (libraries river))
+4
stack/river/lib/dune
··· 1 + (library 2 + (name river) 3 + (public_name river) 4 + (libraries cohttp cohttp-lwt cohttp-lwt-unix str syndic lambdasoup))
+40
stack/river/lib/feed.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + type source = { name : string; url : string } 19 + type content = Atom of Syndic.Atom.feed | Rss2 of Syndic.Rss2.channel 20 + 21 + let string_of_feed = function Atom _ -> "Atom" | Rss2 _ -> "Rss2" 22 + 23 + type t = { name : string; title : string; url : string; content : content } 24 + 25 + let classify_feed ~xmlbase (xml : string) = 26 + try Atom (Syndic.Atom.parse ~xmlbase (Xmlm.make_input (`String (0, xml)))) 27 + with Syndic.Atom.Error.Error _ -> ( 28 + try Rss2 (Syndic.Rss2.parse ~xmlbase (Xmlm.make_input (`String (0, xml)))) 29 + with Syndic.Rss2.Error.Error _ -> failwith "Neither Atom nor RSS2 feed") 30 + 31 + let fetch (source : source) = 32 + let xmlbase = Uri.of_string @@ source.url in 33 + let response = Http.get source.url in 34 + let content = classify_feed ~xmlbase response in 35 + let title = 36 + match content with 37 + | Atom atom -> Util.string_of_text_construct atom.Syndic.Atom.title 38 + | Rss2 ch -> ch.Syndic.Rss2.title 39 + in 40 + { name = source.name; title; content; url = source.url }
+73
stack/river/lib/http.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + (* Download urls and cache them — especially during development, it slows down 19 + the rendering to download over and over again the same URL. *) 20 + 21 + open Printf 22 + open Lwt 23 + open Cohttp 24 + open Cohttp.Response 25 + open Cohttp.Code 26 + 27 + exception Status_unhandled of string 28 + exception Timeout 29 + 30 + let max_num_redirects = 5 31 + 32 + let get_location_exn headers = 33 + match Header.get headers "location" with 34 + | Some x -> x 35 + | None -> raise @@ Status_unhandled "Location HTTP header not found" 36 + 37 + let rec get_uri uri = function 38 + | 0 -> raise (Status_unhandled "Too many redirects") 39 + | n -> 40 + let main = 41 + Cohttp_lwt_unix.Client.get uri >>= fun (resp, body) -> 42 + match resp.status with 43 + | `OK -> Cohttp_lwt.Body.to_string body 44 + | `Found | `See_other | `Moved_permanently | `Temporary_redirect 45 + | `Permanent_redirect -> ( 46 + let l = Uri.of_string @@ get_location_exn resp.headers in 47 + match Uri.host l with 48 + | Some _ -> get_uri l (n - 1) 49 + | None -> 50 + let host = Uri.host uri in 51 + let scheme = Uri.scheme uri in 52 + let new_uri = Uri.with_scheme (Uri.with_host l host) scheme in 53 + get_uri new_uri (n - 1)) 54 + | _ -> raise @@ Status_unhandled (string_of_status resp.status) 55 + in 56 + let timeout = 57 + Lwt_unix.sleep (float_of_int 3) >>= fun () -> Lwt.fail Timeout 58 + in 59 + Lwt.pick [ main; timeout ] 60 + 61 + let get url = 62 + eprintf "Downloading %s ... %!" url; 63 + try 64 + let data = Lwt_main.run @@ get_uri (Uri.of_string url) max_num_redirects in 65 + eprintf "done %!\n"; 66 + data 67 + with 68 + | (Status_unhandled s | Failure s) as e -> 69 + eprintf "Failed: %s\n" s; 70 + raise e 71 + | Timeout as e -> 72 + eprintf "Failed: Timeout\n"; 73 + raise e
+28
stack/river/lib/http.mli
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + exception Status_unhandled of string 19 + exception Timeout 20 + 21 + val get : string -> string 22 + (** [get uri] returns the body of the response of the HTTP GET request on [uri]. 23 + 24 + If the answer of is a redirection, it will follow the redirections up to 5 25 + redirects. 26 + 27 + The answer is cached for [cache_secs] seconds, where [cache_secs] is 3600 28 + seconds (1 hour) by default. *)
+80
stack/river/lib/meta.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + (** This module determines an image to be used as preview of a website. 19 + 20 + It does this by following the same logic Google+ and other websites use, and 21 + described in this article: 22 + https://www.raymondcamden.com/2011/07/26/How-are-Facebook-and-Google-creating-link-previews *) 23 + 24 + let og_image html = 25 + let open Soup in 26 + let soup = parse html in 27 + try soup $ "meta[property=og:image]" |> R.attribute "content" |> Option.some 28 + with Failure _ -> None 29 + 30 + let image_src html = 31 + let open Soup in 32 + let soup = parse html in 33 + try soup $ "link[rel=\"image_src\"]" |> R.attribute "href" |> Option.some 34 + with Failure _ -> None 35 + 36 + let twitter_image html = 37 + let open Soup in 38 + let soup = parse html in 39 + try 40 + soup $ "meta[name=\"twitter:image\"]" |> R.attribute "content" 41 + |> Option.some 42 + with Failure _ -> None 43 + 44 + let og_description html = 45 + let open Soup in 46 + let soup = parse html in 47 + try 48 + soup $ "meta[property=og:description]" |> R.attribute "content" 49 + |> Option.some 50 + with Failure _ -> None 51 + 52 + let description html = 53 + let open Soup in 54 + let soup = parse html in 55 + try 56 + soup $ "meta[property=description]" |> R.attribute "content" |> Option.some 57 + with Failure _ -> None 58 + 59 + let preview_image html = 60 + let preview_image = 61 + match og_image html with 62 + | None -> ( 63 + match image_src html with 64 + | None -> twitter_image html 65 + | Some x -> Some x) 66 + | Some x -> Some x 67 + in 68 + match Option.map String.trim preview_image with 69 + | Some "" -> None 70 + | Some x -> Some x 71 + | None -> None 72 + 73 + let description html = 74 + let preview_image = 75 + match og_description html with None -> description html | Some x -> Some x 76 + in 77 + match Option.map String.trim preview_image with 78 + | Some "" -> None 79 + | Some x -> Some x 80 + | None -> None
+215
stack/river/lib/post.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + type t = { 19 + title : string; 20 + link : Uri.t option; 21 + date : Syndic.Date.t option; 22 + feed : Feed.t; 23 + author : string; 24 + email : string; 25 + content : Soup.soup Soup.node; 26 + mutable link_response : (string, string) result option; 27 + } 28 + 29 + let resolve_links_attr ~xmlbase attr el = 30 + Soup.R.attribute attr el 31 + |> Uri.of_string 32 + |> Syndic.XML.resolve ~xmlbase 33 + |> Uri.to_string 34 + |> fun value -> Soup.set_attribute attr value el 35 + 36 + (* Things that posts should not contain *) 37 + let undesired_tags = [ "style"; "script" ] 38 + let undesired_attr = [ "id" ] 39 + 40 + let html_of_text ?xmlbase s = 41 + let soup = Soup.parse s in 42 + let ($$) = Soup.($$) in 43 + soup $$ "a[href]" |> Soup.iter (resolve_links_attr ~xmlbase "href"); 44 + soup $$ "img[src]" |> Soup.iter (resolve_links_attr ~xmlbase "src"); 45 + undesired_tags |> List.iter (fun tag -> soup $$ tag |> Soup.iter Soup.delete); 46 + soup $$ "*" |> Soup.iter (fun el -> 47 + undesired_attr |> List.iter (fun attr -> Soup.delete_attribute attr el)); 48 + soup 49 + 50 + (* Do not trust sites using XML for HTML content. Convert to string and parse 51 + back. (Does not always fix bad HTML unfortunately.) *) 52 + let html_of_syndic = 53 + let ns_prefix _ = Some "" in 54 + fun ?xmlbase h -> 55 + html_of_text ?xmlbase 56 + (String.concat "" (List.map (Syndic.XML.to_string ~ns_prefix) h)) 57 + 58 + let string_of_option = function None -> "" | Some s -> s 59 + 60 + (* Email on the forge contain the name in parenthesis *) 61 + let forge_name_re = Str.regexp ".*(\\([^()]*\\))" 62 + 63 + let post_compare p1 p2 = 64 + (* Most recent posts first. Posts with no date are always last *) 65 + match (p1.date, p2.date) with 66 + | Some d1, Some d2 -> Syndic.Date.compare d2 d1 67 + | None, Some _ -> 1 68 + | Some _, None -> -1 69 + | None, None -> 1 70 + 71 + let rec remove n l = 72 + if n <= 0 then l else match l with [] -> [] | _ :: tl -> remove (n - 1) tl 73 + 74 + let rec take n = function 75 + | [] -> [] 76 + | e :: tl -> if n > 0 then e :: take (n - 1) tl else [] 77 + 78 + (* Blog feed 79 + ***********************************************************************) 80 + 81 + let post_of_atom ~(feed : Feed.t) (e : Syndic.Atom.entry) = 82 + let link = 83 + try 84 + Some 85 + (List.find (fun l -> l.Syndic.Atom.rel = Syndic.Atom.Alternate) e.links) 86 + .href 87 + with Not_found -> ( 88 + match e.links with 89 + | l :: _ -> Some l.href 90 + | [] -> ( 91 + match Uri.scheme e.id with 92 + | Some "http" -> Some e.id 93 + | Some "https" -> Some e.id 94 + | _ -> None)) 95 + in 96 + let date = 97 + match e.published with Some _ -> e.published | None -> Some e.updated 98 + in 99 + let content = 100 + match e.content with 101 + | Some (Text s) -> html_of_text s 102 + | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 103 + | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 104 + | Some (Mime _) | Some (Src _) | None -> ( 105 + match e.summary with 106 + | Some (Text s) -> html_of_text s 107 + | Some (Html (xmlbase, s)) -> html_of_text ?xmlbase s 108 + | Some (Xhtml (xmlbase, h)) -> html_of_syndic ?xmlbase h 109 + | None -> Soup.parse "") 110 + in 111 + let author, _ = e.authors in 112 + { 113 + title = Util.string_of_text_construct e.title; 114 + link; 115 + date; 116 + feed; 117 + author = author.name; 118 + email = ""; 119 + content; 120 + link_response = None; 121 + } 122 + 123 + let post_of_rss2 ~(feed : Feed.t) it = 124 + let title, content = 125 + match it.Syndic.Rss2.story with 126 + | All (t, xmlbase, d) -> ( 127 + ( t, 128 + match it.content with 129 + | _, "" -> html_of_text ?xmlbase d 130 + | xmlbase, c -> html_of_text ?xmlbase c )) 131 + | Title t -> 132 + let xmlbase, c = it.content in 133 + (t, html_of_text ?xmlbase c) 134 + | Description (xmlbase, d) -> ( 135 + ( "", 136 + match it.content with 137 + | _, "" -> html_of_text ?xmlbase d 138 + | xmlbase, c -> html_of_text ?xmlbase c )) 139 + in 140 + let link = 141 + match (it.guid, it.link) with 142 + | Some u, _ when u.permalink -> Some u.data 143 + | _, Some _ -> it.link 144 + | Some u, _ -> 145 + (* Sometimes the guid is indicated with isPermaLink="false" but is 146 + nonetheless the only URL we get (e.g. ocamlpro). *) 147 + Some u.data 148 + | None, None -> None 149 + in 150 + { 151 + title; 152 + link; 153 + feed; 154 + author = feed.name; 155 + email = string_of_option it.author; 156 + content; 157 + date = it.pubDate; 158 + link_response = None; 159 + } 160 + 161 + let posts_of_feed c = 162 + match c.Feed.content with 163 + | Feed.Atom f -> List.map (post_of_atom ~feed:c) f.Syndic.Atom.entries 164 + | Feed.Rss2 ch -> List.map (post_of_rss2 ~feed:c) ch.Syndic.Rss2.items 165 + 166 + let mk_entry post = 167 + let content = Syndic.Atom.Html (None, Soup.to_string post.content) in 168 + let contributors = 169 + [ Syndic.Atom.author ~uri:(Uri.of_string post.feed.url) post.feed.name ] 170 + in 171 + let links = 172 + match post.link with 173 + | Some l -> [ Syndic.Atom.link ~rel:Syndic.Atom.Alternate l ] 174 + | None -> [] 175 + in 176 + (* TODO: include source *) 177 + let id = 178 + match post.link with 179 + | Some l -> l 180 + | None -> Uri.of_string (Digest.to_hex (Digest.string post.title)) 181 + in 182 + let authors = (Syndic.Atom.author ~email:post.email post.author, []) in 183 + let title : Syndic.Atom.text_construct = Syndic.Atom.Text post.title in 184 + let updated = 185 + match post.date with 186 + (* Atom entry requires a date but RSS2 does not. So if a date 187 + * is not available, just capture the current date. *) 188 + | None -> Ptime.of_float_s (Unix.gettimeofday ()) |> Option.get 189 + | Some d -> d 190 + in 191 + Syndic.Atom.entry ~content ~contributors ~links ~id ~authors ~title ~updated 192 + () 193 + 194 + let mk_entries posts = List.map mk_entry posts 195 + 196 + let get_posts ?n ?(ofs = 0) planet_feeds = 197 + let posts = List.concat @@ List.map posts_of_feed planet_feeds in 198 + let posts = List.sort post_compare posts in 199 + let posts = remove ofs posts in 200 + match n with None -> posts | Some n -> take n posts 201 + 202 + (* Fetch the link response and cache it. *) 203 + let fetch_link t = 204 + match (t.link, t.link_response) with 205 + | None, _ -> None 206 + | Some _, Some (Ok x) -> Some x 207 + | Some _, Some (Error _) -> None 208 + | Some link, None -> ( 209 + try 210 + let response = Http.get (Uri.to_string link) in 211 + t.link_response <- Some (Ok response); 212 + Some response 213 + with _exn -> 214 + t.link_response <- Some (Error ""); 215 + None)
+44
stack/river/lib/river.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + type source = Feed.source = { name : string; url : string } 19 + type feed = Feed.t 20 + type post = Post.t 21 + 22 + let fetch = Feed.fetch 23 + let name feed = feed.Feed.name 24 + let url feed = feed.Feed.url 25 + let posts feeds = Post.get_posts feeds 26 + let title post = post.Post.title 27 + let link post = post.Post.link 28 + let date post = post.Post.date 29 + let feed post = post.Post.feed 30 + let author post = post.Post.author 31 + let email post = post.Post.email 32 + let content post = Soup.to_string post.Post.content 33 + 34 + let meta_description post = 35 + match Post.fetch_link post with 36 + | None -> None 37 + | Some response -> Meta.description response 38 + 39 + let seo_image post = 40 + match Post.fetch_link post with 41 + | None -> None 42 + | Some response -> Meta.preview_image response 43 + 44 + let create_atom_entries = Post.mk_entries
+73
stack/river/lib/river.mli
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + type source = { name : string; url : string } 19 + (** The source of a feed. *) 20 + 21 + type feed 22 + type post 23 + 24 + val fetch : source -> feed 25 + (** [fetch source] returns an Atom or RSS feed from a source. *) 26 + 27 + val name : feed -> string 28 + (** [name feed] is the name of the feed source passed to [fetch]. *) 29 + 30 + val url : feed -> string 31 + (** [url feed] is the url of the feed source passed to [fetch]. *) 32 + 33 + val posts : feed list -> post list 34 + (** [posts feeds] is the list of deduplicated posts of the given feeds. *) 35 + 36 + val feed : post -> feed 37 + (** [feed post] is the feed the post originates from. *) 38 + 39 + val title : post -> string 40 + (** [title post] is the title of the post. *) 41 + 42 + val link : post -> Uri.t option 43 + (** [link post] is the link of the post. *) 44 + 45 + val date : post -> Syndic.Date.t option 46 + (** [date post] is the date of the post. *) 47 + 48 + val author : post -> string 49 + (** [author post] is the author of the post. *) 50 + 51 + val email : post -> string 52 + (** [email post] is the email of the post. *) 53 + 54 + val content : post -> string 55 + (** [content post] is the content of the post. *) 56 + 57 + val meta_description : post -> string option 58 + (** [meta_description post] is the meta description of the post on the origin 59 + site. 60 + 61 + To get the meta description, we make get the content of [link post] and look 62 + for an HTML meta tag with the name "description" or "og:description".*) 63 + 64 + val seo_image : post -> string option 65 + (** [seo_image post] is the image to be used by social networks and links to the 66 + post. 67 + 68 + To get the seo image, we make get the content of [link post] and look for an 69 + HTML meta tag with the name "og:image" or "twitter:image". *) 70 + 71 + val create_atom_entries : post list -> Syndic.Atom.entry list 72 + (** [create_atom_feed posts] creates a list of atom entries, which can then be 73 + used to create an atom feed that is an aggregate of the posts. *)
+33
stack/river/lib/util.ml
··· 1 + (* 2 + * Copyright (c) 2014, OCaml.org project 3 + * Copyright (c) 2015 KC Sivaramakrishnan <sk826@cl.cam.ac.uk> 4 + * 5 + * Permission to use, copy, modify, and distribute this software for any 6 + * purpose with or without fee is hereby granted, provided that the above 7 + * copyright notice and this permission notice appear in all copies. 8 + * 9 + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 + *) 17 + 18 + open Syndic 19 + 20 + (* Remove all tags *) 21 + let rec syndic_to_buffer b = function 22 + | XML.Node (_, _, subs) -> List.iter (syndic_to_buffer b) subs 23 + | XML.Data (_, d) -> Buffer.add_string b d 24 + 25 + let syndic_to_string x = 26 + let b = Buffer.create 1024 in 27 + List.iter (syndic_to_buffer b) x; 28 + Buffer.contents b 29 + 30 + let string_of_text_construct : Atom.text_construct -> string = function 31 + (* FIXME: we probably would like to parse the HTML and remove the tags *) 32 + | Atom.Text s | Atom.Html (_, s) -> s 33 + | Atom.Xhtml (_, x) -> syndic_to_string x
+37
stack/river/river.opam
··· 1 + # This file is generated by dune, edit dune-project instead 2 + opam-version: "2.0" 3 + synopsis: "RSS2 and Atom feed aggregator for OCaml" 4 + description: "RSS2 and Atom feed aggregator for OCaml" 5 + maintainer: ["KC Sivaramakrishnan <sk826@cl.cam.ac.uk>"] 6 + authors: ["KC Sivaramakrishnan <sk826@cl.cam.ac.uk>"] 7 + license: "MIT" 8 + homepage: "https://github.com/kayceesrk/river" 9 + doc: "https://kayceesrk.github.io/river/" 10 + bug-reports: "https://github.com/kayceesrk/river/issues" 11 + depends: [ 12 + "ocaml" {>= "4.08.0"} 13 + "dune" {>= "3.0"} 14 + "syndic" {>= "1.5"} 15 + "cohttp" {>= "5.0.0"} 16 + "cohttp-lwt" {>= "5.0.0"} 17 + "cohttp-lwt-unix" {>= "5.0.0"} 18 + "ptime" 19 + "lwt" 20 + "lambdasoup" 21 + "odoc" {with-doc} 22 + ] 23 + build: [ 24 + ["dune" "subst"] {dev} 25 + [ 26 + "dune" 27 + "build" 28 + "-p" 29 + name 30 + "-j" 31 + jobs 32 + "@install" 33 + "@runtest" {with-test} 34 + "@doc" {with-doc} 35 + ] 36 + ] 37 + dev-repo: "git+https://github.com/kayceesrk/river.git"