jcs's openbsd hax
openbsd
/* $OpenBSD: cgi.c,v 1.120 2022/12/26 19:16:02 jmc Exp $ */
/*
 * Copyright (c) 2014-2019, 2021, 2022 Ingo Schwarze <schwarze@usta.de>
 * Copyright (c) 2011, 2012 Kristaps Dzonsons <kristaps@bsd.lv>
 * Copyright (c) 2022 Anna Vyalkova <cyber@sysrq.in>
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHORS DISCLAIM ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * Implementation of the man.cgi(8) program.
 */
21#include <sys/types.h>
22#include <sys/time.h>
23
24#include <ctype.h>
25#include <err.h>
26#include <errno.h>
27#include <fcntl.h>
28#include <limits.h>
29#include <stdint.h>
30#include <stdio.h>
31#include <stdlib.h>
32#include <string.h>
33#include <unistd.h>
34
35#include "mandoc_aux.h"
36#include "mandoc.h"
37#include "roff.h"
38#include "mdoc.h"
39#include "man.h"
40#include "mandoc_parse.h"
41#include "main.h"
42#include "manconf.h"
43#include "mansearch.h"
44#include "cgi.h"
45
/*
 * Search parameters of one request, in the form handed on to the
 * search and display functions.
 * The string members are either NULL or heap-allocated.
 */
struct query {
	char		*manpath;	/* desired manual directory */
	char		*arch;		/* architecture */
	char		*sec;		/* manual section */
	char		*query;		/* unparsed query expression */
	int		 equal;		/* match whole names, not substrings */
};
56
57struct req {
58 struct query q;
59 char **p; /* array of available manpaths */
60 size_t psz; /* number of available manpaths */
61 int isquery; /* QUERY_STRING used, not PATH_INFO */
62};
63
/*
 * Selects whether the query input box of the search form
 * is written with the "autofocus" attribute.
 */
enum focus {
	FOCUS_NONE = 0,		/* do not request the input focus */
	FOCUS_QUERY		/* put the focus on the query input box */
};
68
/*
 * Forward declarations of the file-local functions,
 * in alphabetical order.
 */
static	void	 html_print(const char *p);
static	void	 html_putchar(char c);
static	int	 http_decode(char *p);
static	void	 http_encode(const char *p);
static	void	 parse_manpath_conf(struct req *req);
static	void	 parse_path_info(struct req *req, const char *path);
static	void	 parse_query_string(struct req *req, const char *qs);
static	void	 pg_error_badrequest(const char *msg);
static	void	 pg_error_internal(void);
static	void	 pg_index(const struct req *req);
static	void	 pg_noresult(const struct req *req, int code,
		    const char *http_msg, const char *user_msg);
static	void	 pg_redirect(const struct req *req, const char *name);
static	void	 pg_search(const struct req *req);
static	void	 pg_searchres(const struct req *req,
		    struct manpage *r, size_t sz);
static	void	 pg_show(struct req *req, const char *fullpath);
static	int	 resp_begin_html(int code, const char *msg,
		    const char *file);
static	void	 resp_begin_http(int code, const char *msg);
static	void	 resp_catman(const struct req *req, const char *file);
static	int	 resp_copy(const char *element, const char *filename);
static	void	 resp_end_html(void);
static	void	 resp_format(const struct req *req, const char *file);
static	void	 resp_searchform(const struct req *req, enum focus focus);
static	void	 resp_show(const struct req *req, const char *file);
static	void	 set_query_attr(char **attr, char **val);
static	int	 validate_arch(const char *arch);
static	int	 validate_filename(const char *file);
static	int	 validate_manpath(const struct req *req,
		    const char *manpath);
static	int	 validate_urifrag(const char *frag);
99
100static const char *scriptname = SCRIPT_NAME;
101
/*
 * Priorities used to pick one page among several matches:
 * indexed by the section digit minus one; lower values win.
 */
static const int sec_prios[] = {1, 4, 5, 8, 6, 3, 7, 2, 9};

/*
 * Values of the "sec" form field and, at the same index,
 * the labels displayed for them in the section selector.
 */
static const char *const sec_numbers[] = {
	"0", "1", "2", "3", "3p", "4", "5", "6", "7", "8", "9"
};
static const char *const sec_names[] = {
	"All Sections",
	"1 - General Commands",
	"2 - System Calls",
	"3 - Library Functions",
	"3p - Perl Library",
	"4 - Device Drivers",
	"5 - File Formats",
	"6 - Games",
	"7 - Miscellaneous Information",
	"8 - System Manager\'s Manual",
	"9 - Kernel Developer\'s Manual"
};

/* Number of entries in the section selector. */
static const int sec_MAX = sizeof(sec_names) / sizeof(sec_names[0]);
120
/*
 * Architecture names that are accepted in requests
 * and offered in the architecture selector of the search form.
 */
static const char *const arch_names[] = {
	"amd64",	"alpha",	"armv7",	"arm64",
	"hppa",		"i386",		"landisk",	"loongson",
	"luna88k",	"macppc",	"mips64",	"octeon",
	"powerpc64",	"riscv64",	"sparc64",

	"amiga",	"arc",		"armish",	"arm32",
	"atari",	"aviion",	"beagle",	"cats",
	"hppa64",	"hp300",
	"ia64",		"mac68k",	"mvme68k",	"mvme88k",
	"mvmeppc",	"palm",		"pc532",	"pegasos",
	"pmax",		"powerpc",	"sgi",		"socppc",
	"solbourne",	"sparc",
	"sun3",		"vax",		"wgrisc",	"x68k",
	"zaurus"
};

/* Number of entries in arch_names[]. */
static const int arch_MAX = sizeof(arch_names) / sizeof(arch_names[0]);
138
/*
 * Write one character to standard output, escaping HTML along the way:
 * the double quote, the ampersand, and the angle brackets are replaced
 * by the character references &quot; &amp; &lt; and &gt; such that
 * the output is safe both in element content and in double-quoted
 * attribute values.
 * This will pass non-ASCII straight to output: be warned!
 */
static void
html_putchar(char c)
{
	switch (c) {
	case '"':
		fputs("&quot;", stdout);
		break;
	case '&':
		fputs("&amp;", stdout);
		break;
	case '>':
		fputs("&gt;", stdout);
		break;
	case '<':
		fputs("&lt;", stdout);
		break;
	default:
		/* Everything else is copied verbatim. */
		putchar((unsigned char)c);
		break;
	}
}
165
/*
 * Print a NUL-terminated string with HTML escaping
 * by passing each byte to html_putchar().
 * A NULL pointer is accepted and prints nothing.
 */
static void
html_print(const char *p)
{
	const char	*cp;

	if (p == NULL)
		return;
	for (cp = p; *cp != '\0'; cp++)
		html_putchar(*cp);
}
179
/*
 * Hand the allocated string *val over to the query attribute *attr.
 * The previous value of *attr is freed.  An empty string is not
 * stored: it is freed and *attr becomes NULL instead.
 * In any case, *val is NULL afterwards, so the caller no longer
 * owns the string.
 */
static void
set_query_attr(char **attr, char **val)
{
	char	*newval;

	newval = *val;
	*val = NULL;

	free(*attr);
	if (*newval != '\0') {
		*attr = newval;
		return;
	}
	free(newval);
	*attr = NULL;
}
196
197/*
198 * Parse the QUERY_STRING for key-value pairs
199 * and store the values into the query structure.
200 */
201static void
202parse_query_string(struct req *req, const char *qs)
203{
204 char *key, *val;
205 size_t keysz, valsz;
206
207 req->isquery = 1;
208 req->q.manpath = NULL;
209 req->q.arch = NULL;
210 req->q.sec = NULL;
211 req->q.query = NULL;
212 req->q.equal = 1;
213
214 key = val = NULL;
215 while (*qs != '\0') {
216
217 /* Parse one key. */
218
219 keysz = strcspn(qs, "=;&");
220 key = mandoc_strndup(qs, keysz);
221 qs += keysz;
222 if (*qs != '=')
223 goto next;
224
225 /* Parse one value. */
226
227 valsz = strcspn(++qs, ";&");
228 val = mandoc_strndup(qs, valsz);
229 qs += valsz;
230
231 /* Decode and catch encoding errors. */
232
233 if ( ! (http_decode(key) && http_decode(val)))
234 goto next;
235
236 /* Handle key-value pairs. */
237
238 if ( ! strcmp(key, "query"))
239 set_query_attr(&req->q.query, &val);
240
241 else if ( ! strcmp(key, "apropos"))
242 req->q.equal = !strcmp(val, "0");
243
244 else if ( ! strcmp(key, "manpath")) {
245#ifdef COMPAT_OLDURI
246 if ( ! strncmp(val, "OpenBSD ", 8)) {
247 val[7] = '-';
248 if ('C' == val[8])
249 val[8] = 'c';
250 }
251#endif
252 set_query_attr(&req->q.manpath, &val);
253 }
254
255 else if ( ! (strcmp(key, "sec")
256#ifdef COMPAT_OLDURI
257 && strcmp(key, "sektion")
258#endif
259 )) {
260 if ( ! strcmp(val, "0"))
261 *val = '\0';
262 set_query_attr(&req->q.sec, &val);
263 }
264
265 else if ( ! strcmp(key, "arch")) {
266 if ( ! strcmp(val, "default"))
267 *val = '\0';
268 set_query_attr(&req->q.arch, &val);
269 }
270
271 /*
272 * The key must be freed in any case.
273 * The val may have been handed over to the query
274 * structure, in which case it is now NULL.
275 */
276next:
277 free(key);
278 key = NULL;
279 free(val);
280 val = NULL;
281
282 if (*qs != '\0')
283 qs++;
284 }
285}
286
/*
 * HTTP-decode a string in place: "%XX" with two hexadecimal digits
 * becomes the byte with that value and '+' becomes a space, so
 * "%4e+foo" turns into "N foo".
 *
 * Returns 1 on success and 0 on an encoding error, which is a '%'
 * that is not followed by exactly two hexadecimal digits, or the
 * escape "%00".  After an error, the string may be partially
 * decoded and should not be used.
 */
static int
http_decode(char *p)
{
	char	*q;
	int	 c, digit, i;

	for (q = p; *p != '\0'; p++, q++) {
		if (*p == '+') {
			*q = ' ';
			continue;
		}
		if (*p != '%') {
			*q = *p;
			continue;
		}

		/*
		 * isxdigit() rejects the terminating NUL, so p[2]
		 * is only inspected when p[1] is part of the string.
		 */

		c = 0;
		for (i = 1; i <= 2; i++) {
			if (!isxdigit((unsigned char)p[i]))
				return 0;
			if (isdigit((unsigned char)p[i]))
				digit = p[i] - '0';
			else
				digit = tolower((unsigned char)p[i]) -
				    'a' + 10;
			c = c * 16 + digit;
		}

		/* An embedded NUL would silently truncate the string. */

		if (c == 0)
			return 0;

		*q = (char)c;
		p += 2;
	}

	*q = '\0';
	return 1;
}
322
/*
 * Write a string to standard output in percent-encoded form:
 * ASCII letters, digits, and the characters "-._~" are copied,
 * every other byte is written as '%' and two uppercase hex digits.
 */
static void
http_encode(const char *p)
{
	unsigned char	 uc;

	while ((uc = (unsigned char)*p++) != '\0') {
		if (isalnum(uc) || strchr("-._~", uc) != NULL)
			putchar(uc);
		else
			printf("%%%2.2X", uc);
	}
}
334
/*
 * Write the HTTP response header and flush it.
 * A "Status:" line is only written for codes other than 200;
 * the remaining header fields are the same for every response.
 */
static void
resp_begin_http(int code, const char *msg)
{
	if (code != 200)
		printf("Status: %d %s\r\n", code, msg);

	fputs("Content-Type: text/html; charset=utf-8\r\n"
	    "Cache-Control: no-cache\r\n"
	    "Content-Security-Policy: default-src 'none'; "
	    "style-src 'self' 'unsafe-inline'\r\n"
	    "Pragma: no-cache\r\n"
	    "\r\n", stdout);

	fflush(stdout);
}
351
/*
 * Copy the content of a file to standard output, bypassing stdio.
 * If element is not NULL, an opening tag of that name is printed
 * first; closing it is left to the caller.
 *
 * Returns 0 if the file cannot be opened, in which case nothing
 * is written, or 1 otherwise.  Errors while copying end the copy
 * early but do not change the return value because the opening
 * tag has already been written at that point.
 */
static int
resp_copy(const char *element, const char *filename)
{
	char	 buf[4096];
	ssize_t	 nr, nw, off;
	int	 fd;

	if ((fd = open(filename, O_RDONLY)) == -1)
		return 0;

	if (element != NULL)
		printf("<%s>\n", element);

	/* Push out buffered stdio data before using write(2). */
	fflush(stdout);

	while ((nr = read(fd, buf, sizeof(buf))) > 0) {
		/* write(2) may accept less than requested: loop. */
		for (off = 0; off < nr; off += nw) {
			nw = write(STDOUT_FILENO, buf + off, nr - off);
			if (nw == -1) {
				if (errno != EINTR)
					goto done;
				nw = 0;		/* Interrupted: retry. */
			}
		}
	}
done:
	close(fd);
	return 1;
}
370
371static int
372resp_begin_html(int code, const char *msg, const char *file)
373{
374 const char *name, *sec, *cp;
375 int namesz, secsz;
376
377 resp_begin_http(code, msg);
378
379 printf("<!DOCTYPE html>\n"
380 "<html>\n"
381 "<head>\n"
382 " <meta charset=\"UTF-8\"/>\n"
383 " <meta name=\"viewport\""
384 " content=\"width=device-width, initial-scale=1.0\">\n"
385 " <link rel=\"stylesheet\" href=\"%s/mandoc.css\""
386 " type=\"text/css\" media=\"all\">\n"
387 " <title>",
388 CSS_DIR);
389 if (file != NULL) {
390 cp = strrchr(file, '/');
391 name = cp == NULL ? file : cp + 1;
392 cp = strrchr(name, '.');
393 namesz = cp == NULL ? strlen(name) : cp - name;
394 sec = NULL;
395 if (cp != NULL && cp[1] != '0') {
396 sec = cp + 1;
397 secsz = strlen(sec);
398 } else if (name - file > 1) {
399 for (cp = name - 2; cp >= file; cp--) {
400 if (*cp < '1' || *cp > '9')
401 continue;
402 sec = cp;
403 secsz = name - cp - 1;
404 break;
405 }
406 }
407 printf("%.*s", namesz, name);
408 if (sec != NULL)
409 printf("(%.*s)", secsz, sec);
410 fputs(" - ", stdout);
411 }
412 printf("%s</title>\n"
413 "</head>\n"
414 "<body>\n",
415 CUSTOMIZE_TITLE);
416
417 return resp_copy("header", MAN_DIR "/header.html");
418}
419
420static void
421resp_end_html(void)
422{
423 if (resp_copy("footer", MAN_DIR "/footer.html"))
424 puts("</footer>");
425
426 puts("</body>\n"
427 "</html>");
428}
429
430static void
431resp_searchform(const struct req *req, enum focus focus)
432{
433 int i;
434
435 printf("<form role=\"search\" action=\"/%s\" method=\"get\" "
436 "autocomplete=\"off\" autocapitalize=\"none\">\n"
437 " <fieldset>\n"
438 " <legend>Manual Page Search Parameters</legend>\n",
439 scriptname);
440
441 /* Write query input box. */
442
443 printf(" <label>Search query:\n"
444 " <input type=\"search\" name=\"query\" value=\"");
445 if (req->q.query != NULL)
446 html_print(req->q.query);
447 printf("\" size=\"40\"");
448 if (focus == FOCUS_QUERY)
449 printf(" autofocus");
450 puts(">\n </label>");
451
452 /* Write submission buttons. */
453
454 printf( " <button type=\"submit\" name=\"apropos\" value=\"0\">"
455 "man</button>\n"
456 " <button type=\"submit\" name=\"apropos\" value=\"1\">"
457 "apropos</button>\n"
458 " <br/>\n");
459
460 /* Write section selector. */
461
462 puts(" <select name=\"sec\" aria-label=\"Manual section\">");
463 for (i = 0; i < sec_MAX; i++) {
464 printf(" <option value=\"%s\"", sec_numbers[i]);
465 if (NULL != req->q.sec &&
466 0 == strcmp(sec_numbers[i], req->q.sec))
467 printf(" selected=\"selected\"");
468 printf(">%s</option>\n", sec_names[i]);
469 }
470 puts(" </select>");
471
472 /* Write architecture selector. */
473
474 printf( " <select name=\"arch\" aria-label=\"CPU architecture\">\n"
475 " <option value=\"default\"");
476 if (NULL == req->q.arch)
477 printf(" selected=\"selected\"");
478 puts(">All Architectures</option>");
479 for (i = 0; i < arch_MAX; i++) {
480 printf(" <option");
481 if (NULL != req->q.arch &&
482 0 == strcmp(arch_names[i], req->q.arch))
483 printf(" selected=\"selected\"");
484 printf(">%s</option>\n", arch_names[i]);
485 }
486 puts(" </select>");
487
488 /* Write manpath selector. */
489
490 if (req->psz > 1) {
491 puts(" <select name=\"manpath\""
492 " aria-label=\"Manual path\">");
493 for (i = 0; i < (int)req->psz; i++) {
494 printf(" <option");
495 if (strcmp(req->q.manpath, req->p[i]) == 0)
496 printf(" selected=\"selected\"");
497 printf(">");
498 html_print(req->p[i]);
499 puts("</option>");
500 }
501 puts(" </select>");
502 }
503
504 puts(" </fieldset>\n"
505 "</form>");
506}
507
/*
 * Check that a string consists of ASCII letters, digits,
 * and the characters '-', '.', '/', and '_' only.
 * Returns 1 if so (including for the empty string), 0 otherwise.
 */
static int
validate_urifrag(const char *frag)
{
	const char	*cp;

	for (cp = frag; *cp != '\0'; cp++) {
		if (isalnum((unsigned char)*cp))
			continue;
		if (strchr("-./_", *cp) == NULL)
			return 0;
	}
	return 1;
}
521
522static int
523validate_manpath(const struct req *req, const char* manpath)
524{
525 size_t i;
526
527 for (i = 0; i < req->psz; i++)
528 if ( ! strcmp(manpath, req->p[i]))
529 return 1;
530
531 return 0;
532}
533
534static int
535validate_arch(const char *arch)
536{
537 int i;
538
539 for (i = 0; i < arch_MAX; i++)
540 if (strcmp(arch, arch_names[i]) == 0)
541 return 1;
542
543 return 0;
544}
545
/*
 * Check a file name relative to a manpath.  After skipping
 * an optional leading "./", the name must start with "man" or
 * "cat" and must contain neither "../" nor "/..".
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int
validate_filename(const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	/* Refuse anything that might step out of the manpath. */
	if (strstr(file, "../") != NULL || strstr(file, "/..") != NULL)
		return 0;

	return strncmp(file, "man", 3) == 0 ||
	    strncmp(file, "cat", 3) == 0;
}
556
557static void
558pg_index(const struct req *req)
559{
560 if (resp_begin_html(200, NULL, NULL) == 0)
561 puts("<header>");
562 resp_searchform(req, FOCUS_QUERY);
563 printf("</header>\n"
564 "<main>\n"
565 "<p role=\"doc-notice\" aria-label=\"Usage\">\n"
566 "This web interface is documented in the\n"
567 "<a class=\"Xr\" href=\"/%s%sman.cgi.8\""
568 " aria-label=\"man dot CGI, section 8\">man.cgi(8)</a>\n"
569 "manual, and the\n"
570 "<a class=\"Xr\" href=\"/%s%sapropos.1\""
571 " aria-label=\"apropos, section 1\">apropos(1)</a>\n"
572 "manual explains the query syntax.\n"
573 "</p>\n"
574 "</main>\n",
575 scriptname, *scriptname == '\0' ? "" : "/",
576 scriptname, *scriptname == '\0' ? "" : "/");
577 resp_end_html();
578}
579
580static void
581pg_noresult(const struct req *req, int code, const char *http_msg,
582 const char *user_msg)
583{
584 if (resp_begin_html(code, http_msg, NULL) == 0)
585 puts("<header>");
586 resp_searchform(req, FOCUS_QUERY);
587 puts("</header>");
588 puts("<main>");
589 puts("<p role=\"doc-notice\" aria-label=\"No result\">");
590 puts(user_msg);
591 puts("</p>");
592 puts("</main>");
593 resp_end_html();
594}
595
596static void
597pg_error_badrequest(const char *msg)
598{
599 if (resp_begin_html(400, "Bad Request", NULL))
600 puts("</header>");
601 puts("<main>\n"
602 "<h1>Bad Request</h1>\n"
603 "<p role=\"doc-notice\" aria-label=\"Bad Request\">");
604 puts(msg);
605 printf("Try again from the\n"
606 "<a href=\"/%s\">main page</a>.\n"
607 "</p>\n"
608 "</main>\n", scriptname);
609 resp_end_html();
610}
611
/*
 * Write a "500 Internal Server Error" page.
 * No search form and no details are included.
 */
static void
pg_error_internal(void)
{
	int	 have_header;

	have_header = resp_begin_html(500, "Internal Server Error", NULL);
	if (have_header != 0)
		puts("</header>");
	puts("<main><p role=\"doc-notice\">Internal Server Error</p></main>");
	resp_end_html();
}
620
621static void
622pg_redirect(const struct req *req, const char *name)
623{
624 printf("Status: 303 See Other\r\n"
625 "Location: /");
626 if (*scriptname != '\0')
627 printf("%s/", scriptname);
628 if (strcmp(req->q.manpath, req->p[0]))
629 printf("%s/", req->q.manpath);
630 if (req->q.arch != NULL)
631 printf("%s/", req->q.arch);
632 http_encode(name);
633 if (req->q.sec != NULL) {
634 putchar('.');
635 http_encode(req->q.sec);
636 }
637 printf("\r\nContent-Type: text/html; charset=utf-8\r\n\r\n");
638}
639
640static void
641pg_searchres(const struct req *req, struct manpage *r, size_t sz)
642{
643 char *arch, *archend;
644 const char *sec;
645 size_t i, iuse;
646 int archprio, archpriouse;
647 int prio, priouse;
648 int have_header;
649
650 for (i = 0; i < sz; i++) {
651 if (validate_filename(r[i].file))
652 continue;
653 warnx("invalid filename %s in %s database",
654 r[i].file, req->q.manpath);
655 pg_error_internal();
656 return;
657 }
658
659 if (req->isquery && sz == 1) {
660 /*
661 * If we have just one result, then jump there now
662 * without any delay.
663 */
664 printf("Status: 303 See Other\r\n"
665 "Location: /");
666 if (*scriptname != '\0')
667 printf("%s/", scriptname);
668 if (strcmp(req->q.manpath, req->p[0]))
669 printf("%s/", req->q.manpath);
670 printf("%s\r\n"
671 "Content-Type: text/html; charset=utf-8\r\n\r\n",
672 r[0].file);
673 return;
674 }
675
676 /*
677 * In man(1) mode, show one of the pages
678 * even if more than one is found.
679 */
680
681 iuse = 0;
682 if (req->q.equal || sz == 1) {
683 priouse = 20;
684 archpriouse = 3;
685 for (i = 0; i < sz; i++) {
686 sec = r[i].file;
687 sec += strcspn(sec, "123456789");
688 if (sec[0] == '\0')
689 continue;
690 prio = sec_prios[sec[0] - '1'];
691 if (sec[1] != '/')
692 prio += 10;
693 if (req->q.arch == NULL) {
694 archprio =
695 ((arch = strchr(sec + 1, '/'))
696 == NULL) ? 3 :
697 ((archend = strchr(arch + 1, '/'))
698 == NULL) ? 0 :
699 strncmp(arch, "amd64/",
700 archend - arch) ? 2 : 1;
701 if (archprio < archpriouse) {
702 archpriouse = archprio;
703 priouse = prio;
704 iuse = i;
705 continue;
706 }
707 if (archprio > archpriouse)
708 continue;
709 }
710 if (prio >= priouse)
711 continue;
712 priouse = prio;
713 iuse = i;
714 }
715 have_header = resp_begin_html(200, NULL, r[iuse].file);
716 } else
717 have_header = resp_begin_html(200, NULL, NULL);
718
719 if (have_header == 0)
720 puts("<header>");
721 resp_searchform(req,
722 req->q.equal || sz == 1 ? FOCUS_NONE : FOCUS_QUERY);
723 puts("</header>");
724
725 if (sz > 1) {
726 puts("<nav>");
727 puts("<table class=\"results\">");
728 for (i = 0; i < sz; i++) {
729 printf(" <tr>\n"
730 " <td>"
731 "<a class=\"Xr\" href=\"/");
732 if (*scriptname != '\0')
733 printf("%s/", scriptname);
734 if (strcmp(req->q.manpath, req->p[0]))
735 printf("%s/", req->q.manpath);
736 printf("%s\">", r[i].file);
737 html_print(r[i].names);
738 printf("</a></td>\n"
739 " <td><span class=\"Nd\">");
740 html_print(r[i].output);
741 puts("</span></td>\n"
742 " </tr>");
743 }
744 puts("</table>");
745 puts("</nav>");
746 }
747
748 if (req->q.equal || sz == 1) {
749 puts("<hr>");
750 resp_show(req, r[iuse].file);
751 }
752
753 resp_end_html();
754}
755
/*
 * Close the italic and bold elements that are currently open
 * in resp_catman() and reset both flags.
 */
static void
catman_close(int *italic, int *bold)
{
	if (*italic)
		printf("</i>");
	if (*bold)
		printf("</b>");
	*italic = *bold = 0;
}

/*
 * Write a preformatted manual page (catpage) as HTML inside
 * <div class="catman"><pre>.  Backspace overstrike sequences are
 * translated: "_\bX" becomes italic X, some overstruck pairs
 * inherited from man2html become '*' or '+', and any other "Y\bX"
 * becomes bold X.  All text is HTML-escaped.
 * If the file cannot be opened, a notice is written instead.
 * The req argument is not used.
 */
static void
resp_catman(const struct req *req, const char *file)
{
	FILE	*f;
	char	*p;
	size_t	 sz;
	ssize_t	 len, i;
	int	 italic, bold;
	char	 cur, over;

	if ((f = fopen(file, "r")) == NULL) {
		puts("<p role=\"doc-notice\">\n"
		    " You specified an invalid manual file.\n"
		    "</p>");
		return;
	}

	puts("<div class=\"catman\">\n"
	    "<pre>");

	p = NULL;
	sz = 0;

	while ((len = getline(&p, &sz, f)) != -1) {
		bold = italic = 0;
		for (i = 0; i < len - 1; i++) {
			/*
			 * This means that the catpage is out of state.
			 * Ignore it and keep going (although the
			 * catpage is bogus).
			 */

			if (p[i] == '\b' || p[i] == '\n')
				continue;

			/*
			 * Print a regular character.
			 * Close out any bold/italic scopes.
			 */

			if (p[i + 1] != '\b') {
				catman_close(&italic, &bold);
				html_putchar(p[i]);
				continue;
			}

			/*
			 * In back-space mode, make sure there is
			 * a character after the backspace.
			 */

			if (i + 2 >= len)
				continue;

			cur = p[i];		/* struck-over character */
			over = p[i + 2];	/* printed on top of it */

			/* Italic mode. */

			if (cur == '_') {
				if (bold)
					printf("</b>");
				if (!italic)
					printf("<i>");
				bold = 0;
				italic = 1;
				i += 2;
				html_putchar(p[i]);
				continue;
			}

			/*
			 * Handle funny behaviour troff-isms.
			 * These grok'd from the original man2html.c.
			 * Both characters of each pair are compared
			 * across the backspace, in either order.
			 */

			if ((cur == '+' && over == 'o') ||
			    (cur == 'o' && over == '+') ||
			    (cur == '|' && over == '=') ||
			    (cur == '=' && over == '|') ||
			    (cur == '*' && over == '=') ||
			    (cur == '=' && over == '*') ||
			    (cur == '*' && over == '|') ||
			    (cur == '|' && over == '*')) {
				catman_close(&italic, &bold);
				putchar('*');
				i += 2;
				continue;
			}
			if ((cur == '|' && over == '-') ||
			    (cur == '-' && over == '|') ||
			    (cur == '+' && over == '-') ||
			    (cur == '-' && over == '+') ||
			    (cur == '+' && over == '|') ||
			    (cur == '|' && over == '+')) {
				catman_close(&italic, &bold);
				putchar('+');
				i += 2;
				continue;
			}

			/* Bold mode. */

			if (italic)
				printf("</i>");
			if (!bold)
				printf("<b>");
			bold = 1;
			italic = 0;
			i += 2;
			html_putchar(p[i]);
		}

		/*
		 * Clean up the last character.
		 * We can get to a newline; don't print that.
		 */

		catman_close(&italic, &bold);

		if (i == len - 1 && p[i] != '\n')
			html_putchar(p[i]);

		putchar('\n');
	}
	free(p);

	puts("</pre>\n"
	    "</div>");

	fclose(f);
}
894
895static void
896resp_format(const struct req *req, const char *file)
897{
898 struct manoutput conf;
899 struct mparse *mp;
900 struct roff_meta *meta;
901 void *vp;
902 int fd;
903 int usepath;
904
905 if (-1 == (fd = open(file, O_RDONLY))) {
906 puts("<p role=\"doc-notice\">\n"
907 " You specified an invalid manual file.\n"
908 "</p>");
909 return;
910 }
911
912 mchars_alloc();
913 mp = mparse_alloc(MPARSE_SO | MPARSE_UTF8 | MPARSE_LATIN1 |
914 MPARSE_VALIDATE, MANDOC_OS_OTHER, req->q.manpath);
915 mparse_readfd(mp, fd, file);
916 close(fd);
917 meta = mparse_result(mp);
918
919 memset(&conf, 0, sizeof(conf));
920 conf.fragment = 1;
921 conf.style = mandoc_strdup(CSS_DIR "/mandoc.css");
922 usepath = strcmp(req->q.manpath, req->p[0]);
923 mandoc_asprintf(&conf.man, "/%s%s%s%s%%N.%%S",
924 scriptname, *scriptname == '\0' ? "" : "/",
925 usepath ? req->q.manpath : "", usepath ? "/" : "");
926
927 vp = html_alloc(&conf);
928 if (meta->macroset == MACROSET_MDOC)
929 html_mdoc(vp, meta);
930 else
931 html_man(vp, meta);
932
933 html_free(vp);
934 mparse_free(mp);
935 mchars_free();
936 free(conf.man);
937 free(conf.style);
938}
939
/*
 * Write one manual page.  After skipping an optional leading "./",
 * file names starting with 'c' are treated as preformatted pages
 * and all others as manual page sources.
 */
static void
resp_show(const struct req *req, const char *file)
{
	if (file[0] == '.' && file[1] == '/')
		file += 2;

	switch (*file) {
	case 'c':
		resp_catman(req, file);
		break;
	default:
		resp_format(req, file);
		break;
	}
}
952
953static void
954pg_show(struct req *req, const char *fullpath)
955{
956 char *manpath;
957 const char *file;
958
959 if ((file = strchr(fullpath, '/')) == NULL) {
960 pg_error_badrequest(
961 "You did not specify a page to show.");
962 return;
963 }
964 manpath = mandoc_strndup(fullpath, file - fullpath);
965 file++;
966
967 if ( ! validate_manpath(req, manpath)) {
968 pg_error_badrequest(
969 "You specified an invalid manpath.");
970 free(manpath);
971 return;
972 }
973
974 /*
975 * Begin by chdir()ing into the manpath.
976 * This way we can pick up the database files, which are
977 * relative to the manpath root.
978 */
979
980 if (chdir(manpath) == -1) {
981 warn("chdir %s", manpath);
982 pg_error_internal();
983 free(manpath);
984 return;
985 }
986 free(manpath);
987
988 if ( ! validate_filename(file)) {
989 pg_error_badrequest(
990 "You specified an invalid manual file.");
991 return;
992 }
993
994 if (resp_begin_html(200, NULL, file) == 0)
995 puts("<header>");
996 resp_searchform(req, FOCUS_NONE);
997 puts("</header>");
998 resp_show(req, file);
999 resp_end_html();
1000}
1001
1002static void
1003pg_search(const struct req *req)
1004{
1005 struct mansearch search;
1006 struct manpaths paths;
1007 struct manpage *res;
1008 char **argv;
1009 char *query, *rp, *wp;
1010 size_t ressz;
1011 int argc;
1012
1013 /*
1014 * Begin by chdir()ing into the root of the manpath.
1015 * This way we can pick up the database files, which are
1016 * relative to the manpath root.
1017 */
1018
1019 if (chdir(req->q.manpath) == -1) {
1020 warn("chdir %s", req->q.manpath);
1021 pg_error_internal();
1022 return;
1023 }
1024
1025 search.arch = req->q.arch;
1026 search.sec = req->q.sec;
1027 search.outkey = "Nd";
1028 search.argmode = req->q.equal ? ARG_NAME : ARG_EXPR;
1029 search.firstmatch = 1;
1030
1031 paths.sz = 1;
1032 paths.paths = mandoc_malloc(sizeof(char *));
1033 paths.paths[0] = mandoc_strdup(".");
1034
1035 /*
1036 * Break apart at spaces with backslash-escaping.
1037 */
1038
1039 argc = 0;
1040 argv = NULL;
1041 rp = query = mandoc_strdup(req->q.query);
1042 for (;;) {
1043 while (isspace((unsigned char)*rp))
1044 rp++;
1045 if (*rp == '\0')
1046 break;
1047 argv = mandoc_reallocarray(argv, argc + 1, sizeof(char *));
1048 argv[argc++] = wp = rp;
1049 for (;;) {
1050 if (isspace((unsigned char)*rp)) {
1051 *wp = '\0';
1052 rp++;
1053 break;
1054 }
1055 if (rp[0] == '\\' && rp[1] != '\0')
1056 rp++;
1057 if (wp != rp)
1058 *wp = *rp;
1059 if (*rp == '\0')
1060 break;
1061 wp++;
1062 rp++;
1063 }
1064 }
1065
1066 res = NULL;
1067 ressz = 0;
1068 if (req->isquery && req->q.equal && argc == 1)
1069 pg_redirect(req, argv[0]);
1070 else if (mansearch(&search, &paths, argc, argv, &res, &ressz) == 0)
1071 pg_noresult(req, 400, "Bad Request",
1072 "You entered an invalid query.");
1073 else if (ressz == 0)
1074 pg_noresult(req, 404, "Not Found", "No results found.");
1075 else
1076 pg_searchres(req, res, ressz);
1077
1078 free(query);
1079 mansearch_free(res, ressz);
1080 free(paths.paths[0]);
1081 free(paths.paths);
1082}
1083
1084int
1085main(void)
1086{
1087 struct req req;
1088 struct itimerval itimer;
1089 const char *path;
1090 const char *querystring;
1091 int i;
1092
1093 /*
1094 * The "rpath" pledge could be revoked after mparse_readfd()
1095 * if the file descriptor to "/footer.html" would be opened
1096 * up front, but it's probably not worth the complication
1097 * of the code it would cause: it would require scattering
1098 * pledge() calls in multiple low-level resp_*() functions.
1099 */
1100
1101 if (pledge("stdio rpath", NULL) == -1) {
1102 warn("pledge");
1103 pg_error_internal();
1104 return EXIT_FAILURE;
1105 }
1106
1107 /* Poor man's ReDoS mitigation. */
1108
1109 itimer.it_value.tv_sec = 2;
1110 itimer.it_value.tv_usec = 0;
1111 itimer.it_interval.tv_sec = 2;
1112 itimer.it_interval.tv_usec = 0;
1113 if (setitimer(ITIMER_VIRTUAL, &itimer, NULL) == -1) {
1114 warn("setitimer");
1115 pg_error_internal();
1116 return EXIT_FAILURE;
1117 }
1118
1119 /*
1120 * First we change directory into the MAN_DIR so that
1121 * subsequent scanning for manpath directories is rooted
1122 * relative to the same position.
1123 */
1124
1125 if (chdir(MAN_DIR) == -1) {
1126 warn("MAN_DIR: %s", MAN_DIR);
1127 pg_error_internal();
1128 return EXIT_FAILURE;
1129 }
1130
1131 memset(&req, 0, sizeof(struct req));
1132 req.q.equal = 1;
1133 parse_manpath_conf(&req);
1134
1135 /* Parse the path info and the query string. */
1136
1137 if ((path = getenv("PATH_INFO")) == NULL)
1138 path = "";
1139 else if (*path == '/')
1140 path++;
1141
1142 if (*path != '\0') {
1143 parse_path_info(&req, path);
1144 if (req.q.manpath == NULL || req.q.sec == NULL ||
1145 *req.q.query == '\0' || access(path, F_OK) == -1)
1146 path = "";
1147 } else if ((querystring = getenv("QUERY_STRING")) != NULL)
1148 parse_query_string(&req, querystring);
1149
1150 /* Validate parsed data and add defaults. */
1151
1152 if (req.q.manpath == NULL)
1153 req.q.manpath = mandoc_strdup(req.p[0]);
1154 else if ( ! validate_manpath(&req, req.q.manpath)) {
1155 pg_error_badrequest(
1156 "You specified an invalid manpath.");
1157 return EXIT_FAILURE;
1158 }
1159
1160 if (req.q.arch != NULL && validate_arch(req.q.arch) == 0) {
1161 pg_error_badrequest(
1162 "You specified an invalid architecture.");
1163 return EXIT_FAILURE;
1164 }
1165
1166 /* Dispatch to the three different pages. */
1167
1168 if ('\0' != *path)
1169 pg_show(&req, path);
1170 else if (NULL != req.q.query)
1171 pg_search(&req);
1172 else
1173 pg_index(&req);
1174
1175 free(req.q.manpath);
1176 free(req.q.arch);
1177 free(req.q.sec);
1178 free(req.q.query);
1179 for (i = 0; i < (int)req.psz; i++)
1180 free(req.p[i]);
1181 free(req.p);
1182 return EXIT_SUCCESS;
1183}
1184
1185/*
1186 * Translate PATH_INFO to a query.
1187 */
1188static void
1189parse_path_info(struct req *req, const char *path)
1190{
1191 const char *name, *sec, *end;
1192
1193 req->isquery = 0;
1194 req->q.equal = 1;
1195 req->q.manpath = NULL;
1196 req->q.arch = NULL;
1197
1198 /* Mandatory manual page name. */
1199 if ((name = strrchr(path, '/')) == NULL)
1200 name = path;
1201 else
1202 name++;
1203
1204 /* Optional trailing section. */
1205 sec = strrchr(name, '.');
1206 if (sec != NULL && isdigit((unsigned char)*++sec)) {
1207 req->q.query = mandoc_strndup(name, sec - name - 1);
1208 req->q.sec = mandoc_strdup(sec);
1209 } else {
1210 req->q.query = mandoc_strdup(name);
1211 req->q.sec = NULL;
1212 }
1213
1214 /* Handle the case of name[.section] only. */
1215 if (name == path)
1216 return;
1217
1218 /* Optional manpath. */
1219 end = strchr(path, '/');
1220 req->q.manpath = mandoc_strndup(path, end - path);
1221 if (validate_manpath(req, req->q.manpath)) {
1222 path = end + 1;
1223 if (name == path)
1224 return;
1225 } else {
1226 free(req->q.manpath);
1227 req->q.manpath = NULL;
1228 }
1229
1230 /* Optional section. */
1231 if (strncmp(path, "man", 3) == 0 || strncmp(path, "cat", 3) == 0) {
1232 path += 3;
1233 end = strchr(path, '/');
1234 free(req->q.sec);
1235 req->q.sec = mandoc_strndup(path, end - path);
1236 path = end + 1;
1237 if (name == path)
1238 return;
1239 }
1240
1241 /* Optional architecture. */
1242 end = strchr(path, '/');
1243 if (end + 1 != name) {
1244 pg_error_badrequest(
1245 "You specified too many directory components.");
1246 exit(EXIT_FAILURE);
1247 }
1248 req->q.arch = mandoc_strndup(path, end - path);
1249 if (validate_arch(req->q.arch) == 0) {
1250 pg_error_badrequest(
1251 "You specified an invalid directory component.");
1252 exit(EXIT_FAILURE);
1253 }
1254}
1255
1256/*
1257 * Scan for indexable paths.
1258 */
1259static void
1260parse_manpath_conf(struct req *req)
1261{
1262 FILE *fp;
1263 char *dp;
1264 size_t dpsz;
1265 ssize_t len;
1266
1267 if ((fp = fopen("manpath.conf", "r")) == NULL) {
1268 warn("%s/manpath.conf", MAN_DIR);
1269 pg_error_internal();
1270 exit(EXIT_FAILURE);
1271 }
1272
1273 dp = NULL;
1274 dpsz = 0;
1275
1276 while ((len = getline(&dp, &dpsz, fp)) != -1) {
1277 if (dp[len - 1] == '\n')
1278 dp[--len] = '\0';
1279 req->p = mandoc_realloc(req->p,
1280 (req->psz + 1) * sizeof(char *));
1281 if ( ! validate_urifrag(dp)) {
1282 warnx("%s/manpath.conf contains "
1283 "unsafe path \"%s\"", MAN_DIR, dp);
1284 pg_error_internal();
1285 exit(EXIT_FAILURE);
1286 }
1287 if (strchr(dp, '/') != NULL) {
1288 warnx("%s/manpath.conf contains "
1289 "path with slash \"%s\"", MAN_DIR, dp);
1290 pg_error_internal();
1291 exit(EXIT_FAILURE);
1292 }
1293 req->p[req->psz++] = dp;
1294 dp = NULL;
1295 dpsz = 0;
1296 }
1297 free(dp);
1298
1299 if (req->p == NULL) {
1300 warnx("%s/manpath.conf is empty", MAN_DIR);
1301 pg_error_internal();
1302 exit(EXIT_FAILURE);
1303 }
1304}