jcs's openbsd hax
openbsd
at jcs 168 lines 4.9 kB view raw
1/* $OpenBSD: text.c,v 1.3 2017/04/18 14:16:48 nicm Exp $ */ 2 3/* 4 * Copyright (c) 2015 Nicholas Marriott <nicm@openbsd.org> 5 * 6 * Permission to use, copy, modify, and distribute this software for any 7 * purpose with or without fee is hereby granted, provided that the above 8 * copyright notice and this permission notice appear in all copies. 9 * 10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 14 * WHATSOEVER RESULTING FROM LOSS OF MIND, USE, DATA OR PROFITS, WHETHER 15 * IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING 16 * OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 17 */ 18 19#include <sys/types.h> 20 21#include <ctype.h> 22#include <string.h> 23 24#include "file.h" 25#include "magic.h" 26#include "xmalloc.h" 27 28static const char *text_words[][3] = { 29 { "msgid", "PO (gettext message catalogue)", "text/x-po" }, 30 { "dnl", "M4 macro language pre-processor", "text/x-m4" }, 31 { "import", "Java program", "text/x-java" }, 32 { "\"libhdr\"", "BCPL program", "text/x-bcpl" }, 33 { "\"LIBHDR\"", "BCPL program", "text/x-bcpl" }, 34 { "//", "C++ program", "text/x-c++" }, 35 { "virtual", "C++ program", "text/x-c++" }, 36 { "class", "C++ program", "text/x-c++" }, 37 { "public:", "C++ program", "text/x-c++" }, 38 { "private:", "C++ program", "text/x-c++" }, 39 { "/*", "C program", "text/x-c" }, 40 { "#include", "C program", "text/x-c" }, 41 { "char", "C program", "text/x-c" }, 42 { "The", "English", "text/plain" }, 43 { "the", "English", "text/plain" }, 44 { "double", "C program", "text/x-c" }, 45 { "extern", "C program", "text/x-c" }, 46 { "float", "C program", "text/x-c" }, 47 { "struct", "C program", "text/x-c" }, 48 { "union", "C program", "text/x-c" }, 49 { "CFLAGS", "make commands", "text/x-makefile" }, 50 { "LDFLAGS", "make commands", "text/x-makefile" }, 51 { "all:", "make commands", "text/x-makefile" }, 52 { ".PRECIOUS", "make commands", "text/x-makefile" }, 53 { ".ascii", "assembler program", "text/x-asm" }, 54 { ".asciiz", "assembler program", "text/x-asm" }, 55 { ".byte", "assembler program", "text/x-asm" }, 56 { ".even", "assembler program", "text/x-asm" }, 57 { ".globl", "assembler program", "text/x-asm" }, 58 { ".text", "assembler program", "text/x-asm" }, 59 { "clr", "assembler program", "text/x-asm" }, 60 { "(input", "Pascal program", "text/x-pascal" }, 61 { "program", "Pascal program", "text/x-pascal" }, 62 { "record", "Pascal program", "text/x-pascal" }, 63 { "dcl", "PL/1 program", "text/x-pl1" }, 64 { "Received:", "mail", "text/x-mail" }, 65 { ">From", "mail", "text/x-mail" }, 66 { "Return-Path:", "mail", "text/x-mail" }, 67 { "Cc:", "mail", "text/x-mail" }, 68 { "Newsgroups:", "news", "text/x-news" }, 69 { "Path:", "news", "text/x-news" }, 70 { "Organization:", "news", "text/x-news" }, 71 { "href=", "HTML document", "text/html" }, 72 { "HREF=", "HTML document", "text/html" }, 73 { "<body", "HTML document", "text/html" }, 74 { "<BODY", "HTML document", "text/html" }, 75 { "<html", "HTML document", "text/html" }, 76 { "<HTML", "HTML document", "text/html" }, 77 { "<!--", "HTML document", "text/html" }, 78 { NULL, NULL, NULL } 79}; 80 81static int 82text_is_ascii(u_char c) 83{ 84 const char cc[] = "\007\010\011\012\014\015\033"; 85 86 if (c == '\0') 87 return (0); 88 if (strchr(cc, c) != NULL) 89 return (1); 90 return (c > 31 && c < 127); 91} 92 93static int 94text_is_latin1(u_char c) 95{ 96 if (c >= 160) 97 return (1); 98 return (text_is_ascii(c)); 99} 100 101static int 102text_is_extended(u_char c) 103{ 104 if (c >= 128) 105 return (1); 106 return (text_is_ascii(c)); 107} 108 109static int 110text_try_test(const void *base, size_t size, int (*f)(u_char)) 111{ 112 const u_char *data = base; 113 size_t offset; 114 115 for (offset = 0; offset < size; offset++) { 116 if (!f(data[offset])) 117 return (0); 118 } 119 return (1); 120} 121 122const char * 123text_get_type(const void *base, size_t size) 124{ 125 if (text_try_test(base, size, text_is_ascii)) 126 return ("ASCII"); 127 if (text_try_test(base, size, text_is_latin1)) 128 return ("ISO-8859"); 129 if (text_try_test(base, size, text_is_extended)) 130 return ("Non-ISO extended-ASCII"); 131 return (NULL); 132} 133 134const char * 135text_try_words(const void *base, size_t size, int flags) 136{ 137 const char *cp, *end, *next, *word; 138 size_t wordlen; 139 u_int i; 140 141 end = (const char *)base + size; 142 for (cp = base; cp != end; /* nothing */) { 143 while (cp != end && isspace((u_char)*cp)) 144 cp++; 145 146 next = cp; 147 while (next != end && !isspace((u_char)*next)) 148 next++; 149 150 for (i = 0; /* nothing */; i++) { 151 word = text_words[i][0]; 152 if (word == NULL) 153 break; 154 wordlen = strlen(word); 155 156 if ((size_t)(next - cp) != wordlen) 157 continue; 158 if (memcmp(cp, word, wordlen) != 0) 159 continue; 160 if (flags & MAGIC_TEST_MIME) 161 return (text_words[i][2]); 162 return (text_words[i][1]); 163 } 164 165 cp = next; 166 } 167 return (NULL); 168}