IRC parsing, tokenization, and state handling in C#
at ircrobotsv2 342 lines 7.6 kB view raw
1# IRC parser tests 2# splitting messages into usable atoms 3 4# Written in 2015 by Daniel Oaks <daniel@danieloaks.net> 5# 6# To the extent possible under law, the author(s) have dedicated all copyright 7# and related and neighboring rights to this software to the public domain 8# worldwide. This software is distributed without any warranty. 9# 10# You should have received a copy of the CC0 Public Domain Dedication along 11# with this software. If not, see 12# <http://creativecommons.org/publicdomain/zero/1.0/>. 13 14# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed 15# https://github.com/grawity/code/tree/master/lib/tests 16# some of the tests here originate from Mozilla's test vectors, which is public domain 17# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js 18# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here 19# https://github.com/SaberUK/ircparser/tree/master/test 20 21# we follow RFC1459 with regards to multiple ascii spaces splitting atoms: 22# The prefix, command, and all parameters are 23# separated by one (or more) ASCII space character(s) (0x20). 
24# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane 25 26# input is the string coming directly from the server to parse 27 28# the atoms dict has the keys: 29# * tags: tags dict 30# tags with no value are an empty string 31# * source: source string, without single leading colon 32# * verb: verb string 33# * params: params split up as a list 34# if the params key does not exist, assume it is empty 35# if any other keys do not exist, assume they are null 36# a key that is null does not exist or is not specified with the 37# given input string 38 39# simple 40- input: "foo bar baz asdf" 41 atoms: 42 verb: "foo" 43 params: 44 - "bar" 45 - "baz" 46 - "asdf" 47 48# with source 49- input: ":coolguy foo bar baz asdf" 50 atoms: 51 source: "coolguy" 52 verb: "foo" 53 params: 54 - "bar" 55 - "baz" 56 - "asdf" 57 58# with trailing param 59- input: "foo bar baz :asdf quux" 60 atoms: 61 verb: "foo" 62 params: 63 - "bar" 64 - "baz" 65 - "asdf quux" 66 67- input: "foo bar baz :" 68 atoms: 69 verb: "foo" 70 params: 71 - "bar" 72 - "baz" 73 - "" 74 75- input: "foo bar baz ::asdf" 76 atoms: 77 verb: "foo" 78 params: 79 - "bar" 80 - "baz" 81 - ":asdf" 82 83# with source and trailing param 84- input: ":coolguy foo bar baz :asdf quux" 85 atoms: 86 source: "coolguy" 87 verb: "foo" 88 params: 89 - "bar" 90 - "baz" 91 - "asdf quux" 92 93- input: ":coolguy foo bar baz : asdf quux " 94 atoms: 95 source: "coolguy" 96 verb: "foo" 97 params: 98 - "bar" 99 - "baz" 100 - " asdf quux " 101 102- input: ":coolguy PRIVMSG bar :lol :) " 103 atoms: 104 source: "coolguy" 105 verb: "PRIVMSG" 106 params: 107 - "bar" 108 - "lol :) " 109 110- input: ":coolguy foo bar baz :" 111 atoms: 112 source: "coolguy" 113 verb: "foo" 114 params: 115 - "bar" 116 - "baz" 117 - "" 118 119- input: ":coolguy foo bar baz : " 120 atoms: 121 source: "coolguy" 122 verb: "foo" 123 params: 124 - "bar" 125 - "baz" 126 - " " 127 128# with tags 129- input: "@a=b;c=32;k;rt=ql7 foo" 130 atoms: 131 verb: 
"foo" 132 tags: 133 "a": "b" 134 "c": "32" 135 "k": 136 "rt": "ql7" 137 138# with escaped tags 139- input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo" 140 atoms: 141 verb: "foo" 142 tags: 143 "a": "b\\and\nk" 144 "c": "72 45" 145 "d": "gh;764" 146 147# with tags and source 148- input: "@c;h=;a=b :quux ab cd" 149 atoms: 150 tags: 151 "c": 152 "h": "" 153 "a": "b" 154 source: "quux" 155 verb: "ab" 156 params: 157 - "cd" 158 159# different forms of last param 160- input: ":src JOIN #chan" 161 atoms: 162 source: "src" 163 verb: "JOIN" 164 params: 165 - "#chan" 166 167- input: ":src JOIN :#chan" 168 atoms: 169 source: "src" 170 verb: "JOIN" 171 params: 172 - "#chan" 173 174# with and without last param 175- input: ":src AWAY" 176 atoms: 177 source: "src" 178 verb: "AWAY" 179 180- input: ":src AWAY " 181 atoms: 182 source: "src" 183 verb: "AWAY" 184 185# tab is not considered <SPACE> 186- input: ":cool\tguy foo bar baz" 187 atoms: 188 source: "cool\tguy" 189 verb: "foo" 190 params: 191 - "bar" 192 - "baz" 193 194# with weird control codes in the source 195- input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz" 196 atoms: 197 source: "coolguy!ag@net\x035w\x03ork.admin" 198 verb: "PRIVMSG" 199 params: 200 - "foo" 201 - "bar baz" 202 203- input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz" 204 atoms: 205 source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin" 206 verb: "PRIVMSG" 207 params: 208 - "foo" 209 - "bar baz" 210 211- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3" 212 atoms: 213 tags: 214 tag1: "value1" 215 tag2: 216 vendor1/tag3: "value2" 217 vendor2/tag4: "" 218 source: "irc.example.com" 219 verb: "COMMAND" 220 params: 221 - "param1" 222 - "param2" 223 - "param3 param3" 224 225- input: ":irc.example.com COMMAND param1 param2 :param3 param3" 226 atoms: 227 source: "irc.example.com" 228 verb: "COMMAND" 229 params: 230 - "param1" 231 - "param2" 232 - "param3 param3" 233 234- 
input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3" 235 atoms: 236 tags: 237 tag1: "value1" 238 tag2: 239 vendor1/tag3: "value2" 240 vendor2/tag4: 241 verb: "COMMAND" 242 params: 243 - "param1" 244 - "param2" 245 - "param3 param3" 246 247- input: "COMMAND" 248 atoms: 249 verb: "COMMAND" 250 251# yaml encoding + slashes is fun 252- input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND" 253 atoms: 254 tags: 255 foo: "\\\\;\\s \r\n" 256 verb: "COMMAND" 257 258# broken messages from unreal 259- input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters" 260 atoms: 261 source: "gravel.mozilla.org" 262 verb: "432" 263 params: 264 - "#momo" 265 - "Erroneous Nickname: Illegal characters" 266 267- input: ":gravel.mozilla.org MODE #tckk +n " 268 atoms: 269 source: "gravel.mozilla.org" 270 verb: "MODE" 271 params: 272 - "#tckk" 273 - "+n" 274 275- input: ":services.esper.net MODE #foo-bar +o foobar " 276 atoms: 277 source: "services.esper.net" 278 verb: "MODE" 279 params: 280 - "#foo-bar" 281 - "+o" 282 - "foobar" 283 284# tag values should be parsed char-at-a-time to prevent wayward replacements. 285- input: "@tag1=value\\\\ntest COMMAND" 286 atoms: 287 tags: 288 tag1: "value\\ntest" 289 verb: "COMMAND" 290 291# If a tag value has a slash followed by a character which doesn't need 292# to be escaped, the slash should be dropped. 
293- input: "@tag1=value\\1 COMMAND" 294 atoms: 295 tags: 296 tag1: "value1" 297 verb: "COMMAND" 298 299# A slash at the end of a tag value should be dropped 300- input: "@tag1=value1\\ COMMAND" 301 atoms: 302 tags: 303 tag1: "value1" 304 verb: "COMMAND" 305 306# Duplicate tags: Parsers SHOULD disregard all but the final occurrence 307- input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND" 308 atoms: 309 tags: 310 tag1: "5" 311 tag2: "3" 312 tag3: "4" 313 verb: "COMMAND" 314 315# vendored tags can have the same name as a non-vendored tag 316- input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND" 317 atoms: 318 tags: 319 tag1: "5" 320 tag2: "3" 321 tag3: "4" 322 vendor/tag2: "8" 323 verb: "COMMAND" 324 325# Some parsers handle /MODE in a special way, make sure they do it right 326- input: ":SomeOp MODE #channel :+i" 327 atoms: 328 source: "SomeOp" 329 verb: "MODE" 330 params: 331 - "#channel" 332 - "+i" 333 334- input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser" 335 atoms: 336 source: "SomeOp" 337 verb: "MODE" 338 params: 339 - "#channel" 340 - "+oo" 341 - "SomeUser" 342 - "AnotherUser"