IRC parsing, tokenization, and state handling in C#
1# IRC parser tests
2# splitting messages into usable atoms
3
4# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
5#
6# To the extent possible under law, the author(s) have dedicated all copyright
7# and related and neighboring rights to this software to the public domain
8# worldwide. This software is distributed without any warranty.
9#
10# You should have received a copy of the CC0 Public Domain Dedication along
11# with this software. If not, see
12# <http://creativecommons.org/publicdomain/zero/1.0/>.
13
14# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
15# https://github.com/grawity/code/tree/master/lib/tests
16# some of the tests here originate from Mozilla's test vectors, which is public domain
17# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
18# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
19# https://github.com/SaberUK/ircparser/tree/master/test
20
21# we follow RFC1459 with regard to multiple ASCII spaces splitting atoms:
22# The prefix, command, and all parameters are
23# separated by one (or more) ASCII space character(s) (0x20).
24# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
25
26# input is the string coming directly from the server to parse
27
28# the atoms dict has the keys:
29# * tags: tags dict
30# tags with no value are an empty string
31# * source: source string, without single leading colon
32# * verb: verb string
33# * params: params split up as a list
34# if the params key does not exist, assume it is empty
35# if any other keys do not exist, assume they are null
36# a key that is null does not exist or is not specified with the
37# given input string
38
39# simple
40- input: "foo bar baz asdf"
41 atoms:
42 verb: "foo"
43 params:
44 - "bar"
45 - "baz"
46 - "asdf"
47
48# with source
49- input: ":coolguy foo bar baz asdf"
50 atoms:
51 source: "coolguy"
52 verb: "foo"
53 params:
54 - "bar"
55 - "baz"
56 - "asdf"
57
58# with trailing param
59- input: "foo bar baz :asdf quux"
60 atoms:
61 verb: "foo"
62 params:
63 - "bar"
64 - "baz"
65 - "asdf quux"
66
67- input: "foo bar baz :"
68 atoms:
69 verb: "foo"
70 params:
71 - "bar"
72 - "baz"
73 - ""
74
75- input: "foo bar baz ::asdf"
76 atoms:
77 verb: "foo"
78 params:
79 - "bar"
80 - "baz"
81 - ":asdf"
82
83# with source and trailing param
84- input: ":coolguy foo bar baz :asdf quux"
85 atoms:
86 source: "coolguy"
87 verb: "foo"
88 params:
89 - "bar"
90 - "baz"
91 - "asdf quux"
92
93- input: ":coolguy foo bar baz : asdf quux "
94 atoms:
95 source: "coolguy"
96 verb: "foo"
97 params:
98 - "bar"
99 - "baz"
100 - " asdf quux "
101
102- input: ":coolguy PRIVMSG bar :lol :) "
103 atoms:
104 source: "coolguy"
105 verb: "PRIVMSG"
106 params:
107 - "bar"
108 - "lol :) "
109
110- input: ":coolguy foo bar baz :"
111 atoms:
112 source: "coolguy"
113 verb: "foo"
114 params:
115 - "bar"
116 - "baz"
117 - ""
118
119- input: ":coolguy foo bar baz : "
120 atoms:
121 source: "coolguy"
122 verb: "foo"
123 params:
124 - "bar"
125 - "baz"
126 - " "
127
128# with tags
129- input: "@a=b;c=32;k;rt=ql7 foo"
130 atoms:
131 verb: "foo"
132 tags:
133 "a": "b"
134 "c": "32"
135 "k":
136 "rt": "ql7"
137
138# with escaped tags
139- input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
140 atoms:
141 verb: "foo"
142 tags:
143 "a": "b\\and\nk"
144 "c": "72 45"
145 "d": "gh;764"
146
147# with tags and source
148- input: "@c;h=;a=b :quux ab cd"
149 atoms:
150 tags:
151 "c":
152 "h": ""
153 "a": "b"
154 source: "quux"
155 verb: "ab"
156 params:
157 - "cd"
158
159# different forms of last param
160- input: ":src JOIN #chan"
161 atoms:
162 source: "src"
163 verb: "JOIN"
164 params:
165 - "#chan"
166
167- input: ":src JOIN :#chan"
168 atoms:
169 source: "src"
170 verb: "JOIN"
171 params:
172 - "#chan"
173
174# with and without last param
175- input: ":src AWAY"
176 atoms:
177 source: "src"
178 verb: "AWAY"
179
180- input: ":src AWAY "
181 atoms:
182 source: "src"
183 verb: "AWAY"
184
185# tab is not considered <SPACE>
186- input: ":cool\tguy foo bar baz"
187 atoms:
188 source: "cool\tguy"
189 verb: "foo"
190 params:
191 - "bar"
192 - "baz"
193
194# with weird control codes in the source
195- input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
196 atoms:
197 source: "coolguy!ag@net\x035w\x03ork.admin"
198 verb: "PRIVMSG"
199 params:
200 - "foo"
201 - "bar baz"
202
203- input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
204 atoms:
205 source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
206 verb: "PRIVMSG"
207 params:
208 - "foo"
209 - "bar baz"
210
211- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
212 atoms:
213 tags:
214 tag1: "value1"
215 tag2:
216 vendor1/tag3: "value2"
217 vendor2/tag4: ""
218 source: "irc.example.com"
219 verb: "COMMAND"
220 params:
221 - "param1"
222 - "param2"
223 - "param3 param3"
224
225- input: ":irc.example.com COMMAND param1 param2 :param3 param3"
226 atoms:
227 source: "irc.example.com"
228 verb: "COMMAND"
229 params:
230 - "param1"
231 - "param2"
232 - "param3 param3"
233
234- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
235 atoms:
236 tags:
237 tag1: "value1"
238 tag2:
239 vendor1/tag3: "value2"
240 vendor2/tag4:
241 verb: "COMMAND"
242 params:
243 - "param1"
244 - "param2"
245 - "param3 param3"
246
247- input: "COMMAND"
248 atoms:
249 verb: "COMMAND"
250
251# yaml encoding + slashes is fun
252- input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
253 atoms:
254 tags:
255 foo: "\\\\;\\s \r\n"
256 verb: "COMMAND"
257
258# broken messages from unreal
259- input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters"
260 atoms:
261 source: "gravel.mozilla.org"
262 verb: "432"
263 params:
264 - "#momo"
265 - "Erroneous Nickname: Illegal characters"
266
267- input: ":gravel.mozilla.org MODE #tckk +n "
268 atoms:
269 source: "gravel.mozilla.org"
270 verb: "MODE"
271 params:
272 - "#tckk"
273 - "+n"
274
275- input: ":services.esper.net MODE #foo-bar +o foobar "
276 atoms:
277 source: "services.esper.net"
278 verb: "MODE"
279 params:
280 - "#foo-bar"
281 - "+o"
282 - "foobar"
283
284# tag values should be parsed char-at-a-time to prevent wayward replacements.
285- input: "@tag1=value\\\\ntest COMMAND"
286 atoms:
287 tags:
288 tag1: "value\\ntest"
289 verb: "COMMAND"
290
291# If a tag value has a backslash followed by a character which doesn't need
292# to be escaped, the backslash should be dropped.
293- input: "@tag1=value\\1 COMMAND"
294 atoms:
295 tags:
296 tag1: "value1"
297 verb: "COMMAND"
298
299# A backslash at the end of a tag value should be dropped
300- input: "@tag1=value1\\ COMMAND"
301 atoms:
302 tags:
303 tag1: "value1"
304 verb: "COMMAND"
305
306# Duplicate tags: Parsers SHOULD disregard all but the final occurrence
307- input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
308 atoms:
309 tags:
310 tag1: "5"
311 tag2: "3"
312 tag3: "4"
313 verb: "COMMAND"
314
315# vendored tags can have the same name as a non-vendored tag
316- input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
317 atoms:
318 tags:
319 tag1: "5"
320 tag2: "3"
321 tag3: "4"
322 vendor/tag2: "8"
323 verb: "COMMAND"
324
325# Some parsers handle /MODE in a special way, make sure they do it right
326- input: ":SomeOp MODE #channel :+i"
327 atoms:
328 source: "SomeOp"
329 verb: "MODE"
330 params:
331 - "#channel"
332 - "+i"
333
334- input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
335 atoms:
336 source: "SomeOp"
337 verb: "MODE"
338 params:
339 - "#channel"
340 - "+oo"
341 - "SomeUser"
342 - "AnotherUser"