Reactos

[CMD] FOR: Additional Windows' CMD compatibility "fixes" for the FOR /F token-parsing command.

This change implements the buggy behaviour of FOR /F token parsing
that can be observed in Windows' CMD, and that is tested by the
cmd_winetests.
It is controlled at compile time by the MSCMD_FOR_QUIRKS define
(remove the define to disable it).

It fixes additional cmd_winetests, in concert with commit cb2a9c31.

Explanation of the implemented buggy behaviour
==============================================

In principle, the "tokens=x,y,m-n[*]" option describes a list of token
numbers (must be between 1 and 31) that will be assigned into variables.
Theoretically this option does not cumulate: only the latest 'tokens='
specification should be taken into account.

However, things are not that simple in practice. First, not all of the
"tokens=" option state is reset when more than one specification is
provided. Second, when a token range such as "1-5" is specified, Windows'
CMD silently ignores ranges whose endpoints are not in increasing
order. Thus, for example, the range "5-1" is ignored without error.
Third, token numbers strictly greater than 31 are also silently ignored,
and if they appear in a range, the whole range is ignored.

Another bug is the following: suppose that the 'tokens' specification
reads:
"tokens=1-5,1-30" , or: "tokens=1-5,3" ,
i.e. it contains more than one range (or token number), and these
overlap. Then the total number of variables allocated is not the size of
the largest range, but the sum of the sizes of all of them instead.
Thus, in the first example, a total of 5 + 30 == 35 variables (> 31) is
allocated, while in the second example, a total of 5 + 1 == 6 variables
is allocated, even if they won't all store data !!
In the first example, only the first 30 FOR variables will be used, and
the 5 others will contain an empty string. In the second example, only
the first 5 FOR variables will be used, and the other one will be empty.

As a consequence, the fixed-size "Variables" buffer cannot always be
used (since it can contain at most 32 variables).

Last but not least, when more than one "tokens=" specification is
provided, for example:
"tokens=1-31 tokens=1-20"
a total number of 31 FOR variables (because 31 is the max of 31 and 20)
is allocated, **but** only 20 are actually used, and the 11 others
return an empty string.

And in the specification: "tokens=1-31,* tokens=1-20", a total of
31 + 1 + 20 = 52 variables is initialized, but only the first 20 will
be used, and no "remaining-line" token (the '*' one) is used.

+135 -30
+135 -30
base/shell/cmd/for.c
··· 32 32 33 33 #include "precomp.h" 34 34 35 + /* Enable this define for "buggy" Windows' CMD FOR-command compatibility. 36 + * Currently, this enables the buggy behaviour of FOR /F token parsing. */ 37 + #define MSCMD_FOR_QUIRKS 38 + 35 39 36 40 /* FOR is a special command, so this function is only used for showing help now */ 37 41 INT cmd_for(LPTSTR param) ··· 121 125 static INT ForF(PARSED_COMMAND *Cmd, LPTSTR List, TCHAR *Buffer) 122 126 { 123 127 LPTSTR Delims = _T(" \t"); 124 - LPTSTR DelimsEndPtr = NULL; 128 + PTCHAR DelimsEndPtr = NULL; 125 129 TCHAR DelimsEndChr = _T('\0'); 126 130 TCHAR Eol = _T(';'); 127 131 INT SkipLines = 0; 128 - DWORD Tokens = (1 << 1); 129 - BOOL RemainderVar = FALSE; 132 + DWORD TokensMask = (1 << 1); 133 + #ifdef MSCMD_FOR_QUIRKS 134 + DWORD NumTokens = 1; 135 + DWORD RemainderVar = 0; 136 + #else 137 + DWORD NumTokens = 0; 138 + #endif 130 139 TCHAR StringQuote = _T('"'); 131 140 TCHAR CommandQuote = _T('\''); 132 141 LPTSTR Variables[32]; 133 - TCHAR *Start, *End; 134 - INT i; 142 + PTCHAR Start, End; 135 143 INT Ret = 0; 136 144 137 145 if (Cmd->For.Params) 138 146 { 139 147 TCHAR Quote = 0; 140 - TCHAR *Param = Cmd->For.Params; 148 + PTCHAR Param = Cmd->For.Params; 141 149 if (*Param == _T('"') || *Param == _T('\'')) 142 150 Quote = *Param++; 143 151 ··· 161 169 { 162 170 if (*Param == _T(' ')) 163 171 { 164 - TCHAR *FirstSpace = Param; 172 + PTCHAR FirstSpace = Param; 165 173 Param += _tcsspn(Param, _T(" ")); 166 174 /* Exclude trailing spaces if this is not the last parameter */ 167 175 if (*Param && *Param != Quote) ··· 197 205 } 198 206 else if (_tcsnicmp(Param, _T("tokens="), 7) == 0) 199 207 { 208 + #ifdef MSCMD_FOR_QUIRKS 209 + DWORD NumToksInSpec = 0; // Number of tokens in this specification. 210 + #endif 200 211 Param += 7; 201 - /* tokens=x,y,m-n: List of token numbers (must be between 202 - * 1 and 31) that will be assigned into variables. 
*/ 203 - Tokens = 0; 212 + /* 213 + * tokens=x,y,m-n: List of token numbers (must be between 1 and 31) 214 + * that will be assigned into variables. This option does not cumulate: 215 + * only the latest 'tokens=' specification is taken into account. 216 + * 217 + * NOTE: In MSCMD_FOR_QUIRKS mode, for Windows' CMD compatibility, 218 + * not all the tokens-state is reset. This leads to subtle bugs. 219 + */ 220 + TokensMask = 0; 221 + #ifdef MSCMD_FOR_QUIRKS 222 + NumToksInSpec = 0; 223 + // Windows' CMD compatibility: bug: the asterisk-token's position is not reset! 224 + // RemainderVar = 0; 225 + #else 226 + NumTokens = 0; 227 + #endif 228 + 204 229 while (*Param && *Param != Quote && *Param != _T('*')) 205 230 { 206 231 INT First = _tcstol(Param, &Param, 0); 207 232 INT Last = First; 233 + #ifdef MSCMD_FOR_QUIRKS 208 234 if (First < 1) 235 + #else 236 + if ((First < 1) || (First > 31)) 237 + #endif 209 238 goto error; 210 239 if (*Param == _T('-')) 211 240 { 212 241 /* It's a range of tokens */ 213 242 Last = _tcstol(Param + 1, &Param, 0); 214 - if (Last < First || Last > 31) 243 + #ifdef MSCMD_FOR_QUIRKS 244 + /* Ignore the range if the endpoints are not in correct order */ 245 + if (Last < 1) 246 + #else 247 + if ((Last < First) || (Last > 31)) 248 + #endif 215 249 goto error; 216 250 } 217 - Tokens |= (2 << Last) - (1 << First); 251 + #ifdef MSCMD_FOR_QUIRKS 252 + /* Ignore the range if the endpoints are not in correct order */ 253 + if ((First <= Last) && (Last <= 31)) 254 + { 255 + #endif 256 + TokensMask |= (2 << Last) - (1 << First); 257 + #ifdef MSCMD_FOR_QUIRKS 258 + NumToksInSpec += (Last - First + 1); 259 + } 260 + #endif 218 261 219 262 if (*Param != _T(',')) 220 263 break; ··· 222 265 } 223 266 /* With an asterisk at the end, an additional variable 224 267 * will be created to hold the remainder of the line 225 - * (after the last token specified). */ 268 + * (after the last specified token). 
*/ 226 269 if (*Param == _T('*')) 227 270 { 228 - RemainderVar = TRUE; 271 + #ifdef MSCMD_FOR_QUIRKS 272 + RemainderVar = ++NumToksInSpec; 273 + #else 274 + ++NumTokens; 275 + #endif 229 276 Param++; 230 277 } 278 + #ifdef MSCMD_FOR_QUIRKS 279 + NumTokens = max(NumTokens, NumToksInSpec); 280 + #endif 231 281 } 232 282 else if (_tcsnicmp(Param, _T("useback"), 7) == 0) 233 283 { ··· 248 298 } 249 299 } 250 300 301 + #ifdef MSCMD_FOR_QUIRKS 302 + /* Windows' CMD compatibility: use the wrongly evaluated number of tokens */ 303 + fc->varcount = NumTokens; 304 + /* Allocate a large enough variables array if needed */ 305 + if (NumTokens <= ARRAYSIZE(Variables)) 306 + { 307 + fc->values = Variables; 308 + } 309 + else 310 + { 311 + fc->values = cmd_alloc(fc->varcount * sizeof(*fc->values)); 312 + if (!fc->values) 313 + { 314 + error_out_of_memory(); 315 + return 1; 316 + } 317 + } 318 + #else 251 319 /* Count how many variables will be set: one for each token, 252 - * plus maybe one for the remainder */ 253 - fc->varcount = RemainderVar; 254 - for (i = 1; i < 32; i++) 255 - fc->varcount += (Tokens >> i & 1); 320 + * plus maybe one for the remainder. */ 321 + fc->varcount = NumTokens; 322 + for (NumTokens = 1; NumTokens < 32; ++NumTokens) 323 + fc->varcount += (TokensMask >> NumTokens) & 1; 256 324 fc->values = Variables; 325 + #endif 257 326 258 327 if (*List == StringQuote || *List == CommandQuote) 259 328 { ··· 267 336 End = List; 268 337 while (!ExitingOrGoto(Cmd) && GetNextElement(&Start, &End)) 269 338 { 270 - FILE *InputFile; 339 + FILE* InputFile; 271 340 LPTSTR FullInput, In, NextLine; 272 341 INT Skip; 273 342 single_element: ··· 280 349 } 281 350 else if (*Start == CommandQuote && End[-1] == CommandQuote) 282 351 { 283 - /* Read input from a command */ 352 + /* 353 + * Read input from a command. We let the CRT do the ANSI/UNICODE conversion. 
354 + * NOTE: Should we do that, or instead read in binary mode and 355 + * do the conversion by ourselves, using *OUR* current codepage?? 356 + */ 284 357 End[-1] = _T('\0'); 285 358 InputFile = _tpopen(Start + 1, _T("r")); 286 359 if (!InputFile) 287 360 { 288 361 error_bad_command(Start + 1); 289 - return 1; 362 + Ret = 1; 363 + goto Quit; 290 364 } 291 365 FullInput = ReadFileContents(InputFile, Buffer); 292 366 _pclose(InputFile); ··· 302 376 if (!InputFile) 303 377 { 304 378 error_sfile_not_found(Start); 305 - return 1; 379 + Ret = 1; 380 + goto Quit; 306 381 } 307 382 FullInput = ReadFileContents(InputFile, Buffer); 308 383 fclose(InputFile); ··· 311 386 if (!FullInput) 312 387 { 313 388 error_out_of_memory(); 314 - return 1; 389 + Ret = 1; 390 + goto Quit; 315 391 } 316 392 317 393 /* Patch the delimiters string */ ··· 326 402 !ExitingOrGoto(Cmd) && (In != NULL); 327 403 In = NextLine) 328 404 { 329 - DWORD RemainingTokens = Tokens; 330 - LPTSTR *CurVar = Variables; 405 + DWORD RemainingTokens = TokensMask; 406 + LPTSTR* CurVar = fc->values; 407 + 408 + ZeroMemory(fc->values, fc->varcount * sizeof(*fc->values)); 409 + #ifdef MSCMD_FOR_QUIRKS 410 + NumTokens = fc->varcount; 411 + #endif 331 412 332 413 NextLine = _tcschr(In, _T('\n')); 333 414 if (NextLine) ··· 341 422 if (*In == Eol) 342 423 continue; 343 424 344 - while ((RemainingTokens >>= 1) != 0) 425 + /* Loop as long as we have not reached the end of 426 + * the line, and that we have tokens available. 427 + * A maximum of 31 tokens will be enumerated. 
*/ 428 + while (*In && ((RemainingTokens >>= 1) != 0)) 345 429 { 346 430 /* Save pointer to this token in a variable if requested */ 347 431 if (RemainingTokens & 1) 432 + { 433 + #ifdef MSCMD_FOR_QUIRKS 434 + --NumTokens; 435 + #endif 348 436 *CurVar++ = In; 437 + } 349 438 /* Find end of token */ 350 439 In += _tcscspn(In, Delims); 351 440 /* NULL-terminate it and advance to next token */ ··· 355 444 In += _tcsspn(In, Delims); 356 445 } 357 446 } 358 - /* Save pointer to remainder of line */ 359 - *CurVar = In; 447 + 448 + /* Save pointer to remainder of the line if we need to do so */ 449 + if (*In) 450 + #ifdef MSCMD_FOR_QUIRKS 451 + if (RemainderVar && (fc->varcount - NumTokens + 1 == RemainderVar)) 452 + #endif 453 + { 454 + /* NOTE: This sets fc->values[0] at least, if no tokens 455 + * were initialized so far, since CurVar is initialized 456 + * originally to point to fc->values. */ 457 + *CurVar = In; 458 + } 360 459 361 - /* Don't run unless the line had enough tokens to fill at least one variable */ 362 - if (*Variables[0]) 460 + /* Don't run unless we have at least one variable filled */ 461 + if (fc->values[0]) 363 462 Ret = RunInstance(Cmd); 364 463 } 365 464 ··· 369 468 370 469 cmd_free(FullInput); 371 470 } 471 + 472 + Quit: 473 + #ifdef MSCMD_FOR_QUIRKS 474 + if (fc->values && (fc->values != Variables)) 475 + cmd_free(fc->values); 476 + #endif 372 477 373 478 return Ret; 374 479 }