param ( $PsxFile ) enum TokenType { OPEN_ELEMENT_START # < CLOSE_ELEMENT # > VOID_ELEMENT_CLOSE # /> EQUAL # = OPEN_ELEMENT_END # CLOSED_VOID = 128 #
END_OPENED = 256 #
,
ILLEGAL = 2048
VALID_ATTRIBUTE_NAME_START = (4 + 8 + 32 + 2048)
}
# NOTE(review): the three lines above are the tail of an enum whose head sits
# above this block; they are reproduced unchanged (only split one per line).

# A single lexical token: its kind plus the literal text it was read from.
class Token {
    [TokenType] $Type
    [String] $Literal

    Token($Type, $Literal) {
        $this.Type = $Type
        $this.Literal = $Literal
    }

    # Renders the token as "<Type> <Literal>".
    [String] ToString() {
        return "{0} {1}" -f $this.Type, $this.Literal
    }

    # Two tokens are equal when both Type and Literal compare equal with -eq
    # (so the Literal comparison is case-insensitive).
    [Boolean] Equals($that) {
        return (($this.Type -eq $that.Type) -and ($this.Literal -eq $that.Literal))
    }
}

# Hand-written lexer that turns a single element such as
#   <tag attr="value" other={ script } @splat />
# into a stream of Token objects, one per call to NextToken().
# $State tracks where inside the element the lexer currently is, and decides
# which characters are legal next.
class Lexer {
    [String] $LexInput              # full text being lexed
    [Int] $Pos                      # index of the current character ($Char)
    [Int] $NextPos                  # index of the character after $Char
    [String] $Char                  # current character, '' once past the end
    [String] $CurrentTag = ''       # name of the opened tag, checked against the end tag
    [ElementState] $State = [ElementState]::UNOPENED

    # Creates a lexer positioned on the first character of $LexInput.
    Lexer($LexInput) {
        $this.LexInput = $LexInput
        $this.Pos = 0
        $this.NextPos = 1
        # Read the first character from the coerced [String] property rather
        # than from the raw argument: indexing a non-string argument (e.g. an
        # array of lines) would yield a whole element, and indexing $null fails.
        $this.Char = if ([String]::IsNullOrEmpty($this.LexInput)) { '' } else { [String]$this.LexInput[0] }
    }

    # True when $Candidate is a character allowed inside a tag/attribute name.
    hidden static [Boolean] IsIdentifierChar([String] $Candidate) {
        return $Candidate -match '[a-zA-Z0-9_-]'
    }

    # Advances one character and returns the new current character
    # ('' when the end of the input has been reached).
    hidden [String] PopChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            $this.Char = ''
        }
        else {
            $this.Char = $this.LexInput[$this.NextPos]
        }
        $this.Pos = $this.NextPos
        $this.NextPos++
        return $this.Char
    }

    # Returns the character after the current one without advancing
    # ('' when there is none).
    hidden [String] PeekChar() {
        if ($this.NextPos -ge $this.LexInput.Length) {
            return ''
        }
        return $this.LexInput[$this.NextPos]
    }

    # True when the current character may be part of an identifier.
    hidden [Boolean] CharIsIdentifier() {
        return [Lexer]::IsIdentifierChar($this.Char)
    }

    # Consumes and returns the identifier starting at the current position.
    hidden [String] PopIdentifier() {
        $Start = $this.Pos
        while ($this.CharIsIdentifier()) {
            $this.PopChar()
        }
        # js string.substring is (indexStart, indexEnd)
        # dotnet String.SubString is (startIndex, length)
        return $this.LexInput.SubString($Start, $this.Pos - $Start)
    }

    # Returns the identifier starting at the current position without
    # consuming it. Scans with a local index so no lexer state is touched.
    hidden [String] PeekIdentifier() {
        $End = $this.Pos
        while (($End -lt $this.LexInput.Length) -and [Lexer]::IsIdentifierChar($this.LexInput[$End])) {
            $End++
        }
        return $this.LexInput.SubString($this.Pos, $End - $this.Pos)
    }

    # Consumes a delimited attribute value ("...", '...' or {...}) and returns
    # the text between the delimiters. The current character must be the
    # opening delimiter. Throws when the closing delimiter is missing.
    hidden [String] PopQuotedAttrValue() {
        if ($this.Char -notin '"', "'", '{') {
            $this.Die($this.Char)
        }
        $ExpectingQuote = $this.Char -eq '{' ? '}' : $this.Char
        $OpenPos = $this.Pos
        $this.PopChar() # opening delimiter
        $Start = $this.Pos
        # Test the current character BEFORE advancing so an empty value ("")
        # ends immediately instead of running on to a later delimiter.
        while ($this.Char -ne $ExpectingQuote) {
            if ($this.Pos -ge $this.LexInput.Length) {
                # PopChar() returns '' forever at end of input; without this
                # guard an unterminated value would never leave the loop.
                throw "Unterminated attribute value starting at char $OpenPos (expected $ExpectingQuote)"
            }
            $this.PopChar()
        }
        $End = $this.Pos
        $this.PopChar() # closing delimiter
        return $this.LexInput.SubString($Start, $End - $Start)
    }

    # Skips over any run of whitespace at the current position.
    hidden [Void] ConsumeWhitespace() {
        while ($this.Char -match '\s') {
            $this.PopChar()
        }
    }

    # Aborts lexing with a message naming the offending text, the current
    # position and the current state.
    hidden [Void] Die([String] $Char) {
        throw "Unexpected token $char (char $($this.Pos)) at state $($this.State)"
    }

    # Returns the next token and advances past it.
    # Returns an EOF token once the input is exhausted, and throws when the
    # current character is not legal in the current state or when the lexer
    # has already entered the ILLEGAL state.
    [Token] NextToken() {
        $Token = $null

        # -ge rather than -eq: Pos can move beyond Length if PopChar() is
        # called again after the end has been reached.
        if ($this.Pos -ge $this.LexInput.Length) {
            return [Token]::new([TokenType]::EOF, '')
        }

        if ($this.State.HasFlag([ElementState]::ILLEGAL)) {
            throw "Lexer found in illegal state $($this.State)"
        }

        switch -Regex ($this.Char) {
            '<' {
                switch ($this.State) {
                    {$_ -eq [ElementState]::UNOPENED} {
                        $Token = [Token]::new([TokenType]::OPEN_ELEMENT_START, $this.Char)
                        $this.State = [ElementState]::OPENED
                    }
                    {$_ -eq [ElementState]::CLOSED} {
                        # Only "</" is understood after a start tag; anything
                        # else is rejected instead of yielding a $null token.
                        if ($this.PeekChar() -ne '/') {
                            $this.Die('<')
                        }
                        $Token = [Token]::new(
                            [TokenType]::OPEN_ELEMENT_END,
                            $this.Char + $this.PopChar()
                        )
                        $this.State = [ElementState]::END_OPENED
                    }
                    default {
                        $this.Die('<')
                    }
                }
            }
            '/' {
                switch ($this.State) {
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        # A '/' is only meaningful as part of "/>"; reject a
                        # stray slash instead of yielding a $null token.
                        if ($this.PeekChar() -ne '>') {
                            $this.Die('/')
                        }
                        $Token = [Token]::new(
                            [TokenType]::VOID_ELEMENT_CLOSE,
                            $this.Char + $this.PopChar()
                        )
                        $this.State = [ElementState]::CLOSED_VOID
                    }
                    default {
                        $this.Die('/')
                    }
                }
            }
            '>' {
                # check for nested psx here!!!
                switch ($this.State) {
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        $Token = [Token]::new([TokenType]::CLOSE_ELEMENT, $this.Char)
                        $this.State = [ElementState]::CLOSED
                    }
                    {$_ -eq [ElementState]::END_AFTER_TAG} {
                        $Token = [Token]::new([TokenType]::CLOSE_ELEMENT, $this.Char)
                        $this.State = [ElementState]::END_CLOSED
                    }
                    default {
                        $this.Die('>')
                    }
                }
            }
            {$_ -in '"', "'"} {
                # TODO: check for nested PowerShell
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }
                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new([TokenType]::ATTR_VALUE, $this.PopQuotedAttrValue())
            }
            '{' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_EQUALS) {
                    $this.Die($_)
                }
                $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                return [Token]::new([TokenType]::ATTR_VALUE_SCRIPTBLOCK, $this.PopQuotedAttrValue())
            }
            '@' {
                # Validate before consuming so Die() reports the position of
                # the '@' itself.
                if ($this.State -notin
                    [ElementState]::AFTER_TAG,
                    [ElementState]::AFTER_ATTRIBUTE_NAME,
                    [ElementState]::AFTER_ATTRIBUTE_VALUE
                ) {
                    $this.Die('@')
                }
                $this.PopChar() # drop @
                return [Token]::new([TokenType]::ATTR_SPLAT, $this.PopIdentifier())
            }
            '[a-zA-Z0-9_-]' {
                $TokType = $null
                switch ($this.State) {
                    {$_ -eq [ElementState]::OPENED} {
                        $TokType = [TokenType]::TAG_NAME
                        # Remember the tag name so the end tag can be checked.
                        $this.CurrentTag = $this.PeekIdentifier()
                        $this.State = [ElementState]::AFTER_TAG
                    }
                    {[ElementState]::VALID_ATTRIBUTE_NAME_START.HasFlag($_)} {
                        $TokType = [TokenType]::ATTR_NAME
                        $this.State = [ElementState]::AFTER_ATTRIBUTE_NAME
                    }
                    {$_ -eq [ElementState]::AFTER_ATTRIBUTE_EQUALS} {
                        # Unquoted attribute value.
                        $TokType = [TokenType]::ATTR_VALUE
                        $this.State = [ElementState]::AFTER_ATTRIBUTE_VALUE
                    }
                    {$_ -eq [ElementState]::END_OPENED} {
                        $Cur = $this.CurrentTag
                        $Peek = $this.PeekIdentifier()
                        if ($Cur -ne $Peek) {
                            throw "Start tag name ($Cur) and end tag name ($Peek) doesn't match"
                        }
                        $TokType = [TokenType]::TAG_NAME
                        $this.State = [ElementState]::END_AFTER_TAG
                    }
                    default {
                        $this.Die($this.Char)
                    }
                }
                return [Token]::new($TokType, $this.PopIdentifier())
            }
            '=' {
                if ($this.State -ne [ElementState]::AFTER_ATTRIBUTE_NAME) {
                    $this.Die('=')
                }
                $Token = [Token]::new([TokenType]::EQUAL, $this.Char)
                $this.State = [ElementState]::AFTER_ATTRIBUTE_EQUALS
            }
            '\s' {
                # Whitespace produces no token of its own.
                $this.ConsumeWhitespace()
                return $this.NextToken()
            }
            default {
                # Anything else: hand back the rest of the input as one
                # ILLEGAL token; the next call will throw.
                $Token = [Token]::new([TokenType]::ILLEGAL, $this.LexInput.Substring($this.Pos))
                $this.State = [ElementState]::ILLEGAL
            }
        }

        # Branches that did not return have produced a token whose last
        # character is the current one; step past it.
        $this.PopChar()
        return $Token
    }
}