From 4b65ef822eb51e241111c5e2189226e2c541797e Mon Sep 17 00:00:00 2001 From: Tyrel Souza <923113+tyrelsouza@users.noreply.github.com> Date: Tue, 30 Nov 2021 15:27:22 -0500 Subject: [PATCH] lexer chapter 1 complete --- lexer/lexer.go | 43 +++++++++++++++++++++++++++++++++++++ lexer/lexer_test.go | 39 ++++++++++++++++++++++++++++++++-- token/token.go | 52 ++++++++++++++++++++++++++++----------------- 3 files changed, 112 insertions(+), 22 deletions(-) diff --git a/lexer/lexer.go b/lexer/lexer.go index 441f7ed..e7e8ce8 100644 --- a/lexer/lexer.go +++ b/lexer/lexer.go @@ -30,6 +30,8 @@ func (l *Lexer) readChar() { func (l *Lexer) NextToken() token.Token { var tok token.Token + l.skipWhitespace() + switch l.ch { case '=': tok = newToken(token.ASSIGN, l.ch) @@ -50,6 +52,18 @@ func (l *Lexer) NextToken() token.Token { case 0: tok.Literal = "" tok.Type = token.EOF + default: + if isLetter(l.ch) { + tok.Literal = l.readIdentifier() + tok.Type = token.LookupIdent(tok.Literal) + return tok + } else if isDigit(l.ch) { + tok.Type = token.INT + tok.Literal = l.readNumber() + return tok + } else { + tok = newToken(token.ILLEGAL, l.ch) + } } l.readChar() @@ -59,3 +73,32 @@ func (l *Lexer) NextToken() token.Token { func newToken(tokenType token.TokenType, ch byte) token.Token { return token.Token{Type: tokenType, Literal: string(ch)} } + +func (l *Lexer) readIdentifier() string { + position := l.position + for isLetter(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} + +func isLetter(ch byte) bool { + return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_' +} +func isDigit(ch byte) bool { + return '0' <= ch && ch <= '9' +} + +func (l *Lexer) skipWhitespace() { + for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' { + l.readChar() + } +} + +func (l *Lexer) readNumber() string { + position := l.position + for isDigit(l.ch) { + l.readChar() + } + return l.input[position:l.position] +} diff --git a/lexer/lexer_test.go b/lexer/lexer_test.go 
index 12717b6..7e90b2b 100644 --- a/lexer/lexer_test.go +++ b/lexer/lexer_test.go @@ -7,19 +7,54 @@ import ( ) func TestNextToken(t *testing.T) { - input := `=+(){},;` + input := `let five = 5; +let ten = 10; + +let add = fn(x,y) { + x + y; +}; +let result = add(five, ten); +` tests := []struct { expectedType token.TokenType expectedLiteral string }{ + {token.LET, "let"}, + {token.IDENT, "five"}, {token.ASSIGN, "="}, - {token.PLUS, "+"}, + {token.INT, "5"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "ten"}, + {token.ASSIGN, "="}, + {token.INT, "10"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "add"}, + {token.ASSIGN, "="}, + {token.FUNCTION, "fn"}, {token.LPAREN, "("}, + {token.IDENT, "x"}, + {token.COMMA, ","}, + {token.IDENT, "y"}, {token.RPAREN, ")"}, {token.LBRACE, "{"}, + {token.IDENT, "x"}, + {token.PLUS, "+"}, + {token.IDENT, "y"}, + {token.SEMICOLON, ";"}, {token.RBRACE, "}"}, + {token.SEMICOLON, ";"}, + {token.LET, "let"}, + {token.IDENT, "result"}, + {token.ASSIGN, "="}, + {token.IDENT, "add"}, + {token.LPAREN, "("}, + {token.IDENT, "five"}, {token.COMMA, ","}, + {token.IDENT, "ten"}, + {token.RPAREN, ")"}, {token.SEMICOLON, ";"}, {token.EOF, ""}, } diff --git a/token/token.go b/token/token.go index f3d6779..901e47d 100644 --- a/token/token.go +++ b/token/token.go @@ -3,32 +3,44 @@ package token type TokenType string type Token struct { - Type TokenType - Literal string + Type TokenType + Literal string } const ( - ILLEGAL = "ILLEGAL" - EOF = "EOF" + ILLEGAL = "ILLEGAL" + EOF = "EOF" - //Identifiers and literals - IDENT = "IDENT" //add, foo, bar, a, b, c... - INT = "INT" /// 1 2 3 4 5 + //Identifiers and literals + IDENT = "IDENT" //add, foo, bar, a, b, c... 
+ INT = "INT" // 1 2 3 4 5 - //OPERATORS - ASSIGN = "=" - PLUS = "+" + //OPERATORS + ASSIGN = "=" + PLUS = "+" - //DELIM - COMMA = "" - SEMICOLON = ";" + //DELIM + COMMA = "," + SEMICOLON = ";" - LPAREN = "(" - RPAREN = ")" - LBRACE = "{" - RBRACE = "}" + LPAREN = "(" + RPAREN = ")" + LBRACE = "{" + RBRACE = "}" - // KEYWORDS - FUNCTION = "FUNCTION" - LET = "LET" + // KEYWORDS + FUNCTION = "FUNCTION" + LET = "LET" ) + +var keywords = map[string]TokenType{ + "fn": FUNCTION, + "let": LET, +} + +func LookupIdent(ident string) TokenType { + if tok, ok := keywords[ident]; ok { + return tok + } + return IDENT +}