lexer chapter 1 complete

2021-11-30 15:27:22 -05:00 · 2021-11-30 15:27:22 -05:00 · 4b65ef822e
commit 4b65ef822e
parent d93f87c889
3 changed files with 112 additions and 22 deletions
--- a/lexer/lexer.go
+++ b/lexer/lexer.go
@ -30,6 +30,8 @@ func (l *Lexer) readChar() {
 func (l *Lexer) NextToken() token.Token {
 	var tok token.Token

+	l.skipWhitespace()
+
 	switch l.ch {
 	case '=':
 		tok = newToken(token.ASSIGN, l.ch)
@ -50,6 +52,18 @@ func (l *Lexer) NextToken() token.Token {
 	case 0:
 		tok.Literal = ""
 		tok.Type = token.EOF
+	default:
+		if isLetter(l.ch) {
+			tok.Literal = l.readIdentifier()
+			tok.Type = token.LookupIdent(tok.Literal)
+			return tok
+		} else if isDigit(l.ch) {
+			tok.Type = token.INT
+			tok.Literal = l.readNumber()
+			return tok
+		} else {
+			tok = newToken(token.ILLEGAL, l.ch)
+		}
 	}

 	l.readChar()
@ -59,3 +73,32 @@ func (l *Lexer) NextToken() token.Token {
 func newToken(tokenType token.TokenType, ch byte) token.Token {
 	return token.Token{Type: tokenType, Literal: string(ch)}
 }
+
+func (l *Lexer) readIdentifier() string {
+	position := l.position
+	for isLetter(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
+
+func isLetter(ch byte) bool {
+	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
+}
+func isDigit(ch byte) bool {
+	return '0' <= ch && ch <= '9'
+}
+
+func (l *Lexer) skipWhitespace() {
+	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
+		l.readChar()
+	}
+}
+
+func (l *Lexer) readNumber() string {
+	position := l.position
+	for isDigit(l.ch) {
+		l.readChar()
+	}
+	return l.input[position:l.position]
+}
--- a/lexer/lexer_test.go
+++ b/lexer/lexer_test.go
@ -7,19 +7,54 @@ import (
 )

 func TestNextToken(t *testing.T) {
-	input := `=+(){},;`
+	input := `let five = 5;
+let ten = 10;
+
+let add = fn(x,y) {
+	x + y;
+};
+let result = add(five, ten);
+`

 	tests := []struct {
 		expectedType    token.TokenType
 		expectedLiteral string
 	}{
+		{token.LET, "let"},
+		{token.IDENT, "five"},
 		{token.ASSIGN, "="},
-		{token.PLUS, "+"},
+		{token.INT, "5"},
+		{token.SEMICOLON, ";"},
+		{token.LET, "let"},
+		{token.IDENT, "ten"},
+		{token.ASSIGN, "="},
+		{token.INT, "10"},
+		{token.SEMICOLON, ";"},
+		{token.LET, "let"},
+		{token.IDENT, "add"},
+		{token.ASSIGN, "="},
+		{token.FUNCTION, "fn"},
 		{token.LPAREN, "("},
+		{token.IDENT, "x"},
+		{token.COMMA, ","},
+		{token.IDENT, "y"},
 		{token.RPAREN, ")"},
 		{token.LBRACE, "{"},
+		{token.IDENT, "x"},
+		{token.PLUS, "+"},
+		{token.IDENT, "y"},
+		{token.SEMICOLON, ";"},
 		{token.RBRACE, "}"},
+		{token.SEMICOLON, ";"},
+		{token.LET, "let"},
+		{token.IDENT, "result"},
+		{token.ASSIGN, "="},
+		{token.IDENT, "add"},
+		{token.LPAREN, "("},
+		{token.IDENT, "five"},
 		{token.COMMA, ","},
+		{token.IDENT, "ten"},
+		{token.RPAREN, ")"},
 		{token.SEMICOLON, ";"},
 		{token.EOF, ""},
 	}
--- a/token/token.go
+++ b/token/token.go
@ -3,32 +3,44 @@ package token
 type TokenType string

 type Token struct {
-  Type TokenType
-  Literal string
+	Type    TokenType
+	Literal string
 }

 const (
-  ILLEGAL = "ILLEGAL"
-  EOF = "EOF"
+	ILLEGAL = "ILLEGAL"
+	EOF     = "EOF"

-  //Identifiers and literals
-  IDENT = "IDENT" //add, foo, bar, a, b, c...
-  INT = "INT" /// 1 2 3 4 5
+	//Identifiers and literals
+	IDENT = "IDENT" //add, foo, bar, a, b, c...
+	INT   = "INT"   /// 1 2 3 4 5

-  //OPERATORS
-  ASSIGN = "="
-  PLUS = "+"
+	//OPERATORS
+	ASSIGN = "="
+	PLUS   = "+"

-  //DELIM
-  COMMA = ""
-  SEMICOLON = ";"
+	//DELIM
+	COMMA     = ""
+	SEMICOLON = ";"

-  LPAREN = "("
-  RPAREN = ")"
-  LBRACE = "{"
-  RBRACE = "}"
+	LPAREN = "("
+	RPAREN = ")"
+	LBRACE = "{"
+	RBRACE = "}"

-  // KEYWORDS
-  FUNCTION = "FUNCTION"
-  LET = "LET"
+	// KEYWORDS
+	FUNCTION = "FUNCTION"
+	LET      = "LET"
 )
+
+var keywords = map[string]TokenType{
+	"fn":  FUNCTION,
+	"let": LET,
+}
+
+func LookupIdent(ident string) TokenType {
+	if tok, ok := keywords[ident]; ok {
+		return tok
+	}
+	return IDENT
+}