From 3679537a80ac84bee6ea7b5719b7edd6114b24a1 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Wed, 5 May 2021 15:57:33 +0200 Subject: [PATCH 01/17] Fix pytest-cov setup --- lark-sandbox/setup.cfg | 1 - 1 file changed, 1 deletion(-) diff --git a/lark-sandbox/setup.cfg b/lark-sandbox/setup.cfg index 77f7877..e9c84f5 100644 --- a/lark-sandbox/setup.cfg +++ b/lark-sandbox/setup.cfg @@ -19,7 +19,6 @@ addopts = # Print all `print(...)` statements in the console --capture=no # pytest-cov: - --cov=app --cov-report=term:skip-covered --cov-report=html --cov-report=xml From ecfc4829f8863fe351960173f23b9c54d96b8b5c Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Wed, 5 May 2021 15:59:01 +0200 Subject: [PATCH 02/17] Create a simple grammar for calculator --- lark-sandbox/calculator.lark | 16 ++++++++++++++++ lark-sandbox/calculator_test.py | 12 ++++++++++++ 2 files changed, 28 insertions(+) create mode 100644 lark-sandbox/calculator.lark create mode 100644 lark-sandbox/calculator_test.py diff --git a/lark-sandbox/calculator.lark b/lark-sandbox/calculator.lark new file mode 100644 index 0000000..4c6cdb0 --- /dev/null +++ b/lark-sandbox/calculator.lark @@ -0,0 +1,16 @@ +expression : term (("+" | "-") term)* + +term : factor (("*" | "/") factor)* + +factor : ("+" | "-") factor + | power + +power : primary ("**" factor)* + +primary : NUMBER + | "(" expression ")" + +%import common.WS_INLINE +%import common.NUMBER + +%ignore WS_INLINE diff --git a/lark-sandbox/calculator_test.py b/lark-sandbox/calculator_test.py new file mode 100644 index 0000000..85b1516 --- /dev/null +++ b/lark-sandbox/calculator_test.py @@ -0,0 +1,12 @@ +import os + +from lark import Lark + +grammar = open(os.path.join(os.path.dirname(__file__), "calculator.lark"), "r").read() +parser = Lark(grammar, start="expression") + + +def test_calculator(): + ast = parser.parse("(1 + 2) * 3 - -4 ** 5") + assert ast + print(ast.pretty()) From b107bcca6bf4345cb7cb3226ed8a70e0dcd746a8 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Wed, 5 May 2021 16:10:59 +0200 Subject: [PATCH 03/17] Define operators --- lark-sandbox/calculator.lark | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/lark-sandbox/calculator.lark b/lark-sandbox/calculator.lark index 4c6cdb0..27d5f05 100644 --- a/lark-sandbox/calculator.lark +++ b/lark-sandbox/calculator.lark @@ -1,15 +1,23 @@ -expression : term (("+" | "-") term)* +expression : term ((PLUS | MINUS) term)* -term : factor (("*" | "/") factor)* +term : factor ((MULTIPLY | DIVIDE) factor)* -factor : ("+" | "-") factor +factor : (PLUS | MINUS) factor | power -power : primary ("**" factor)* +power : primary (POWER factor)* primary : NUMBER | "(" expression ")" +// Operators + +PLUS : "+" +MINUS : "-" +MULTIPLY : "*" +DIVIDE : "/" +POWER : "**" + %import common.WS_INLINE %import common.NUMBER From c86635fbdf98b8d298b0e95dcd9314d62cc8663f Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Wed, 5 May 2021 19:12:49 +0200 Subject: [PATCH 04/17] WIP --- src/Calculator/Ast.ts | 23 ++++++ src/Calculator/Calculator.test.ts | 0 src/Calculator/Calculator.ts | 0 src/Calculator/Parser.test.ts | 41 ++++++++++ src/Calculator/Parser.ts | 113 ++++++++++++++++++++++++++++ src/Calculator/Tokenizer.test.ts | 20 +++++ src/Calculator/Tokenizer.ts | 119 ++++++++++++++++++++++++++++++ 7 files changed, 316 insertions(+) create mode 100644 src/Calculator/Ast.ts create mode 100644 src/Calculator/Calculator.test.ts create mode 100644 src/Calculator/Calculator.ts create mode 100644 src/Calculator/Parser.test.ts create mode 100644 src/Calculator/Parser.ts create mode 100644 src/Calculator/Tokenizer.test.ts create mode 100644 src/Calculator/Tokenizer.ts diff --git a/src/Calculator/Ast.ts b/src/Calculator/Ast.ts new file mode 100644 index 0000000..99b5f1d --- /dev/null +++ b/src/Calculator/Ast.ts @@ -0,0 +1,23 @@ +abstract class AstNode {} + +export class BinaryOperation extends AstNode { + constructor( + public left: AstNode, + public operator: "+" | "-" | "*" | "/" | "**", + public right: AstNode + ) { + super(); + } +} + +export class UnaryOperation extends AstNode { + constructor(public operator: "+" | "-", public child: AstNode) { + super(); + } +} + +export class NumberLiteral extends AstNode { + constructor(public value: number) { + super(); + } +} diff --git a/src/Calculator/Calculator.test.ts b/src/Calculator/Calculator.test.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/Calculator/Calculator.ts b/src/Calculator/Calculator.ts new file mode 100644 index 0000000..e69de29 diff --git a/src/Calculator/Parser.test.ts b/src/Calculator/Parser.test.ts new file mode 100644 index 0000000..9ef127c --- /dev/null +++ b/src/Calculator/Parser.test.ts @@ -0,0 +1,41 @@ +import { BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; +import { Parser } from "./Parser"; +import { Tokenizer } from "./Tokenizer"; + +describe("Parser", () => { + it("generates a valid AST", () => { + const tokens = new Tokenizer("(1 + 2) * 3 - -4 ** 5").tokenize(); + const ast = new Parser(tokens).parse(); + + expect(ast).toEqual( + new BinaryOperation( + new BinaryOperation( + new BinaryOperation(new NumberLiteral(1), "+", new NumberLiteral(2)), + "*", + new NumberLiteral(3) + ), + "-", + new UnaryOperation( + "-", + new BinaryOperation(new NumberLiteral(4), "**", new NumberLiteral(5)) + ) + ) + ); + }); + + it("has nice errors reporting", () => { + const tokens = new Tokenizer("(1 + 2 *").tokenize(); + + expect(() => new Parser(tokens).parse()).toThrow( + "Unexpected token eof at position 5." + ); + }); + + it("has nice errors reporting", () => { + const tokens = new Tokenizer("** **").tokenize(); + + expect(() => new Parser(tokens).parse()).toThrow( + "Unexpected token ** at position 0." + ); + }); +}); diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts new file mode 100644 index 0000000..29ab68b --- /dev/null +++ b/src/Calculator/Parser.ts @@ -0,0 +1,113 @@ +import { AstNode } from "../Parser"; +import { BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; +import { Token, TokenType } from "./Tokenizer"; + +export class Parser { + private position = 0; + + constructor(private tokens: Token[]) {} + + parse(): AstNode { + return this.expression(); + } + + // term ((PLUS | MINUS) term)* + private expression(): AstNode { + let left = this.term(); + + while (this.currentToken.type === "+" || this.currentToken.type === "-") { + const operator = this.currentToken.type; + this.consume(operator); + + left = new BinaryOperation(left, operator, this.term()); + } + + return left; + } + + // factor ((MULTIPLY | DIVIDE) factor)* + private term(): AstNode { + let left = this.factor(); + + while (this.currentToken.type === "*" || this.currentToken.type === "/") { + const operator = this.currentToken.type; + this.consume(operator); + + left = new BinaryOperation(left, operator, this.factor()); + } + + return left; + } + + // : (PLUS | MINUS) factor + // | power + private factor(): AstNode { + if (this.currentToken.type === "+" || this.currentToken.type === "-") { + const operator = this.currentToken.type; + this.consume(operator); + + return new UnaryOperation(operator, this.factor()); + } + + return this.power(); + } + + // primary (POWER factor)* + private power(): AstNode { + let left = this.primary(); + + while (this.currentToken.type === "**") { + this.consume("**"); + left = new BinaryOperation(left, "**", this.factor()); + } + + return left; + } + + // : NUMBER + // | group + private primary(): AstNode { + if (this.currentToken.type === "number") { + const value = this.currentToken.value!; + this.consume("number"); + + return new NumberLiteral(value); + } + + if (this.currentToken.type === "(") { + return this.group(); + } + + // TODO: Improve this error reporting + throw new Error( + `Unexpected token ${this.currentToken.type} at position ${this.position}.` + ); + } + + // "(" expression ")" + private group(): AstNode { + this.consume("("); + const expression = this.expression(); + this.consume(")"); + + return expression; + } + + private get currentToken(): Token { + return this.tokens[this.position]; + } + + private consume(tokenType: TokenType): void { + if (this.currentToken.type !== tokenType) { + throw new Error( + `Expected ${tokenType} but got ${this.currentToken.type} at position ${this.currentToken.position}.` + ); + } + + this.advance(); + } + + private advance() { + this.position += 1; + } +} diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts new file mode 100644 index 0000000..ebd67dd --- /dev/null +++ b/src/Calculator/Tokenizer.test.ts @@ -0,0 +1,20 @@ +import { Tokenizer, Token } from "./Tokenizer"; + +describe("Tokenizer", () => { + it("tokenizes the expression", () => { + const tokens = new Tokenizer("(1 + 12) * 3 ** 1234").tokenize(); + + expect(tokens).toEqual([ + new Token(0, "("), + new Token(1, "number", 1), + new Token(3, "+"), + new Token(5, "number", 12), + new Token(7, ")"), + new Token(9, "*"), + new Token(11, "number", 3), + new Token(13, "**"), + new Token(16, "number", 1234), + new Token(20, "eof"), + ]); + }); +}); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts new file mode 100644 index 0000000..7042ba4 --- /dev/null +++ b/src/Calculator/Tokenizer.ts @@ -0,0 +1,119 @@ +export type TokenType = + | "+" + | "-" + | "*" + | "/" + | "**" + | "(" + | ")" + | "number" + | "eof"; + +export class Token { + constructor( + public position: number, + public type: TokenType, + public value?: number + ) {} +} + +const isDigit = (symbol: string): boolean => { + return /^\d$/.test(symbol); +}; + +export class Tokenizer { + private position = -1; + + constructor(private expression: string) {} + + tokenize(): Token[] { + const tokens: Token[] = []; + + while (true) { + const token = this.nextToken(); + + tokens.push(token); + + if (token.type === "eof") { + break; + } + } + + return tokens; + } + + private nextToken(): Token { + this.advance(); + this.skipWhitespaces(); + + switch (this.currentSymbol) { + case "+": + return this.createToken("+"); + case "-": + return this.createToken("-"); + case "*": { + if (this.nextSymbol === "*") { + const token = this.createToken("**"); + this.advance(); + + return token; + } + + return this.createToken("*"); + } + case "/": + return this.createToken("/"); + case "(": + return this.createToken("("); + case ")": + return this.createToken(")"); + default: { + if (this.currentSymbol === undefined) { + return this.createToken("eof"); + } + + if (isDigit(this.currentSymbol)) { + return this.tokenizeNumber(); + } + + throw Error( + `Unrecognized character ${this.currentSymbol} at ${this.position}` + ); + } + } + } + + private advance() { + this.position += 1; + } + + private skipWhitespaces(): void { + while (this.currentSymbol === " ") { + this.advance(); + } + } + + private get currentSymbol(): undefined | string { + return this.expression[this.position]; + } + + private get nextSymbol(): undefined | string { + return this.expression[this.position + 1]; + } + + private tokenizeNumber(): Token { + const position = this.position; + let raw = this.currentSymbol!; + + while (this.nextSymbol !== undefined && isDigit(this.nextSymbol)) { + this.advance(); + raw += this.currentSymbol!; + } + + return new Token(position, "number", parseFloat(raw)); + } + + private createToken(type: TokenType, value?: number): Token { + return new Token(this.position, type, value); + } +} From c4c5446520a89f353332341a8f3a6dba5b9efd11 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Wed, 5 May 2021 19:49:51 +0200 Subject: [PATCH 05/17] Evaluate expression --- src/Calculator/Ast.ts | 37 ++++++++++++++++++++++++++++++++++- src/Calculator/Parser.test.ts | 8 +++++++- src/Calculator/Parser.ts | 6 ++---- 3 files changed, 45 insertions(+), 6 deletions(-) diff --git a/src/Calculator/Ast.ts b/src/Calculator/Ast.ts index 99b5f1d..e51b75b 100644 --- a/src/Calculator/Ast.ts +++ b/src/Calculator/Ast.ts @@ -1,4 +1,6 @@ -abstract class AstNode {} +export abstract class AstNode { + abstract evaluate(): number; +} export class BinaryOperation extends AstNode { constructor( @@ -8,16 +10,49 @@ export class BinaryOperation extends AstNode { ) { super(); } + + evaluate(): number { + const left = this.left.evaluate(); + const right = this.right.evaluate(); + + switch (this.operator) { + case "+": + return left + right; + case "-": + return left + right; + case "*": + return left * right; + case "/": + return left / right; + case "**": + return left ** right; + } + } } export class UnaryOperation extends AstNode { constructor(public operator: "+" | "-", public child: AstNode) { super(); } + + evaluate(): number { + const value = this.child.evaluate(); + + switch (this.operator) { + case "+": + return value; + case "-": + return value * -1; + } + } } export class NumberLiteral extends AstNode { constructor(public value: number) { super(); } + + evaluate(): number { + return this.value; + } } diff --git a/src/Calculator/Parser.test.ts b/src/Calculator/Parser.test.ts index 9ef127c..7e32921 100644 --- a/src/Calculator/Parser.test.ts +++ b/src/Calculator/Parser.test.ts @@ -23,11 +23,17 @@ describe("Parser", () => { ); }); + it("evaluates", () => { + const tokens = new Tokenizer("1 + 2 * 3").tokenize(); + const ast = new Parser(tokens).parse(); + expect(ast.evaluate()).toBe(7); + }); + it("has nice errors reporting", () => { const tokens = new Tokenizer("(1 + 2 *").tokenize(); expect(() => new Parser(tokens).parse()).toThrow( - "Unexpected token eof at position 5." + "Unexpected token eof at position 8." ); }); diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts index 29ab68b..88194c1 100644 --- a/src/Calculator/Parser.ts +++ b/src/Calculator/Parser.ts @@ -1,5 +1,4 @@ -import { AstNode } from "../Parser"; -import { BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; +import { AstNode, BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; import { Token, TokenType } from "./Tokenizer"; export class Parser { @@ -78,9 +77,8 @@ export class Parser { return this.group(); } - // TODO: Improve this error reporting throw new Error( - `Unexpected token ${this.currentToken.type} at position ${this.position}.` + `Unexpected token ${this.currentToken.type} at position ${this.currentToken.position}.` ); } From 09b017d71b4e6937eab0fb9acbae03ace30ee152 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 07:15:01 +0200 Subject: [PATCH 06/17] Tokenize indentifier --- src/Calculator/Ast.ts | 10 +++++-- src/Calculator/Parser.ts | 4 +-- src/Calculator/Token.ts | 12 ++++++++ src/Calculator/Tokenizer.test.ts | 13 +++++---- src/Calculator/Tokenizer.ts | 49 +++++++++++++++++--------------- 5 files changed, 55 insertions(+), 33 deletions(-) create mode 100644 src/Calculator/Token.ts diff --git a/src/Calculator/Ast.ts b/src/Calculator/Ast.ts index e51b75b..81bb0f2 100644 --- a/src/Calculator/Ast.ts +++ b/src/Calculator/Ast.ts @@ -1,3 +1,5 @@ +import { Operator } from "./Token"; + export abstract class AstNode { abstract evaluate(): number; } @@ -5,7 +7,7 @@ export abstract class AstNode { export class BinaryOperation extends AstNode { constructor( public left: AstNode, - public operator: "+" | "-" | "*" | "/" | "**", + public operator: Operator, public right: AstNode ) { super(); @@ -26,12 +28,14 @@ export class BinaryOperation extends AstNode { return left / right; case "**": return left ** right; + default: + throw new Error(`Unsupported operator ${this.operator}`); } } } export class UnaryOperation extends AstNode { - constructor(public operator: "+" | "-", public child: AstNode) { + constructor(public operator: Operator, public child: AstNode) { super(); } @@ -43,6 +47,8 @@ export class UnaryOperation extends AstNode { return value; case "-": return value * -1; + default: + throw new Error(`Unsupported operator ${this.operator}`); } } } diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts index 88194c1..0a21569 100644 --- a/src/Calculator/Parser.ts +++ b/src/Calculator/Parser.ts @@ -1,5 +1,5 @@ import { AstNode, BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; -import { Token, TokenType } from "./Tokenizer"; +import { Token, TokenType } from "./Token"; export class Parser { private position = 0; @@ -67,7 +67,7 @@ export class Parser { // | group private primary(): AstNode { if (this.currentToken.type === "number") { - const value = this.currentToken.value!; + const value = this.currentToken.value as number; this.consume("number"); return new NumberLiteral(value); diff --git a/src/Calculator/Token.ts b/src/Calculator/Token.ts new file mode 100644 index 0000000..810eeb0 --- /dev/null +++ b/src/Calculator/Token.ts @@ -0,0 +1,12 @@ +export type Operator = "+" | "-" | "*" | "/" | "**"; +export type Delimiter = "(" | ")"; +export type Literal = "number" | "identifier"; +export type TokenType = Operator | Delimiter | Literal | "eof"; + +export class Token { + constructor( + public position: number, + public type: TokenType, + public value?: string | number + ) {} +} diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index ebd67dd..6e841b3 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -1,8 +1,9 @@ -import { Tokenizer, Token } from "./Tokenizer"; +import { Token } from "./Token"; +import { Tokenizer } from "./Tokenizer"; describe("Tokenizer", () => { it("tokenizes the expression", () => { - const tokens = new Tokenizer("(1 + 12) * 3 ** 1234").tokenize(); + const tokens = new Tokenizer("(1 + 12) * foo ** 1234").tokenize(); expect(tokens).toEqual([ new Token(0, "("), @@ -11,10 +12,10 @@ describe("Tokenizer", () => { new Token(5, "number", 12), new Token(7, ")"), new Token(9, "*"), - new Token(11, "number", 3), - new Token(13, "**"), - new Token(16, "number", 1234), - new Token(20, "eof"), + new Token(11, "identifier", "foo"), + new Token(15, "**"), + new Token(18, "number", 1234), + new Token(22, "eof"), ]); }); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index 7042ba4..41ea18a 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -1,24 +1,11 @@ -export type TokenType = - | "+" - | "-" - | "*" - | "/" - | "**" - | "(" - | ")" - | "number" - | "eof"; - -export class Token { - constructor( - public position: number, - public type: TokenType, - public value?: number - ) {} -} +import { Token, TokenType } from "./Token"; const isDigit = (symbol: string): boolean => { - return /^\d$/.test(symbol); + return /^[0-9]$/.test(symbol); +}; + +const isCharacter = (symbol: string): boolean => { + return /^[a-z]$/.test(symbol); }; export class Tokenizer { @@ -76,6 +63,10 @@ export class Tokenizer { return this.tokenizeNumber(); } + if (isCharacter(this.currentSymbol)) { + return this.tokenizeIdentifier(); + } + throw Error( `Unrecognized character ${this.currentSymbol} at ${this.position}` ); @@ -103,17 +94,29 @@ export class Tokenizer { private tokenizeNumber(): Token { const position = this.position; - let raw = this.currentSymbol!; + let text = this.currentSymbol!; while (this.nextSymbol !== undefined && isDigit(this.nextSymbol)) { this.advance(); - raw += this.currentSymbol!; + text += this.currentSymbol!; + } + + return new Token(position, "number", parseFloat(text)); + } + + private tokenizeIdentifier(): Token { + const position = this.position; + let text = this.currentSymbol!; + + while (this.nextSymbol !== undefined && isCharacter(this.nextSymbol)) { + this.advance(); + text += this.currentSymbol!; } - return new Token(position, "number", parseFloat(raw)); + return new Token(position, "identifier", text); } - private createToken(type: TokenType, value?: number): Token { + private createToken(type: TokenType, value?: string | number): Token { return new Token(this.position, type, value); } } From 47bf46cc8be57836eb93c6807742b825225043f6 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 07:40:19 +0200 Subject: [PATCH 07/17] Regexp based tokenizer WIP --- src/Calculator/Tokenizer.test.ts | 4 + src/Calculator/Tokenizer.ts | 150 +++++++++++-------------------- 2 files changed, 56 insertions(+), 98 deletions(-) diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index 6e841b3..8710ab2 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -18,4 +18,8 @@ describe("Tokenizer", () => { new Token(22, "eof"), ]); }); + + it.skip("raises error on unrecognized character", () => { + expect(() => new Tokenizer("123 %").tokenize()).toThrow(); + }); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index 41ea18a..375b9f3 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -1,122 +1,76 @@ import { Token, TokenType } from "./Token"; -const isDigit = (symbol: string): boolean => { - return /^[0-9]$/.test(symbol); -}; - -const isCharacter = (symbol: string): boolean => { - return /^[a-z]$/.test(symbol); -}; +type Rule = (value: string) => Token; export class Tokenizer { private position = -1; - constructor(private expression: string) {} + private rules: [string | RegExp, undefined | Rule][] = []; + + constructor(private expression: string) { + this.addRule(/^\s+/); + this.addRule("(", () => this.createToken("(")); + this.addRule(")", () => this.createToken(")")); + this.addRule("+", () => this.createToken("+")); + this.addRule("-", () => this.createToken("-")); + this.addRule("**", () => this.createToken("**")); + this.addRule("*", () => this.createToken("*")); + this.addRule("/", () => this.createToken("/")); + this.addRule(/^\d+/, (value) => + this.createToken("number", parseFloat(value)) + ); + this.addRule(/^\w+/, (value) => this.createToken("identifier", value)); + } tokenize(): Token[] { const tokens: Token[] = []; - while (true) { - const token = this.nextToken(); - - tokens.push(token); - - if (token.type === "eof") { - break; - } - } - - return tokens; - } - - private nextToken(): Token { - this.advance(); - this.skipWhitespaces(); - - switch (this.currentSymbol) { - case "+": - return this.createToken("+"); - case "-": - return this.createToken("-"); - case "*": { - if (this.nextSymbol === "*") { - const token = this.createToken("**"); - this.advance(); - - return token; + this.position = 0; + while (!this.isEnd()) { + const rest = this.expression.slice(this.position); + + for (const [matcher, rule] of this.rules) { + if (typeof matcher === "string") { + const text = matcher; + if (rest.startsWith(text)) { + if (rule) { + tokens.push(rule(text)); + } + + this.position += text.length; + break; + } + } else { + const result = rest.match(matcher); + if (result) { + const text = result[0]; + if (rule) { + tokens.push(rule(text)); + } + + this.position += text.length; + break; + } } - - return this.createToken("*"); } - case "/": - return this.createToken("/"); - case "(": - return this.createToken("("); - case ")": - return this.createToken(")"); - default: { - if (this.currentSymbol === undefined) { - return this.createToken("eof"); - } - if (isDigit(this.currentSymbol)) { - return this.tokenizeNumber(); - } - - if (isCharacter(this.currentSymbol)) { - return this.tokenizeIdentifier(); - } - - throw Error( - `Unrecognized character ${this.currentSymbol} at ${this.position}` - ); - } + // throw new Error(`Unrecognized character ${rest}!`); } - } - - private advance() { - this.position += 1; - } - - private skipWhitespaces(): void { - while (this.currentSymbol === " ") { - this.advance(); - } - } - - private get currentSymbol(): undefined | string { - return this.expression[this.position]; - } - - private get nextSymbol(): undefined | string { - return this.expression[this.position + 1]; - } - - private tokenizeNumber(): Token { - const position = this.position; - let text = this.currentSymbol!; - while (this.nextSymbol !== undefined && isDigit(this.nextSymbol)) { - this.advance(); - text += this.currentSymbol!; - } + tokens.push(new Token(this.position, "eof")); - return new Token(position, "number", parseFloat(text)); + return tokens; } - private tokenizeIdentifier(): Token { - const position = this.position; - let text = this.currentSymbol!; - - while (this.nextSymbol !== undefined && isCharacter(this.nextSymbol)) { - this.advance(); - text += this.currentSymbol!; - } - - return new Token(position, "identifier", text); + private addRule(matcher: string | RegExp, rule?: Rule): void { + this.rules.push([matcher, rule]); } private createToken(type: TokenType, value?: string | number): Token { return new Token(this.position, type, value); } + + private isEnd(): boolean { + return this.position >= this.expression.length; + } } From 429220af17ec7ee3a050a645bbfb4c1baea14043 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 07:46:36 +0200 Subject: [PATCH 08/17] Regexp based tokenizer WIP --- src/Calculator/Tokenizer.ts | 37 ++++++++++++++++++------------------- 1 file changed, 18 insertions(+), 19 deletions(-) diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index 375b9f3..b70ecbe 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -29,28 +29,27 @@ export class Tokenizer { while (!this.isEnd()) { const rest = this.expression.slice(this.position); - for (const [matcher, rule] of this.rules) { - if (typeof matcher === "string") { - const text = matcher; - if (rest.startsWith(text)) { - if (rule) { - tokens.push(rule(text)); - } - - this.position += text.length; - break; + for (const [pattern, rule] of this.rules) { + let matchedText: undefined | string; + + if (typeof pattern === "string") { + if (rest.startsWith(pattern)) { + matchedText = pattern; } } else { - const result = rest.match(matcher); + const result = rest.match(pattern); if (result) { - const text = result[0]; - if (rule) { - tokens.push(rule(text)); - } + matchedText = result[0]; + } + } - this.position += text.length; - break; + if (matchedText) { + if (rule) { + tokens.push(rule(matchedText)); } + + this.position += matchedText.length; + break; } } @@ -62,8 +61,8 @@ export class Tokenizer { return tokens; } - private addRule(matcher: string | RegExp, rule?: Rule): void { - this.rules.push([matcher, rule]); + private addRule(pattern: string | RegExp, rule?: Rule): void { + this.rules.push([pattern, rule]); } private createToken(type: TokenType, value?: string | number): Token { From 8c3974f605748c562cc368e56ed8b57a8e9805d4 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 07:50:43 +0200 Subject: [PATCH 09/17] Regexp based tokenizer WIP --- src/Calculator/Tokenizer.test.ts | 6 ++- src/Calculator/Tokenizer.ts | 85 +++++++++++++++++++------------- 2 files changed, 55 insertions(+), 36 deletions(-) diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index 8710ab2..8f59946 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -19,7 +19,9 @@ describe("Tokenizer", () => { ]); }); - it.skip("raises error on unrecognized character", () => { - expect(() => new Tokenizer("123 %").tokenize()).toThrow(); + it("raises error on unrecognized character", () => { + expect(() => new Tokenizer("123 % 123").tokenize()).toThrow( + "Unrecognized character '%'" + ); }); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index b70ecbe..95b4ebf 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -1,51 +1,45 @@ import { Token, TokenType } from "./Token"; -type Rule = (value: string) => Token; +type Pattern = string | RegExp; +type Rule = (match: string) => undefined | Token; export class Tokenizer { private position = -1; - private rules: [string | RegExp, undefined | Rule][] = []; + private rules: [Pattern, Rule][] = []; constructor(private expression: string) { - this.addRule(/^\s+/); - this.addRule("(", () => this.createToken("(")); - this.addRule(")", () => this.createToken(")")); - this.addRule("+", () => this.createToken("+")); - this.addRule("-", () => this.createToken("-")); - this.addRule("**", () => this.createToken("**")); - this.addRule("*", () => this.createToken("*")); - this.addRule("/", () => this.createToken("/")); - this.addRule(/^\d+/, (value) => - this.createToken("number", parseFloat(value)) - ); - this.addRule(/^\w+/, (value) => this.createToken("identifier", value)); + this.rule(/^\s+/, () => this.skip()); + this.rule("(", () => this.accept("(")); + this.rule(")", () => this.accept(")")); + this.rule("+", () => this.accept("+")); + this.rule("-", () => this.accept("-")); + this.rule("**", () => this.accept("**")); + this.rule("*", () => this.accept("*")); + this.rule("/", () => this.accept("/")); + this.rule(/^\d+/, (match) => this.accept("number", parseFloat(match))); + this.rule(/^\w+/, (match) => this.accept("identifier", match)); } tokenize(): Token[] { + this.position = 0; + const tokens: Token[] = []; - this.position = 0; while (!this.isEnd()) { const rest = this.expression.slice(this.position); - for (const [pattern, rule] of this.rules) { - let matchedText: undefined | string; + let noMatchFound = true; - if (typeof pattern === "string") { - if (rest.startsWith(pattern)) { - matchedText = pattern; - } - } else { - const result = rest.match(pattern); - if (result) { - matchedText = result[0]; - } - } + for (const [pattern, rule] of this.rules) { + const matchedText = this.test(pattern, rest); if (matchedText) { - if (rule) { - tokens.push(rule(matchedText)); + noMatchFound = false; + + const token = rule && rule(matchedText); + if (token) { + tokens.push(token); } this.position += matchedText.length; @@ -53,7 +47,11 @@ export class Tokenizer { } } - // throw new Error(`Unrecognized character ${rest}!`); + if (noMatchFound) { + throw new Error( + `Unrecognized character ${this.expression[this.position]}` + ); + } } tokens.push(new Token(this.position, "eof")); @@ -61,15 +59,34 @@ export class Tokenizer { return tokens; } - private addRule(pattern: string | RegExp, rule?: Rule): void { + private test(pattern: Pattern, text: string): string | undefined { + if (typeof pattern === "string") { + if (text.startsWith(pattern)) { + return pattern; + } + } else { + const match = text.match(pattern); + if (match) { + return match[0]; + } + } + + return undefined; + } + + private isEnd(): boolean { + return this.position >= this.expression.length; + } + + private rule(pattern: Pattern, rule: Rule): void { this.rules.push([pattern, rule]); } - private createToken(type: TokenType, value?: string | number): Token { + private accept(type: TokenType, value?: string | number): Token { return new Token(this.position, type, value); } - private isEnd(): boolean { - return this.position >= this.expression.length; + private skip() { + return undefined; } } From 9e9461244b10013949ae5f0b7d153260b911b796 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 09:42:30 +0200 Subject: [PATCH 10/17] Implement interpreter --- src/Calculator/Ast.ts | 64 ---------------------- src/Calculator/Calculator.test.ts | 0 src/Calculator/Calculator.ts | 0 src/Calculator/Expression.ts | 31 +++++++++++ src/Calculator/Interpreter.test.ts | 22 ++++++++ src/Calculator/Interpreter.ts | 88 ++++++++++++++++++++++++++++++ src/Calculator/Parser.test.ts | 25 +++++---- src/Calculator/Parser.ts | 30 +++++++--- src/Calculator/Tokenizer.test.ts | 6 +- src/Calculator/Tokenizer.ts | 14 ++++- 10 files changed, 191 insertions(+), 89 deletions(-) delete mode 100644 src/Calculator/Ast.ts delete mode 100644 src/Calculator/Calculator.test.ts delete mode 100644 src/Calculator/Calculator.ts create mode 100644 src/Calculator/Expression.ts create mode 100644 src/Calculator/Interpreter.test.ts create mode 100644 src/Calculator/Interpreter.ts diff --git a/src/Calculator/Ast.ts b/src/Calculator/Ast.ts deleted file mode 100644 index 81bb0f2..0000000 --- a/src/Calculator/Ast.ts +++ /dev/null @@ -1,64 +0,0 @@ -import { Operator } from "./Token"; - -export abstract class AstNode { - abstract evaluate(): number; -} - -export class BinaryOperation extends AstNode { - constructor( - public left: AstNode, - public operator: Operator, - public right: AstNode - ) { - super(); - } - - evaluate(): number { - const left = this.left.evaluate(); - const right = this.right.evaluate(); - - switch (this.operator) { - case "+": - return left + right; - case "-": - return left + right; - case "*": - return left * right; - case "/": - return left / right; - case "**": - return left ** right; - default: - throw new Error(`Unsupported operator ${this.operator}`); - } - } -} - -export class UnaryOperation extends AstNode { - constructor(public operator: Operator, public child: AstNode) { - super(); - } - - evaluate(): number { - const value = this.child.evaluate(); - - switch (this.operator) { - case "+": - return value; - case "-": - return value * -1; - default: - throw new Error(`Unsupported operator ${this.operator}`); - } - } -} - -export class NumberLiteral extends AstNode { - constructor(public value: number) { - super(); - } - - evaluate(): number { - return this.value; - } -} diff --git a/src/Calculator/Calculator.test.ts b/src/Calculator/Calculator.test.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/Calculator/Calculator.ts b/src/Calculator/Calculator.ts deleted file mode 100644 index e69de29..0000000 diff --git a/src/Calculator/Expression.ts b/src/Calculator/Expression.ts new file mode 100644 index 0000000..420c699 --- /dev/null +++ b/src/Calculator/Expression.ts @@ -0,0 +1,31 @@ +import { Operator } from "./Token"; + +export abstract class Expression {} + +export class BinaryOperation extends Expression { + constructor( + public left: Expression, + public operator: Operator, + public right: Expression + ) { + super(); + } +} + +export class UnaryOperation extends Expression { + constructor(public operator: Operator, public child: Expression) { + super(); + } +} + +export class NumberLiteral extends Expression { + constructor(public value: number) { + super(); + } +} + +export class VariableAccess extends Expression { + constructor(public name: string) { + super(); + } +} diff --git a/src/Calculator/Interpreter.test.ts b/src/Calculator/Interpreter.test.ts new file mode 100644 index 0000000..d119afe --- /dev/null +++ b/src/Calculator/Interpreter.test.ts @@ -0,0 +1,22 @@ +import { Interpreter, SymbolTable } from "./Interpreter"; +import { Parser } from "./Parser"; +import { Tokenizer } from "./Tokenizer"; + +describe("Interpreter", () => { + it("evaluates", () => { + const tokens = new Tokenizer("1 + 2 * 3").tokenize(); + const expression = new Parser(tokens).parse(); + const interpreter = new Interpreter(expression); + + expect(interpreter.evaluate()).toBe(7); + }); + + it("evaluates with variables", () => { + const tokens = new Tokenizer("1 + foo + bar").tokenize(); + const expression = new Parser(tokens).parse(); + const variables: SymbolTable = { foo: 2, bar: 3 }; + const interpreter = new Interpreter(expression, variables); + + expect(interpreter.evaluate()).toBe(6); + }); +}); diff --git a/src/Calculator/Interpreter.ts b/src/Calculator/Interpreter.ts new file mode 100644 index 0000000..cc77fbc --- /dev/null +++ b/src/Calculator/Interpreter.ts @@ -0,0 +1,88 @@ +import { + BinaryOperation, + Expression, + NumberLiteral, + UnaryOperation, + VariableAccess, +} from "./Expression"; + +export type SymbolTable = Record; + +export class Interpreter { + constructor( + private readonly expression: Expression, + private readonly symbolTable: SymbolTable = {} + ) {} + + public evaluate(): number { + return this.visitExpression(this.expression); + } + + private visitExpression(expression: Expression): number { + if (expression instanceof NumberLiteral) { + return this.visitNumberLiteral(expression); + } + + if (expression instanceof VariableAccess) { + return this.visitVariableAccess(expression); + } + + if (expression instanceof BinaryOperation) { + return this.visitBinaryOperation(expression); + } + + if (expression instanceof UnaryOperation) { + return this.visitUnaryOperation(expression); + } + + return 0; + } + + private visitNumberLiteral(expression: NumberLiteral) { + return expression.value; + } + + private visitVariableAccess(expression: VariableAccess): number { + const name = expression.name; + const value = this.symbolTable[name]; + + if (value === undefined) { + throw new Error(`Undefined variable ${name}`); + } + + return value; + } + + private visitBinaryOperation(expression: BinaryOperation): number { + const left = this.visitExpression(expression.left); + const right = this.visitExpression(expression.right); + + switch (expression.operator) { + case "+": + return left + right; + case "-": + return left + right; + case "*": + return left * right; + case "/": + return left / right; + case "**": + return left ** right; + default: + throw new Error(`Unsupported binary operator ${expression.operator}`); + } + } + + private visitUnaryOperation(expression: UnaryOperation) { + const value = this.visitExpression(expression.child); + + switch (expression.operator) { + case "+": + return value; + case "-": + return value * -1; + default: + throw new Error(`Unsupported unary operator ${expression.operator}`); + } + } +} diff --git a/src/Calculator/Parser.test.ts b/src/Calculator/Parser.test.ts index 7e32921..1ace11d 100644 --- a/src/Calculator/Parser.test.ts +++ b/src/Calculator/Parser.test.ts @@ -1,13 +1,18 @@ -import { BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; +import { + BinaryOperation, + NumberLiteral, + UnaryOperation, + VariableAccess, +} from "./Expression"; import { Parser } from "./Parser"; import { Tokenizer } from "./Tokenizer"; describe("Parser", () => { it("generates a valid AST", () => { - const tokens = new Tokenizer("(1 + 2) * 3 - -4 ** 5").tokenize(); - const ast = new Parser(tokens).parse(); + const tokens = new Tokenizer("(1 + 2) * 3 - -foo ** 5").tokenize(); + const expression = new Parser(tokens).parse(); - expect(ast).toEqual( + expect(expression).toEqual( new BinaryOperation( new BinaryOperation( new BinaryOperation(new NumberLiteral(1), "+", new NumberLiteral(2)), @@ -17,18 +22,16 @@ describe("Parser", () => { "-", new UnaryOperation( "-", - new BinaryOperation(new NumberLiteral(4), "**", new NumberLiteral(5)) + new BinaryOperation( + new VariableAccess("foo"), + "**", + new NumberLiteral(5) + ) ) ) ); }); - it("evaluates", () => { - const tokens = new Tokenizer("1 + 2 * 3").tokenize(); - const ast = new Parser(tokens).parse(); - expect(ast.evaluate()).toBe(7); - }); - it("has nice errors reporting", () => { const tokens = new Tokenizer("(1 + 2 *").tokenize(); diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts index 0a21569..b38cea5 100644 --- a/src/Calculator/Parser.ts +++ b/src/Calculator/Parser.ts @@ -1,4 +1,10 @@ -import { AstNode, BinaryOperation, NumberLiteral, UnaryOperation } from "./Ast"; +import { + Expression, + BinaryOperation, + NumberLiteral, + UnaryOperation, + VariableAccess, +} from "./Expression"; import { Token, TokenType } from "./Token"; export class Parser { @@ -6,12 +12,12 @@ export class Parser { constructor(private tokens: Token[]) {} - parse(): AstNode { + parse(): Expression { return this.expression(); } // term ((PLUS | MINUS) term)* - private expression(): AstNode { + private expression(): Expression { let left = this.term(); while (this.currentToken.type === "+" || this.currentToken.type === "-") { @@ -25,7 +31,7 @@ export class Parser { } // factor ((MULTIPLY | DIVIDE) factor)* - private term(): AstNode { + private term(): Expression { let left = this.factor(); while (this.currentToken.type === "*" || this.currentToken.type === "/") { @@ -40,7 +46,7 @@ export class Parser { // : (PLUS | MINUS) factor // | power - private factor(): AstNode { + private factor(): Expression { if (this.currentToken.type === "+" || this.currentToken.type === "-") { const operator = this.currentToken.type; this.consume(operator); @@ -52,7 +58,7 @@ export class Parser { } // primary (POWER factor)* - private power(): AstNode { + private power(): Expression { let left = this.primary(); while (this.currentToken.type === "**") { @@ -64,8 +70,9 @@ export class Parser { } // : NUMBER + // | IDENTIFIER // | group - private primary(): AstNode { + private primary(): Expression { if (this.currentToken.type === "number") { const value = this.currentToken.value as number; this.consume("number"); @@ -73,6 +80,13 @@ export class Parser { return new NumberLiteral(value); } + if (this.currentToken.type === "identifier") { + const name = this.currentToken.value as string; + this.consume("identifier"); + + return new VariableAccess(name); + } + if (this.currentToken.type === "(") { return this.group(); } @@ -83,7 +97,7 @@ export class Parser { } // "(" expression ")" - private group(): AstNode { + private group(): Expression { this.consume("("); const expression = this.expression(); this.consume(")"); diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index 8f59946..ba92378 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -3,7 +3,7 @@ import { Tokenizer } from "./Tokenizer"; describe("Tokenizer", () => { it("tokenizes the expression", () => { - const tokens = new Tokenizer("(1 + 12) * foo ** 1234").tokenize(); + const tokens = new Tokenizer("(1 + 12) * foo ** 12.34").tokenize(); expect(tokens).toEqual([ new Token(0, "("), @@ -14,8 +14,8 @@ describe("Tokenizer", () => { new Token(9, "*"), new Token(11, "identifier", "foo"), new Token(15, "**"), - new Token(18, "number", 1234), - new Token(22, "eof"), + new Token(18, "number", 12.34), + new Token(23, "eof"), ]); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index 95b4ebf..c1ad131 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -10,14 +10,22 @@ export class Tokenizer { constructor(private expression: string) { this.rule(/^\s+/, () => this.skip()); - this.rule("(", () => this.accept("(")); - this.rule(")", () => this.accept(")")); + + // Operators this.rule("+", () => this.accept("+")); this.rule("-", () => this.accept("-")); this.rule("**", () => this.accept("**")); this.rule("*", () => this.accept("*")); this.rule("/", () => this.accept("/")); - this.rule(/^\d+/, (match) => this.accept("number", parseFloat(match))); + + // Delimiters + this.rule("(", () => this.accept("(")); + this.rule(")", () => this.accept(")")); + + // Literals + this.rule(/^\d+(.\d+)?/, (match) => + this.accept("number", parseFloat(match)) + ); this.rule(/^\w+/, (match) => this.accept("identifier", match)); } From 21478979e4759a26bf043f28ddbfc59d6216b806 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 11:56:28 +0200 Subject: [PATCH 11/17] Add repl and better errors handling --- src/Calculator/Expression.ts | 14 ++++----- src/Calculator/Interpreter.test.ts | 8 +++++ src/Calculator/Interpreter.ts | 49 ++++++++++++++++-------------- src/Calculator/Parser.test.ts | 35 ++++++++++----------- src/Calculator/Parser.ts | 12 +++++--- src/Calculator/Token.ts | 5 +-- src/Calculator/TokenType.ts | 14 +++++++++ src/Calculator/Tokenizer.test.ts | 14 ++++----- src/Calculator/Tokenizer.ts | 9 ++++-- src/Calculator/repl.ts | 36 ++++++++++++++++++++++ 10 files changed, 132 insertions(+), 64 deletions(-) create mode 100644 src/Calculator/TokenType.ts create mode 100644 src/Calculator/repl.ts diff --git a/src/Calculator/Expression.ts b/src/Calculator/Expression.ts index 420c699..43b4ca4 100644 --- a/src/Calculator/Expression.ts +++ b/src/Calculator/Expression.ts @@ -1,11 +1,11 @@ -import { Operator } from "./Token"; +import { BinaryOperator, UnaryOperator } from "./TokenType"; export abstract class Expression {} export class BinaryOperation extends Expression { constructor( public left: Expression, - public operator: Operator, + public operator: BinaryOperator, public right: Expression ) { super(); @@ -13,19 +13,19 @@ export class BinaryOperation extends Expression { } export class UnaryOperation extends Expression { - constructor(public operator: Operator, public child: Expression) { + constructor(public operator: UnaryOperator, public child: Expression) { super(); } } -export class NumberLiteral extends Expression { - constructor(public value: number) { +export class VariableAccess extends Expression { + constructor(public name: string) { super(); } } -export class VariableAccess extends Expression { - constructor(public name: string) { +export class NumberLiteral extends Expression { + constructor(public value: number) { super(); } } diff --git a/src/Calculator/Interpreter.test.ts b/src/Calculator/Interpreter.test.ts index d119afe..8335b79 100644 --- a/src/Calculator/Interpreter.test.ts +++ b/src/Calculator/Interpreter.test.ts @@ -19,4 +19,12 @@ describe("Interpreter", () => { expect(interpreter.evaluate()).toBe(6); }); + + it("raises an error on division by zero", () => { + const tokens = new Tokenizer("1/0").tokenize(); + const expression = new Parser(tokens).parse(); + const interpreter = new Interpreter(expression); + + expect(() => interpreter.evaluate()).toThrow("Division by zero"); + }); }); diff --git a/src/Calculator/Interpreter.ts b/src/Calculator/Interpreter.ts index cc77fbc..debfd65 100644 --- a/src/Calculator/Interpreter.ts +++ b/src/Calculator/Interpreter.ts @@ -19,14 +19,6 @@ export class Interpreter { } private visitExpression(expression: Expression): number { - if (expression instanceof NumberLiteral) { - return this.visitNumberLiteral(expression); - } - - if (expression instanceof VariableAccess) { - return this.visitVariableAccess(expression); - } - if (expression instanceof BinaryOperation) { return this.visitBinaryOperation(expression); } @@ -35,22 +27,15 @@ export class Interpreter { return this.visitUnaryOperation(expression); } - return 0; - } - - private visitNumberLiteral(expression: NumberLiteral) { - return expression.value; - } - - private visitVariableAccess(expression: VariableAccess): number { - const name = expression.name; - const value = this.symbolTable[name]; + if (expression instanceof VariableAccess) { + return this.visitVariableAccess(expression); + } - if (value === undefined) { - throw new Error(`Undefined variable ${name}`); + if (expression instanceof NumberLiteral) { + return this.visitNumberLiteral(expression); } - return value; + return 0; } private visitBinaryOperation(expression: BinaryOperation): number { @@ -64,8 +49,13 @@ export class Interpreter { return left + right; case "*": return left * right; - case "/": + case "/": { + if (right === 0) { + throw new Error("Division by zero"); + } + return left / right; + } case "**": return left ** right; default: @@ -85,4 +75,19 @@ export class Interpreter { throw new Error(`Unsupported unary operator ${expression.operator}`); } } + + private visitVariableAccess(expression: VariableAccess): number { + const name = expression.name; + const value = this.symbolTable[name]; + + if (value === undefined) { + throw new Error(`Undefined variable ${name}`); + } + + return value; + } + + private visitNumberLiteral(expression: NumberLiteral) { + return expression.value; + } } diff --git a/src/Calculator/Parser.test.ts b/src/Calculator/Parser.test.ts index 1ace11d..b4555d0 100644 --- a/src/Calculator/Parser.test.ts +++ b/src/Calculator/Parser.test.ts @@ -9,13 +9,17 @@ import { Tokenizer } from "./Tokenizer"; describe("Parser", () => { it("generates a valid AST", () => { - const tokens = new Tokenizer("(1 + 2) * 3 - -foo ** 5").tokenize(); + const tokens = new Tokenizer("(1.5 + 2) * 3 - -foo ** 5").tokenize(); const expression = new Parser(tokens).parse(); expect(expression).toEqual( new BinaryOperation( new BinaryOperation( - new BinaryOperation(new NumberLiteral(1), "+", new NumberLiteral(2)), + new BinaryOperation( + new NumberLiteral(1.5), + "+", + new NumberLiteral(2) + ), "*", new NumberLiteral(3) ), @@ -32,19 +36,16 @@ describe("Parser", () => { ); }); - it("has nice errors reporting", () => { - const tokens = new Tokenizer("(1 + 2 *").tokenize(); - - expect(() => new Parser(tokens).parse()).toThrow( - "Unexpected token eof at position 8." - ); - }); - - it("has nice errors reporting", () => { - const tokens = new Tokenizer("** **").tokenize(); - - expect(() => new Parser(tokens).parse()).toThrow( - "Unexpected token ** at position 0." - ); - }); + it.each` + input | error + ${"(1 + 2 *"} | ${"Unexpected 'eof' at position 8."} + ${"** **"} | ${"Unexpected '**' at position 0."} + ${"1 2"} | ${"Expected 'eof' but got 'number' at position 2."} + `( + "throws error when the given input has invalid syntax", + ({ input, error }) => { + const tokens = new Tokenizer(input).tokenize(); + expect(() => new Parser(tokens).parse()).toThrow(error); + } + ); }); diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts index b38cea5..dfbb1c1 100644 --- a/src/Calculator/Parser.ts +++ b/src/Calculator/Parser.ts @@ -5,7 +5,8 @@ import { UnaryOperation, VariableAccess, } from "./Expression"; -import { Token, TokenType } from "./Token"; +import { Token } from "./Token"; +import { TokenType } from "./TokenType"; export class Parser { private position = 0; @@ -13,7 +14,10 @@ export class Parser { constructor(private tokens: Token[]) {} parse(): Expression { - return this.expression(); + const expression = this.expression(); + this.consume("eof"); + + return expression; } // term ((PLUS | MINUS) term)* @@ -92,7 +96,7 @@ export class Parser { } throw new Error( - `Unexpected token ${this.currentToken.type} at position ${this.currentToken.position}.` + `Unexpected '${this.currentToken.type}' at position ${this.currentToken.position}.` ); } @@ -112,7 +116,7 @@ export class Parser { private consume(tokenType: TokenType): void { if (this.currentToken.type !== tokenType) { throw new Error( - `Expected ${tokenType} but got ${this.currentToken.type} at position ${this.currentToken.position}.` + `Expected '${tokenType}' but got '${this.currentToken.type}' at position ${this.currentToken.position}.` ); } diff --git a/src/Calculator/Token.ts b/src/Calculator/Token.ts index 810eeb0..583e774 100644 --- a/src/Calculator/Token.ts +++ b/src/Calculator/Token.ts @@ -1,7 +1,4 @@ -export type Operator = "+" | "-" | "*" | "/" | "**"; -export type Delimiter = "(" | ")"; -export type Literal = "number" | "identifier"; -export type TokenType = Operator | Delimiter | Literal | "eof"; +import { TokenType } from "./TokenType"; export class Token { constructor( diff --git a/src/Calculator/TokenType.ts b/src/Calculator/TokenType.ts new file mode 100644 index 0000000..1cdef71 --- /dev/null +++ b/src/Calculator/TokenType.ts @@ -0,0 +1,14 @@ +export type BinaryOperator = "+" | "-" | "*" | "/" | "**"; + +export type UnaryOperator = "+" | "-"; + +export type Delimiter = "(" | ")"; + +export type Literal = "number" | "identifier"; + +export type TokenType = + | BinaryOperator + | UnaryOperator + | Delimiter + | Literal + | "eof"; diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index ba92378..af52ff5 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -3,7 +3,7 @@ import { Tokenizer } from "./Tokenizer"; describe("Tokenizer", () => { it("tokenizes the expression", () => { - const tokens = new Tokenizer("(1 + 12) * foo ** 12.34").tokenize(); + const tokens = new Tokenizer("(1 + 12) / fooBar ** 12.34").tokenize(); expect(tokens).toEqual([ new Token(0, "("), @@ -11,17 +11,17 @@ describe("Tokenizer", () => { new Token(3, "+"), new Token(5, "number", 12), new Token(7, ")"), - new Token(9, "*"), - new Token(11, "identifier", "foo"), - new Token(15, "**"), - new Token(18, "number", 12.34), - new Token(23, "eof"), + new Token(9, "/"), + new Token(11, "identifier", "fooBar"), + new Token(18, "**"), + new Token(21, "number", 12.34), + new Token(26, "eof"), ]); }); it("raises error on unrecognized character", () => { expect(() => new Tokenizer("123 % 123").tokenize()).toThrow( - "Unrecognized character '%'" + "Unrecognized character '%' at 4" ); }); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index c1ad131..bbff715 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -1,4 +1,5 @@ -import { Token, TokenType } from "./Token"; +import { Token } from "./Token"; +import { TokenType } from "./TokenType"; type Pattern = string | RegExp; type Rule = (match: string) => undefined | Token; @@ -23,7 +24,7 @@ export class Tokenizer { this.rule(")", () => this.accept(")")); // Literals - this.rule(/^\d+(.\d+)?/, (match) => + this.rule(/^\d+(\.\d+)?/, (match) => this.accept("number", parseFloat(match)) ); this.rule(/^\w+/, (match) => this.accept("identifier", match)); @@ -57,7 +58,9 @@ export class Tokenizer { if (noMatchFound) { throw new Error( - `Unrecognized character ${this.expression[this.position]}` + `Unrecognized character '${this.expression[this.position]}' at ${ + this.position + }` ); } } diff --git a/src/Calculator/repl.ts b/src/Calculator/repl.ts new file mode 100644 index 0000000..b97410f --- /dev/null +++ b/src/Calculator/repl.ts @@ -0,0 +1,36 @@ +import * as readline from "readline"; + +import { SymbolTable, Interpreter } from "./Interpreter"; +import { Parser } from "./Parser"; +import { Tokenizer } from "./Tokenizer"; + +const PROMPT = "> "; + +const scanner = readline.createInterface({ + input: process.stdin, + output: process.stdout, + prompt: PROMPT, +}); + +scanner.prompt(); + +const symbolTable: SymbolTable = { + one: 1, + two: 2, + three: 3, +}; + +scanner.on("line", (line) => { + const input = line.trim(); + + try { + const lexer = new Tokenizer(input).tokenize(); + const ast = new Parser(lexer).parse(); + const result = new Interpreter(ast, symbolTable).evaluate(); + console.log(result); + } catch (error) { + console.error(error); + } + + scanner.prompt(); +}); From 263cbdb7762e146f6266cd52d574878f07e74236 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 11:58:36 +0200 Subject: [PATCH 12/17] wip --- src/Calculator/Tokenizer.ts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index bbff715..2dbbee4 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -13,6 +13,7 @@ export class Tokenizer { this.rule(/^\s+/, () => this.skip()); // Operators + this.rule("+", () => this.accept("+")); this.rule("-", () => this.accept("-")); this.rule("**", () => this.accept("**")); @@ -20,13 +21,16 @@ export class Tokenizer { this.rule("/", () => this.accept("/")); // Delimiters + this.rule("(", () => this.accept("(")); this.rule(")", () => this.accept(")")); // Literals + this.rule(/^\d+(\.\d+)?/, (match) => this.accept("number", parseFloat(match)) ); + this.rule(/^\w+/, (match) => this.accept("identifier", match)); } From 0c3018f1d476302f97cc01c0275fc2b21e596b6d Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 14:04:26 +0200 Subject: [PATCH 13/17] Refactor Token constructor --- lark-sandbox/calculator.lark | 2 -- src/Calculator/Token.ts | 2 +- src/Calculator/Tokenizer.test.ts | 20 ++++++++++---------- src/Calculator/Tokenizer.ts | 4 ++-- 4 files changed, 13 insertions(+), 15 deletions(-) diff --git a/lark-sandbox/calculator.lark b/lark-sandbox/calculator.lark index 27d5f05..c1375de 100644 --- a/lark-sandbox/calculator.lark +++ b/lark-sandbox/calculator.lark @@ -10,8 +10,6 @@ power : primary (POWER factor)* primary : NUMBER | "(" expression ")" -// Operators - PLUS : "+" MINUS : "-" MULTIPLY : "*" diff --git a/src/Calculator/Token.ts b/src/Calculator/Token.ts index 583e774..c257b6a 100644 --- a/src/Calculator/Token.ts +++ b/src/Calculator/Token.ts @@ -2,8 +2,8 @@ import { TokenType } from "./TokenType"; export class Token { constructor( - public position: number, public type: TokenType, + public position: number, public value?: string | number ) {} } diff --git a/src/Calculator/Tokenizer.test.ts b/src/Calculator/Tokenizer.test.ts index af52ff5..6fbc55e 100644 --- a/src/Calculator/Tokenizer.test.ts +++ b/src/Calculator/Tokenizer.test.ts @@ -6,16 +6,16 @@ describe("Tokenizer", () => { const tokens = new Tokenizer("(1 + 12) / fooBar ** 12.34").tokenize(); expect(tokens).toEqual([ - new Token(0, "("), - new Token(1, "number", 1), - new Token(3, "+"), - new Token(5, "number", 12), - new Token(7, ")"), - new Token(9, "/"), - new Token(11, "identifier", "fooBar"), - new Token(18, "**"), - new Token(21, "number", 12.34), - new Token(26, "eof"), + new Token("(", 0), + new Token("number", 1, 1), + new Token("+", 3), + new Token("number", 5, 12), + new Token(")", 7), + new Token("/", 9), + new Token("identifier", 11, "fooBar"), + new Token("**", 18), + new Token("number", 21, 12.34), + new Token("eof", 26), ]); }); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index 2dbbee4..c52b266 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -69,7 +69,7 @@ export class Tokenizer { } } - tokens.push(new Token(this.position, "eof")); + tokens.push(new Token("eof", this.position)); return tokens; } @@ -98,7 +98,7 @@ export class Tokenizer { } private accept(type: TokenType, value?: string | number): Token { - return new Token(this.position, type, value); + return new Token(type, this.position, value); } private skip() { From 497e32ee04848f7da028325b4aff3080d37307a8 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 14:09:10 +0200 Subject: [PATCH 14/17] Add runtime errors --- src/Calculator/Interpreter.test.ts | 11 +++++++++++ src/Calculator/Interpreter.ts | 5 +++-- src/Calculator/errors.ts | 13 +++++++++++++ 3 files changed, 27 insertions(+), 2 deletions(-) create mode 100644 src/Calculator/errors.ts diff --git a/src/Calculator/Interpreter.test.ts b/src/Calculator/Interpreter.test.ts index 8335b79..813276f 100644 --- a/src/Calculator/Interpreter.test.ts +++ b/src/Calculator/Interpreter.test.ts @@ -1,3 +1,4 @@ +import { UndefinedVariableError, ZeroDivisionError } from "./errors"; import { Interpreter, SymbolTable } from "./Interpreter"; import { Parser } from "./Parser"; import { Tokenizer } from "./Tokenizer"; @@ -20,11 +21,21 @@ describe("Interpreter", () => { expect(interpreter.evaluate()).toBe(6); }); + it("raises an error when variable is not defined", () => { + const tokens = new Tokenizer("1 + foo + bar").tokenize(); + const expression = new Parser(tokens).parse(); + const interpreter = new Interpreter(expression); + + expect(() => interpreter.evaluate()).toThrow(UndefinedVariableError); + expect(() => interpreter.evaluate()).toThrow("Variable foo is not defined"); + }); + it("raises an error on division by zero", () => { const tokens = new Tokenizer("1/0").tokenize(); const expression = new Parser(tokens).parse(); const interpreter = new Interpreter(expression); + expect(() => interpreter.evaluate()).toThrow(ZeroDivisionError); expect(() => interpreter.evaluate()).toThrow("Division by zero"); }); }); diff --git a/src/Calculator/Interpreter.ts b/src/Calculator/Interpreter.ts index debfd65..86f6db2 100644 --- a/src/Calculator/Interpreter.ts +++ b/src/Calculator/Interpreter.ts @@ -1,3 +1,4 @@ +import { UndefinedVariableError, ZeroDivisionError } from "./errors"; import { BinaryOperation, Expression, @@ -51,7 +52,7 @@ export class Interpreter { return left * right; case "/": { if (right === 0) { - throw new Error("Division by zero"); + throw new ZeroDivisionError(); } return left / right; @@ -81,7 +82,7 @@ export class Interpreter { const value = this.symbolTable[name]; if (value === undefined) { - throw new Error(`Undefined variable ${name}`); + throw new UndefinedVariableError(name); } return value; diff --git a/src/Calculator/errors.ts b/src/Calculator/errors.ts new file mode 100644 index 0000000..1527355 --- /dev/null +++ b/src/Calculator/errors.ts @@ -0,0 +1,13 @@ +class RuntimeError extends Error {} + +export class UndefinedVariableError extends RuntimeError { + constructor(name: string) { + super(`Variable ${name} is not defined`); + } +} + +export class ZeroDivisionError extends RuntimeError { + constructor() { + super("Division by zero"); + } +} From 8d4361c762f0e9f21555c1d445a6063a7a97b980 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 14:22:19 +0200 Subject: [PATCH 15/17] Better syntax errors --- src/Calculator/Parser.test.ts | 9 ++++++--- src/Calculator/Parser.ts | 9 +++------ src/Calculator/Tokenizer.ts | 8 ++++---- src/Calculator/errors.ts | 26 ++++++++++++++++++++++++++ 4 files changed, 39 insertions(+), 13 deletions(-) diff --git a/src/Calculator/Parser.test.ts b/src/Calculator/Parser.test.ts index b4555d0..8678365 100644 --- a/src/Calculator/Parser.test.ts +++ b/src/Calculator/Parser.test.ts @@ -1,3 +1,4 @@ +import { IllegalTokenError } from "./errors"; import { BinaryOperation, NumberLiteral, @@ -38,13 +39,15 @@ describe("Parser", () => { it.each` input | error - ${"(1 + 2 *"} | ${"Unexpected 'eof' at position 8."} - ${"** **"} | ${"Unexpected '**' at position 0."} - ${"1 2"} | ${"Expected 'eof' but got 'number' at position 2."} + ${"(1 + 2 *"} | ${"Unexpected 'eof' at position 8"} + ${"** **"} | ${"Unexpected '**' at position 0"} + ${"1 2"} | ${"Expected 'eof' but got 'number' at position 2"} `( "throws error when the given input has invalid syntax", ({ input, error }) => { const tokens = new Tokenizer(input).tokenize(); + + expect(() => new Parser(tokens).parse()).toThrow(IllegalTokenError); expect(() => new Parser(tokens).parse()).toThrow(error); } ); diff --git a/src/Calculator/Parser.ts b/src/Calculator/Parser.ts index dfbb1c1..9733037 100644 --- a/src/Calculator/Parser.ts +++ b/src/Calculator/Parser.ts @@ -1,3 +1,4 @@ +import { IllegalTokenError } from "./errors"; import { Expression, BinaryOperation, @@ -95,9 +96,7 @@ export class Parser { return this.group(); } - throw new Error( - `Unexpected '${this.currentToken.type}' at position ${this.currentToken.position}.` - ); + throw new IllegalTokenError(this.currentToken); } // "(" expression ")" @@ -115,9 +114,7 @@ export class Parser { private consume(tokenType: TokenType): void { if (this.currentToken.type !== tokenType) { - throw new Error( - `Expected '${tokenType}' but got '${this.currentToken.type}' at position ${this.currentToken.position}.` - ); + throw new IllegalTokenError(this.currentToken, tokenType); } this.advance(); diff --git a/src/Calculator/Tokenizer.ts b/src/Calculator/Tokenizer.ts index c52b266..4b14ea6 100644 --- a/src/Calculator/Tokenizer.ts +++ b/src/Calculator/Tokenizer.ts @@ -1,3 +1,4 @@ +import { IllegalCharacterError, SyntaxError } from "./errors"; import { Token } from "./Token"; import { TokenType } from "./TokenType"; @@ -61,10 +62,9 @@ export class Tokenizer { } if (noMatchFound) { - throw new Error( - `Unrecognized character '${this.expression[this.position]}' at ${ - this.position - }` + throw new IllegalCharacterError( + this.expression[this.position], + this.position ); } } diff --git a/src/Calculator/errors.ts b/src/Calculator/errors.ts index 1527355..0312a77 100644 --- a/src/Calculator/errors.ts +++ b/src/Calculator/errors.ts @@ -1,3 +1,29 @@ +import { Token } from "./Token"; +import { TokenType } from "./TokenType"; + +export abstract class SyntaxError extends Error { + protected constructor(message: string, public position: number) { + super(message); + } +} + +export class IllegalCharacterError extends SyntaxError { + constructor(character: string, position: number) { + super(`Unrecognized character '${character}' at ${position}`, position); + } +} + +export class IllegalTokenError extends SyntaxError { + constructor(currentToken: Token, expectedTokenType?: TokenType) { + super( + expectedTokenType + ? `Expected '${expectedTokenType}' but got '${currentToken.type}' at position ${currentToken.position}` + : `Unexpected '${currentToken.type}' at position ${currentToken.position}`, + currentToken.position + ); + } +} + class RuntimeError extends Error {} export class UndefinedVariableError extends RuntimeError { From f7c3a0230fb6b9270e89a28e55cbba0793269798 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Thu, 6 May 2021 16:32:39 +0200 Subject: [PATCH 16/17] Draw an arrow for error --- src/Calculator/Expression.ts | 15 +++++++++------ src/Calculator/errors.ts | 6 +++--- src/Calculator/repl.ts | 6 +++++- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/src/Calculator/Expression.ts b/src/Calculator/Expression.ts index 43b4ca4..1d7c2ec 100644 --- a/src/Calculator/Expression.ts +++ b/src/Calculator/Expression.ts @@ -4,28 +4,31 @@ export abstract class Expression {} export class BinaryOperation extends Expression { constructor( - public left: Expression, - public operator: BinaryOperator, - public right: Expression + public readonly left: Expression, + public readonly operator: BinaryOperator, + public readonly right: Expression ) { super(); } } export class UnaryOperation extends Expression { - constructor(public operator: UnaryOperator, public child: Expression) { + constructor( + public readonly operator: UnaryOperator, + public readonly child: Expression + ) { super(); } } export class VariableAccess extends Expression { - constructor(public name: string) { + constructor(public readonly name: string) { super(); } } export class NumberLiteral extends Expression { - constructor(public value: number) { + constructor(public readonly value: number) { super(); } } diff --git a/src/Calculator/errors.ts b/src/Calculator/errors.ts index 0312a77..66e0e4c 100644 --- a/src/Calculator/errors.ts +++ b/src/Calculator/errors.ts @@ -14,10 +14,10 @@ export class IllegalCharacterError extends SyntaxError { } export class IllegalTokenError extends SyntaxError { - constructor(currentToken: Token, expectedTokenType?: TokenType) { + constructor(currentToken: Token, expectedType?: TokenType) { super( - expectedTokenType - ? `Expected '${expectedTokenType}' but got '${currentToken.type}' at position ${currentToken.position}` + expectedType + ? `Expected '${expectedType}' but got '${currentToken.type}' at position ${currentToken.position}` : `Unexpected '${currentToken.type}' at position ${currentToken.position}`, currentToken.position ); diff --git a/src/Calculator/repl.ts b/src/Calculator/repl.ts index b97410f..f9f2765 100644 --- a/src/Calculator/repl.ts +++ b/src/Calculator/repl.ts @@ -1,5 +1,6 @@ import * as readline from "readline"; +import { SyntaxError } from "./errors"; import { SymbolTable, Interpreter } from "./Interpreter"; import { Parser } from "./Parser"; import { Tokenizer } from "./Tokenizer"; @@ -29,7 +30,10 @@ scanner.on("line", (line) => { const result = new Interpreter(ast, symbolTable).evaluate(); console.log(result); } catch (error) { - console.error(error); + if (error instanceof SyntaxError) { + console.error("^".padStart(PROMPT.length + error.position + 1, " ")); + } + console.error(error.message); } scanner.prompt(); From b631de257fff3e997c053e900db28dcc99cf1410 Mon Sep 17 00:00:00 2001 From: Lukasz Bandzarewicz Date: Sat, 8 May 2021 11:29:23 +0200 Subject: [PATCH 17/17] List predefined variables --- src/Calculator/repl.ts | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/Calculator/repl.ts b/src/Calculator/repl.ts index f9f2765..bb342da 100644 --- a/src/Calculator/repl.ts +++ b/src/Calculator/repl.ts @@ -13,14 +13,16 @@ const scanner = readline.createInterface({ prompt: PROMPT, }); -scanner.prompt(); - const symbolTable: SymbolTable = { one: 1, two: 2, three: 3, }; +console.log("Predefined variables:", symbolTable); + +scanner.prompt(); + scanner.on("line", (line) => { const input = line.trim();