// 用 JavaScript 实现的、基于有限状态自动机的词法分析器

/**
 * Token representing a numeric literal.
 */
class TokenNumberLiteral {
    /**
     * @param {number} n Numeric value of the literal; stored on `data`.
     */
    constructor(n) {
        this.data = n
    }
}
/**
 * Token representing an identifier (a variable name).
 */
class TokenIdentifier {
    /**
     * @param {string} s Identifier text; stored on `data`.
     */
    constructor(s) {
        this.data = s
    }
}

// Marker token classes: they carry no data, the class itself identifies the token.

/** Token for the "-" character. */
class TokenMinus {}

/** Token for the "+" character. */
class TokenPlus {}

/** Token for the "/" character. */
class TokenSlash {}

/** Token for the "*" character. */
class TokenStar {}

/** Token for whitespace. */
class TokenWhitespace {}


// True when `c` falls in the range "0".."9" under string comparison.
// Intended to be called with a one-character string.
let is_digit = (c) => "0" <= c && c <= "9"

// True when `c` is the "." character (loose equality, as in the original).
let is_dot = (c) => c == "."
// True when `c` is an ASCII letter ("a".."z" or "A".."Z") or an underscore.
// Intended to be called with a one-character string.
let is_alpha = (c) => {
    const in_lowercase_range = c >= "a" && c <= "z"
    const in_uppercase_range = c >= "A" && c <= "Z"
    const is_underscore = c == "_"
    return in_lowercase_range || in_uppercase_range || is_underscore
}

// True when `c` satisfies is_digit or is_alpha.
let is_alphanumeric = (c) => is_digit(c) || is_alpha(c)

// True when `c` is a space, carriage return, tab, or line feed.
let is_whitespace = (c) => c == " " || c == "\r" || c == "\t" || c == "\n"


/**
 * A position in the source text, expressed as a line and a column.
 * Both default to 1, so `new TokenPosition()` is the start of the input.
 */
class TokenPosition {
    /**
     * @param {number} [line=1] Line number.
     * @param {number} [column=1] Column number.
     */
    constructor(line = 1, column = 1) {
        this.line = line
        this.column = column
    }
}


/**
 * Bundles a token with the source text it was built from and a position.
 */
class TokenWithContext {
    /**
     * @param {*} token The token object.
     * @param {*} lexeme The source text associated with the token.
     * @param {*} position The position associated with the token.
     */
    constructor(token, lexeme, position) {
        Object.assign(this, { token, lexeme, position })
    }
}


/**
 * Character-by-character scanner (lexer) that turns `source` into tokens.
 *
 * Two cursors index into `source`:
 *   - `scan_status`: index of the next character `advance()` will consume.
 *   - `peek_status`: index of the next character `peek()` will return; it may
 *     run ahead of `scan_status` for lookahead and is rewound by `reset_peek()`.
 *
 * `current_lexeme` accumulates the characters consumed for the token being
 * built, and `current_position` tracks the line/column of the next character
 * to be consumed.
 */
class Scanner {
    /**
     * @param {string} [source] Text to tokenize. Defaults to the built-in
     *     two-line sample input.
     */
    constructor(source = "123.23a_123 + a\n        2 + 3") {
        this.source = source

        this.peek_status = 0
        this.scan_status = 0
        this.current_lexeme = ""
        this.current_position = new TokenPosition()
    }

    /**
     * Return the character at the peek cursor and move the peek cursor forward
     * by one. Nothing is consumed.
     *
     * @returns {string|null} The peeked character, or null at end of input.
     */
    peek() {
        if (this.peek_status >= this.source.length) {
            return null
        }
        const peek_char = this.source[this.peek_status]
        this.peek_status++
        return peek_char
    }

    /** Rewind the peek cursor to the scan cursor. */
    reset_peek() {
        this.peek_status = this.scan_status
    }

    /**
     * Consume one character: append it to `current_lexeme`, update
     * `current_position`, and move both cursors forward by one.
     *
     * @returns {string|null} The consumed character, or null at end of input.
     */
    advance() {
        if (this.scan_status >= this.source.length) {
            return null
        }
        const next_char = this.source[this.scan_status]

        this.current_lexeme += next_char
        if (next_char === "\n") {
            // A newline moves to the first column of the next line.
            this.current_position.line++
            this.current_position.column = 1
        } else {
            this.current_position.column++
        }
        this.peek_status++
        this.scan_status++
        return next_char
    }

    /**
     * Consume characters for as long as the next one satisfies `f`.
     *
     * @param {function(string): boolean} f Predicate applied to the next character.
     */
    advance_while(f) {
        while (this.peek_check1(f)) {
            this.advance()
        }
    }

    /**
     * Test the next unconsumed character against `f` without consuming it.
     *
     * @param {function(string): boolean} f Predicate for the next character.
     * @returns {boolean} Result of `f`, or false at end of input.
     */
    peek_check1(f) {
        this.reset_peek()

        const peek_char = this.peek()
        if (peek_char === null) {
            return false
        }
        return f(peek_char)
    }

    /**
     * Test the next two unconsumed characters against `f1` and `f2` without
     * consuming them.
     *
     * @param {function(string): boolean} f1 Predicate for the next character.
     * @param {function(string): boolean} f2 Predicate for the character after it.
     * @returns {boolean} True only when both characters exist and both
     *     predicates hold.
     */
    peek_check2(f1, f2) {
        this.reset_peek()

        const peek_char_1 = this.peek()
        if (peek_char_1 === null) {
            return false
        }
        const peek_char_2 = this.peek()
        if (peek_char_2 === null) {
            return false
        }
        return f1(peek_char_1) && f2(peek_char_2)
    }

    /**
     * Finish scanning a number whose first digit has already been consumed:
     * more digits, then optionally a "." followed by at least one digit.
     *
     * @returns {TokenNumberLiteral} Token holding `Number(current_lexeme)`.
     */
    number() {
        this.advance_while(is_digit)
        // Only take the "." when a digit follows it, so a trailing dot as in
        // "1." is left for the next token.
        if (this.peek_check2(is_dot, is_digit)) {
            this.advance()
            this.advance_while(is_digit)
        }
        return new TokenNumberLiteral(Number(this.current_lexeme))
    }

    /**
     * Finish scanning an identifier whose first character has already been
     * consumed: any run of letters, digits and underscores.
     *
     * @returns {TokenIdentifier} Token holding the identifier text.
     */
    identifier() {
        this.advance_while(is_alphanumeric)
        return new TokenIdentifier(this.current_lexeme)
    }

    /**
     * Scan and return the next token.
     *
     * @returns {TokenWithContext|null} The token with its lexeme and the
     *     position of its first character, or null at end of input. A
     *     character that starts no known token yields a TokenWithContext
     *     whose `token` is null and whose `lexeme` is that character.
     */
    scan_next() {
        // Snapshot the start position. current_position is mutated in place by
        // advance(), so handing out the live object would make every token
        // report the scanner's latest position instead of its own.
        const start_position = Object.assign(new TokenPosition(), this.current_position)
        this.current_lexeme = ""

        const next_char = this.advance()
        if (next_char === null) {
            return null
        }

        let token = null
        if (next_char === "+") {
            token = new TokenPlus()
        } else if (next_char === "-") {
            token = new TokenMinus()
        } else if (next_char === "*") {
            token = new TokenStar()
        } else if (next_char === "/") {
            token = new TokenSlash()
        } else if (is_whitespace(next_char)) {
            token = new TokenWhitespace()
        } else if (is_digit(next_char)) {
            token = this.number()
        } else if (is_alpha(next_char)) {
            // The predicate must be called: the bare function reference is
            // always truthy and would turn every other character into an
            // identifier.
            token = this.identifier()
        }

        return new TokenWithContext(token, this.current_lexeme, start_position)
    }
}

// Drive a scanner over its source and print every token until scan_next()
// signals end of input by returning null.
let s = new Scanner()
for (let token = s.scan_next(); token != null; token = s.scan_next()) {
    console.log(token)
}