Add initial language spec, Scanner almost complete

2022-06-16 17:12:43 -05:00 · 2022-06-16 17:12:43 -05:00 · 6d2f1ee907
commit 6d2f1ee907
parent 449ce575a7
3 changed files with 287 additions and 1 deletions
--- a/cobalt/Scanner.java
+++ b/cobalt/Scanner.java
@ -13,11 +13,36 @@ class Scanner {
    private int start = 0;
    private int current = 0;
    private int line = 0;
+    private static final Map<String, TokenType> keywords;

+    static {
+        // Reserved words of the language, mapped to the token type the
+        // scanner should emit for each of them.
+        keywords = new HashMap<>();
+        keywords.put("and", AND);
+        keywords.put("class", CLASS);
+        keywords.put("else", ELSE);
+        keywords.put("false", FALSE);
+        keywords.put("for", FOR);
+        keywords.put("func", FUNC);
+        keywords.put("if", IF);
+        keywords.put("nil", NIL);
+        keywords.put("or", OR);
+        keywords.put("print", PRINT);
+        keywords.put("return", RETURN);
+        keywords.put("super", SUPER);
+        keywords.put("this", THIS);
+        keywords.put("true", TRUE);
+        keywords.put("var", VAR);
+        keywords.put("while", WHILE);
+    }
+
+    // Constructor
+    // Set our source string to be the incoming character data from the
+    // input script
    Scanner(String source) {
        // Only the source text is stored here; the cursor fields (start,
        // current, line) keep the initial values from their declarations.
        this.source = source;
    }

+    // Scan the input file for all available tokens, return a Token list with all
+    // of our valid tokens
    List<Token> scanTokens() {
        while (!isAtEnd()) {
            start = current;
@ -28,7 +53,193 @@ class Scanner {
        return tokens;
    }

+    // Check to see if we have reached the end of the script
    private boolean isAtEnd() {
        // The cursor is exhausted once it is no longer a valid index into source.
        int length = source.length();
        return length <= current;
    }
+
+    // Parse the current token from the scanner to see if it's a valid
+    // lexeme. Report an error otherwise
+    private void scanToken() {
+        // Consume one character; each case decides which lexeme it starts.
+        char c = advance();
+        switch (c) {
+            // Structural and Accessors
+            case '(': addToken(LEFT_PAREN); break;
+            case ')': addToken(RIGHT_PAREN); break;
+            case '{': addToken(LEFT_BRACE); break;
+            case '}': addToken(RIGHT_BRACE); break;
+            case ',': addToken(COMMA); break;
+            case '.': addToken(DOT); break;
+            case ';': addToken(SEMICOLON); break;
+
+            // Operators
+            case '*': addToken(STAR); break;
+            case '-': addToken(MINUS); break;
+            case '+': addToken(PLUS); break;
+            // One- or two-character operators: match('=') consumes the '='
+            // only when it is actually there.
+            case '!':
+                addToken(match('=') ? BANG_EQUAL : BANG);
+                break;
+            case '=':
+                addToken(match('=') ? EQUAL_EQUAL : EQUAL);
+                break;
+            case '<':
+                addToken(match('=') ? LESS_EQUAL : LESS);
+                break;
+            case '>':
+                addToken(match('=') ? GREATER_EQUAL : GREATER);
+                break;
+            case '/':
+                if (match('/')) {
+                    // A comment goes until the end of the line; the newline
+                    // itself is left for the '\n' case so the line count advances
+                    while (peek() != '\n' && !isAtEnd()) advance();
+                } else {
+                    addToken(SLASH);
+                }
+                break;
+
+            // Whitespace and new lines
+            case ' ':
+            case '\r':
+            case '\t':
+                //ignore whitespace characters
+                break;
+            case '\n':
+                line++;
+                break;
+
+            // String literals: the opening quote has been consumed, string()
+            // scans through the closing quote and adds the STRING token
+            case '"': string(); break;
+
+            default:
+                if (isDigit(c)) {
+                    // Check to see if our incoming value is part of a number
+                    number();
+                } else if (isAlpha(c)) {
+                    // Check to see if our incoming value is part of
+                    // a reserved word or identifier
+                    identifier();
+                } else {
+                    Cobalt.error(line, "Unexpected character.");
+                }
+                break;
+        }
+    }
+
+    // Determine if the char is a base 10 digit
+    private boolean isDigit(char c) {
+        // Plain range check against the characters '0' through '9'.
+        return '0' <= c && c <= '9';
+    }
+
+
+    private void number() {
+        // Consume the integer part. The first digit was already consumed by
+        // scanToken(), so keep going only while more digits follow.
+        while (isDigit(peek())) advance();
+
+        // Look for a fractional part. The '.' only belongs to the number when
+        // a digit follows it; otherwise it is left to be scanned on its own.
+        if (peek() == '.' && isDigit(peekNext())) {
+            // Consume the .
+            advance();
+
+            while (isDigit(peek())) advance();
+        }
+
+        // Emit the lexeme, carrying its numeric value as the token literal.
+        addToken(NUMBER, Double.parseDouble(source.substring(start, current)));
+    }
+
+
+    // TODO: Lox spec supports multiline strings, we'll need to
+    // probably remove that support since I don't intend Cobalt's
+    // grammar to support that (maybe) :/
+
+    // TODO: Escape sequences are not supported atm, for the
+    // love of god please implement this functionality. Probably
+    // should make an enum for the valid escape sequences, parse them
+    // out like we do with operators, and inject the actual escape
+    // sequence in the object thats returned to the interpreter
+
+    // Process the input line if quotation marks are found
+    // and we have a string literal
+    private void string() {
+        // Walk to the closing quote, bumping the line counter for every
+        // newline crossed inside the literal.
+        for (; peek() != '"' && !isAtEnd(); advance()) {
+            if (peek() == '\n') line++;
+        }
+
+        // Ran out of input before finding the closing quote.
+        if (isAtEnd()) {
+            Cobalt.error(line, "Unterminated string.");
+            return;
+        }
+
+        // Step over the closing quote.
+        advance();
+
+        // The literal is the lexeme without its surrounding quotes.
+        addToken(STRING, source.substring(start + 1, current - 1));
+    }
+
+
+    // Determine if the incoming token is alphanumeric, and
+    // add it to the Token list if it is valid
+    private void identifier() {
+        // Consume the rest of the lexeme; its first character was already
+        // consumed by scanToken().
+        while (isAlphaNumeric(peek())) advance();
+
+        // Reserved words look like identifiers while scanning, so the choice
+        // is made afterwards: anything missing from the keyword table is a
+        // plain identifier.
+        String text = source.substring(start, current);
+        TokenType type = keywords.get(text);
+        if (type == null) type = IDENTIFIER;
+        addToken(type);
+    }
+
+
+    // Checkout the next character in our input, but dont consume it
+    // This is mainly to process things like comments that take an entire line
+    private char peek() {
+        // '\0' stands in for "no more input".
+        return isAtEnd() ? '\0' : source.charAt(current);
+    }
+
+
+    // Check out the character after the next one in our input, but don't consume it
+    // This is used to confirm a digit follows the '.' when scanning a number
+    private char peekNext() {
+        // Index of the character one past the cursor; '\0' when it is out of range.
+        int next = current + 1;
+        return next < source.length() ? source.charAt(next) : '\0';
+    }
+
+
+    // Check to see if the character passed is within
+    // [a-z], [A-Z], or is an underscore
+    private boolean isAlpha(char c) {
+        // Letters in either case, plus the underscore.
+        boolean lower = 'a' <= c && c <= 'z';
+        boolean upper = 'A' <= c && c <= 'Z';
+        return lower || upper || c == '_';
+    }
+
+
+    // Check to see if the character passed is within
+    // [a-z], [A-Z], [0-9], or is an underscore
+    private boolean isAlphaNumeric(char c) {
+        // Either check may run first; neither has side effects.
+        return isDigit(c) || isAlpha(c);
+    }
+
+
+    // Return true and consume the current character only if it equals
+    // the expected one; otherwise leave the cursor where it is
+    private boolean match(char expected) {
+        // A hit needs remaining input and the expected character under the cursor.
+        boolean hit = !isAtEnd() && source.charAt(current) == expected;
+        if (hit) current++;
+        return hit;
+    }
+
+
+    // Advance the char pointer in the line scanner
+    private char advance() {
+        // Move the cursor first, then hand back the character it just passed.
+        int consumed = current++;
+        return source.charAt(consumed);
+    }
+
+
+    // Add a token to the token List that does not have an object literal
+    // associated with it.
+    private void addToken(TokenType type) {
+        // No literal value to attach: delegate with a null literal.
+        addToken(type, null);
+    }
+
+
+    // Add a token to the token List, attaching the given literal object
+    private void addToken(TokenType type, Object literal) {
+        // The lexeme is the source text between the token start and the cursor.
+        String lexeme = source.substring(start, current);
+        Token token = new Token(type, lexeme, literal, line);
+        tokens.add(token);
+    }
 }
--- a/cobalt/TokenType.java
+++ b/cobalt/TokenType.java
@ -13,7 +13,7 @@ enum TokenType {
    IDENTIFIER, STRING, NUMBER,

    // Keywords
-    AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
+    AND, CLASS, ELSE, FALSE, FUNC, FOR, IF, NIL, OR,
    PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,

    EOF
--- a/lang_spec.txt
+++ b/lang_spec.txt
@ -0,0 +1,75 @@
+using cobalt.std;
+using cobalt.math;
+
+// Comments
+
+/*
+ Block Comments
+*/
+
+public class MyProgram {
+
+    // *** Instance Variables ***
+
+    // (Option 1)
+    // Since mutable variables are frequently used, you should have to specify if the variable is intended
+    // to be immutable/constant
+
+    let x: int = 0;             // Mutable type
+    let y: const int = 0;       // Immutable type
+
+
+    // (Option 2)
+    // Assume all variables are immutable, and only allow them to be mutable if specified. Helpful in compiled, memory
+    // safe languages, but probably not for an interpreted language that sits in memory?
+
+    let mut x: int = 0;         // Mutable type
+    let y: int = 0;             // Immutable type
+
+
+    // *** Main and Declaring functions ***
+    
+    // I think scripts should work similar to python, but not be as
+    // funky/verbose with function names
+
+    // If there is no main function defined within the script, the interpreter should
+    // process the file sequentially like Python,
+    // Otherwise, the main function is run and operates like any normal program
+
+    // Best practice would have main return an integer, however it could be
+    // void or return any other type
+
+
+    // (Option 1, C/C++/Java style)
+    private int main1() {
+        // Some code
+        return 0;
+    }
+
+    // (Option 2, Swift/Rust style)
+    private func main() => int {
+        // Some code
+        return 0;
+    }
+
+    // (Option 3, Ada style)
+    private func main() returns int {
+        // Some code
+        return 0;
+    }
+
+    // *** Handling Multiple Main Methods ***
+
+    // I feel that classes should be able to have their own main methods, and it could be
+    // determined which one is the entry point by requiring the user to provide an entry point
+    // 
+    //  (Example)
+    //      -Multiple classes in one file, each with their own main method
+    //      -Specify the script along with the class when opening with Cobalt, and it will run that specific class
+    //      -Attempt to run the class's main method. Error if it doesn't exist
+    //      -If there are multiple mains and one isn't specified, just error
+    //
+    //      Ex: cobalt script.cblt --main MyClass
+    //
+    // This allows for multiple "sub programs" within a single Cobalt script
+}