Add initial language spec, Scanner almost complete

This commit is contained in:
Garrett Dickinson 2022-06-16 17:12:43 -05:00 committed by GitHub
parent 449ce575a7
commit 6d2f1ee907
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 287 additions and 1 deletions

View File

@ -13,11 +13,36 @@ class Scanner {
private int start = 0; private int start = 0;
private int current = 0; private int current = 0;
private int line = 0; private int line = 0;
// Reserved words of the language, keyed by their exact source spelling and
// mapped to the token type the scanner should emit for them. Any lexeme
// that is not a key in this table is an ordinary identifier.
private static final Map<String, TokenType> keywords;
static {
    keywords = new HashMap<>();
    keywords.put("and", AND);
    // "class" previously mapped to ELSE and "else" had no entry at all
    keywords.put("class", CLASS);
    keywords.put("else", ELSE);
    keywords.put("false", FALSE);
    keywords.put("for", FOR);
    keywords.put("func", FUNC);
    keywords.put("if", IF);
    keywords.put("nil", NIL);
    keywords.put("or", OR);
    keywords.put("print", PRINT);
    keywords.put("return", RETURN);
    keywords.put("super", SUPER);
    keywords.put("this", THIS);
    keywords.put("true", TRUE);
    keywords.put("var", VAR);
    keywords.put("while", WHILE);
}
// Constructor
// Set our source string to be the incoming character data from the
// input script
Scanner(String source) { Scanner(String source) {
this.source = source; this.source = source;
} }
// Scan the input file for all available tokens, return a Token list with all
// of our valid tokens
List<Token> scanTokens() { List<Token> scanTokens() {
while (!isAtEnd()) { while (!isAtEnd()) {
start = current; start = current;
@ -28,7 +53,193 @@ class Scanner {
return tokens; return tokens;
} }
// Check to see if we have reached the end of the script
private boolean isAtEnd() { private boolean isAtEnd() {
return current >= source.length(); return current >= source.length();
} }
// Parse the current token from the scanner to see if its a valid
// lexeme. Report an error otherwise
private void scanToken() {
    // Consume exactly one character and dispatch on it. Multi-character
    // lexemes (two-character operators, comments, strings, numbers,
    // identifiers) consume the rest of their text in the matching branch.
    char c = advance();
    switch (c) {
        // Structural and Accessors
        case '(': addToken(LEFT_PAREN); break;
        case ')': addToken(RIGHT_PAREN); break;
        case '{': addToken(LEFT_BRACE); break;
        case '}': addToken(RIGHT_BRACE); break;
        case ',': addToken(COMMA); break;
        case '.': addToken(DOT); break;
        case ';': addToken(SEMICOLON); break;

        // Operators
        case '*': addToken(STAR); break;
        case '-': addToken(MINUS); break;
        case '+': addToken(PLUS); break;
        case '!':
            addToken(match('=') ? BANG_EQUAL : BANG);
            break;
        case '=':
            addToken(match('=') ? EQUAL_EQUAL : EQUAL);
            break;
        case '<':
            addToken(match('=') ? LESS_EQUAL : LESS);
            break;
        case '>':
            addToken(match('=') ? GREATER_EQUAL : GREATER);
            break;
        case '/':
            if (match('/')) {
                // A comment goes until the end of the line. The newline
                // itself is left unconsumed so the '\n' case below still
                // sees it and bumps the line counter.
                while (peek() != '\n' && !isAtEnd()) advance();
            } else {
                addToken(SLASH);
            }
            break;

        // String literals: the opening quote has just been consumed, the
        // string() helper scans through the closing quote. Without this
        // case a '"' fell through to the "Unexpected character." error.
        case '"': string(); break;

        // Whitespace and new lines
        case ' ':
        case '\r':
        case '\t':
            // ignore whitespace characters
            break;
        case '\n':
            line++;
            break;

        default:
            if (isDigit(c)) {
                // Check to see if our incoming value is part of a number
                number();
            } else if (isAlpha(c)) {
                // Check to see if our incoming value is part of
                // a reserved word or identifier
                identifier();
            } else {
                Cobalt.error(line, "Unexpected character.");
            }
            break;
    }
}
// Determine if the char is a base 10 digit
private boolean isDigit(char c) {
    // Only the ASCII characters '0' through '9' count as digits here.
    if (c < '0') return false;
    return c <= '9';
}
// Scan the remainder of a number literal whose first digit has already been
// consumed by scanToken(), then add a NUMBER token whose literal value is the
// lexeme parsed as a double.
private void number() {
    // Consume the integer part. The condition was previously negated, which
    // skipped over non-digits and ran past the end of the source.
    while (isDigit(peek())) advance();

    // Look for a decimal place. A '.' only belongs to the number when a
    // digit follows it, so a trailing dot is left for the DOT token.
    if (peek() == '.' && isDigit(peekNext())) {
        // Consume the .
        advance();
        while (isDigit(peek())) advance();
    }

    // The lexeme is digits with at most one interior '.', so it always parses.
    addToken(NUMBER, Double.parseDouble(source.substring(start, current)));
}
// TODO: Lox spec supports multiline strings, we'll need to
// probably remove that support since I don't intend Cobalt's
// grammar to support that (maybe) :/
// TODO: Escape sequences are not supported atm, for the
// love of god please implement this functionality. Probably
// should make an enum for the valid escape sequences, parse them
// out like we do with operators, and inject the actual escape
// sequence in the object thats returned to the interpreter
// Process the input line if quotation marks are found
// and we have a string literal
private void string() {
    // Walk forward until the closing quote or the end of the source,
    // counting any newlines that appear inside the literal.
    for (char next = peek(); next != '"' && !isAtEnd(); next = peek()) {
        if (next == '\n') {
            line++;
        }
        advance();
    }

    // Running out of input before a closing quote is an error
    if (isAtEnd()) {
        Cobalt.error(line, "Unterminated string.");
        return;
    }

    // Get closing quotes
    advance();

    // The lexeme [start, current) includes both quotes; trim them off so
    // the literal value holds only the characters between them.
    final int contentBegin = start + 1;
    final int contentEnd = current - 1;
    addToken(STRING, source.substring(contentBegin, contentEnd));
}
// Consume the rest of an incoming alphanumeric lexeme (a reserved word or
// identifier) and add it to the Token list
private void identifier() {
    // Consume every following letter, digit or underscore so the lexeme is
    // as long as possible.
    while (isAlphaNumeric(peek())) advance();

    // Reserved words get their own token type; anything not in the keyword
    // table is a plain identifier. Previously the table was never consulted
    // and every word was emitted as IDENTIFIER.
    String text = source.substring(start, current);
    TokenType type = keywords.get(text);
    if (type == null) type = IDENTIFIER;
    addToken(type);
}
// Check out the next character in our input, but don't consume it
// This is mainly to process things like comments that take an entire line
private char peek() {
    // '\0' stands in for "no character" once the input is exhausted
    return isAtEnd() ? '\0' : source.charAt(current);
}
// Check out the next+1 character in our input, but don't consume it
// This is mainly to check that a digit follows the '.' in a number literal
private char peekNext() {
    // Index of the character one past the cursor; '\0' is returned when
    // that index falls outside the source.
    final int lookahead = current + 1;
    return lookahead < source.length() ? source.charAt(lookahead) : '\0';
}
// Check to see if the character passed is within
// [a-z][A-Z] or is an underscore
private boolean isAlpha(char c) {
    // Underscore is accepted alongside the ASCII letters
    if (c == '_') return true;
    final boolean lower = 'a' <= c && c <= 'z';
    final boolean upper = 'A' <= c && c <= 'Z';
    return lower || upper;
}
// Check to see if the character passed is within
// [a-z][A-Z][0-9] or is an underscore
private boolean isAlphaNumeric(char c) {
    // True for anything either helper accepts
    if (isDigit(c)) return true;
    return isAlpha(c);
}
// Return true if the expected char is found at the current cursor and
// consume it; otherwise return false and leave the cursor where it is
private boolean match(char expected) {
    // Only a character that is actually present and equal to `expected`
    // is consumed; otherwise the cursor stays where it is.
    final boolean hit = !isAtEnd() && source.charAt(current) == expected;
    if (hit) {
        current++;
    }
    return hit;
}
// Advance the char pointer in the line scanner
private char advance() {
    // Move the cursor first, then read the character it was pointing at
    final int at = current;
    current = at + 1;
    return source.charAt(at);
}
// Add a token to the token List that does not have an object literal
// associated with it.
private void addToken(TokenType type) {
    // Delegate to the two-argument overload with no literal value
    final Object noLiteral = null;
    addToken(type, noLiteral);
}
// Add a token to the token List that has an object associated with it
private void addToken(TokenType type, Object literal) {
    // The lexeme is the slice of source scanned since `start`
    final String lexeme = source.substring(start, current);
    final Token token = new Token(type, lexeme, literal, line);
    tokens.add(token);
}
} }

View File

@ -13,7 +13,7 @@ enum TokenType {
IDENTIFIER, STRING, NUMBER, IDENTIFIER, STRING, NUMBER,
// Keywords // Keywords
AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR, AND, CLASS, ELSE, FALSE, FUNC, FOR, IF, NIL, OR,
PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE, PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,
EOF EOF

75
lang_spec.txt Normal file
View File

@ -0,0 +1,75 @@
using cobalt.std;
using cobalt.math;
// Comments
/*
Block Comments
*/
public class MyProgram {
// *** Instance Variables ***
// (Option 1)
// Since mutable variables are frequently used, you should have to specify if the variable is intended
// to be immutable/constant
let x: int = 0; // Mutable type
let y: const int = 0; // Immutable type
// (Option 2)
// Assume all variables are immutable, and only allow them to be mutable if specified. Helpful in compiled, memory
// safe languages, but probably not for an interpreted language that sits in memory?
let mut x: int = 0; // Mutable type
let y: int = 0; // Immutable type
// *** Main and Declaring functions ***
// I think scripts should work similar to python, but not be as
// funky/verbose with function names
// If there is no main function defined within the script, the interpreter should
// process the file sequentially like Python.
// Otherwise, the main function is run and operates like any normal program
// Best practice would have main return an integer, however it could be
// void or return any other type
// (Option 1, C/C++/Java style)
private int main1() {
// Some code
return 0;
}
// (Option 2, Swift/Rust style)
private func main() => int {
// Some code
return 0;
}
// (Option 3, Ada style)
private func main() returns int {
// Some code
return 0;
}
// *** Handling Multiple Main Methods ***
// I feel that classes should be able to have their own main methods, and it could be
// determined which one is the entry point by requiring the user to provide an entry point
//
// (Example)
// -Multiple classes in one file, each with their own main method
// -Specify the script along with the class when opening with Cobalt, and it will run that specific class
// -Attempt to run the class's main method. Error if it doesn't exist
// -If there are multiple mains and one isn't specified, just error
//
// Ex: cobalt script.cblt --main MyClass
//
// This allows for multiple "sub programs" within a single Cobalt script
}