Add initial language spec, Scanner almost complete
This commit is contained in:
parent
449ce575a7
commit
6d2f1ee907
|
|
@ -13,11 +13,36 @@ class Scanner {
|
||||||
private int start = 0;
|
private int start = 0;
|
||||||
private int current = 0;
|
private int current = 0;
|
||||||
private int line = 0;
|
private int line = 0;
|
||||||
|
private static final Map<String, TokenType> keywords;
|
||||||
|
|
||||||
|
static {
|
||||||
|
keywords = new HashMap<>();
|
||||||
|
keywords.put("and", AND);
|
||||||
|
keywords.put("class", ELSE);
|
||||||
|
keywords.put("false", FALSE);
|
||||||
|
keywords.put("for", FOR);
|
||||||
|
keywords.put("func", FUNC);
|
||||||
|
keywords.put("if", IF);
|
||||||
|
keywords.put("nil", NIL);
|
||||||
|
keywords.put("or", OR);
|
||||||
|
keywords.put("print", PRINT);
|
||||||
|
keywords.put("return", RETURN);
|
||||||
|
keywords.put("super", SUPER);
|
||||||
|
keywords.put("this", THIS);
|
||||||
|
keywords.put("true", TRUE);
|
||||||
|
keywords.put("var", VAR);
|
||||||
|
keywords.put("while", WHILE);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Constructor
|
||||||
|
// Set our source string to be the incoming character data from the
|
||||||
|
// input script
|
||||||
Scanner(String source) {
|
Scanner(String source) {
|
||||||
this.source = source;
|
this.source = source;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Scan the input file for all available tokens, return a Token list with all
|
||||||
|
// of our valid tokens
|
||||||
List<Token> scanTokens() {
|
List<Token> scanTokens() {
|
||||||
while (!isAtEnd()) {
|
while (!isAtEnd()) {
|
||||||
start = current;
|
start = current;
|
||||||
|
|
@ -28,7 +53,193 @@ class Scanner {
|
||||||
return tokens;
|
return tokens;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Check to see if we have reached the end of the script
|
||||||
private boolean isAtEnd() {
|
private boolean isAtEnd() {
|
||||||
return current >= source.length();
|
return current >= source.length();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Parse the current token from the scanner to see if its a valid
|
||||||
|
// lexeme. Report an error otherwise
|
||||||
|
private void scanToken() {
|
||||||
|
char c = advance();
|
||||||
|
switch (c) {
|
||||||
|
// Structural and Accessors
|
||||||
|
case '(': addToken(LEFT_PAREN); break;
|
||||||
|
case ')': addToken(RIGHT_PAREN); break;
|
||||||
|
case '{': addToken(LEFT_BRACE); break;
|
||||||
|
case '}': addToken(RIGHT_BRACE); break;
|
||||||
|
case ',': addToken(COMMA); break;
|
||||||
|
case '.': addToken(DOT); break;
|
||||||
|
case ';': addToken(SEMICOLON); break;
|
||||||
|
|
||||||
|
// Operators
|
||||||
|
case '*': addToken(STAR); break;
|
||||||
|
case '-': addToken(MINUS); break;
|
||||||
|
case '+': addToken(PLUS); break;
|
||||||
|
case '!':
|
||||||
|
addToken(match('=') ? BANG_EQUAL : BANG);
|
||||||
|
break;
|
||||||
|
case '=':
|
||||||
|
addToken(match('=') ? EQUAL_EQUAL : EQUAL);
|
||||||
|
break;
|
||||||
|
case '<':
|
||||||
|
addToken(match('=') ? LESS_EQUAL : LESS);
|
||||||
|
break;
|
||||||
|
case '>':
|
||||||
|
addToken(match('=') ? GREATER_EQUAL : GREATER);
|
||||||
|
break;
|
||||||
|
case '/':
|
||||||
|
if (match('/')) {
|
||||||
|
// A comment goes until the end of the line
|
||||||
|
while (peek() != '\n' && !isAtEnd()) advance();
|
||||||
|
} else {
|
||||||
|
addToken(SLASH);
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
|
// Whitespace and new lines
|
||||||
|
case ' ':
|
||||||
|
case '\r':
|
||||||
|
case '\t':
|
||||||
|
//ignore whitespace characters
|
||||||
|
break;
|
||||||
|
case '\n':
|
||||||
|
line++;
|
||||||
|
break;
|
||||||
|
|
||||||
|
default:
|
||||||
|
if (isDigit(c)) {
|
||||||
|
// Check to see if our incoming value is part of a number
|
||||||
|
number();
|
||||||
|
} else if (isAlpha(c)) {
|
||||||
|
// Check to see if our incoming value is part of
|
||||||
|
// a reserved word or identifier
|
||||||
|
identifier();
|
||||||
|
} else {
|
||||||
|
Cobalt.error(line, "Unexpected character.");
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Determine if the char is a base 10 digit
|
||||||
|
private boolean isDigit(char c) {
|
||||||
|
return c >= '0' && c <= '9';
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
private void number() {
|
||||||
|
while (!isDigit(peek())) advance();
|
||||||
|
|
||||||
|
// Look for a decimal place.
|
||||||
|
if (peek() == '.' && isDigit(peekNext())) {
|
||||||
|
// Consume the .
|
||||||
|
advance();
|
||||||
|
|
||||||
|
while (isDigit(peek())) advance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// TODO: Lox spec supports multiline strings, we'll need to
|
||||||
|
// probably remove that support since I don't intend Cobalt's
|
||||||
|
// grammar to support that (maybe) :/
|
||||||
|
|
||||||
|
// TODO: Escape sequences are not supported atm, for the
|
||||||
|
// love of god please implement this functionality. Probably
|
||||||
|
// should make an enum for the valid escape sequences, parse them
|
||||||
|
// out like we do with operators, and inject the actual escape
|
||||||
|
// sequence in the object thats returned to the interpreter
|
||||||
|
|
||||||
|
// Process the input line if quotation marks are found
|
||||||
|
// and we have a string literal
|
||||||
|
private void string() {
|
||||||
|
while (peek() != '"' && !isAtEnd()) {
|
||||||
|
if (peek() == '\n') line++;
|
||||||
|
advance();
|
||||||
|
}
|
||||||
|
|
||||||
|
if (isAtEnd()) {
|
||||||
|
Cobalt.error(line, "Unterminated string.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get closing quotes
|
||||||
|
advance();
|
||||||
|
|
||||||
|
// Trim the
|
||||||
|
String value = source.substring(start + 1, current - 1);
|
||||||
|
addToken(STRING, value);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Determine if the infoming token is alphanumeric, and
|
||||||
|
// add it to the Token list if it is valid
|
||||||
|
private void identifier() {
|
||||||
|
while (isAlphaNumeric(peek())) advance();
|
||||||
|
addToken(IDENTIFIER);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Checkout the next character in our input, but dont consume it
|
||||||
|
// This is mainly to process things like comments that take an entire line
|
||||||
|
private char peek() {
|
||||||
|
if (isAtEnd()) return '\0';
|
||||||
|
return source.charAt(current);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Checkout the next+1 character in our input, but dont consume it
|
||||||
|
// This is mainly to process things like comments that take an entire line
|
||||||
|
private char peekNext() {
|
||||||
|
if (current + 1 >= source.length()) return '\0';
|
||||||
|
return source.charAt(current + 1);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check to see if the character passed is within
|
||||||
|
// [a-z][A-Z]
|
||||||
|
private boolean isAlpha(char c) {
|
||||||
|
return (c >= 'a' && c <= 'z') ||
|
||||||
|
(c >= 'A' && c <= 'Z') ||
|
||||||
|
c == '_';
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Check to see if the character passed is within
|
||||||
|
// [a-z][A-Z][0-9]
|
||||||
|
private boolean isAlphaNumeric(char c) {
|
||||||
|
return isAlpha(c) || isDigit(c);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Return a boolean based on if a char is found at the current cursor,
|
||||||
|
// then increment
|
||||||
|
private boolean match(char expected) {
|
||||||
|
if (isAtEnd()) return false;
|
||||||
|
if (source.charAt(current) != expected) return false;
|
||||||
|
|
||||||
|
current++;
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Advance the char pointer in the line scanner
|
||||||
|
private char advance() {
|
||||||
|
return source.charAt(current++);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Add a token to the token List that does not have an object literal
|
||||||
|
// associated with it.
|
||||||
|
private void addToken(TokenType type) {
|
||||||
|
addToken(type, null);
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Add a token to the token List that has an object associated with it
|
||||||
|
private void addToken(TokenType type, Object literal) {
|
||||||
|
String text = source.substring(start, current);
|
||||||
|
tokens.add(new Token(type, text, literal, line));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -13,7 +13,7 @@ enum TokenType {
|
||||||
IDENTIFIER, STRING, NUMBER,
|
IDENTIFIER, STRING, NUMBER,
|
||||||
|
|
||||||
// Keywords
|
// Keywords
|
||||||
AND, CLASS, ELSE, FALSE, FUN, FOR, IF, NIL, OR,
|
AND, CLASS, ELSE, FALSE, FUNC, FOR, IF, NIL, OR,
|
||||||
PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,
|
PRINT, RETURN, SUPER, THIS, TRUE, VAR, WHILE,
|
||||||
|
|
||||||
EOF
|
EOF
|
||||||
|
|
|
||||||
75
lang_spec.txt
Normal file
75
lang_spec.txt
Normal file
|
|
@ -0,0 +1,75 @@
|
||||||
|
using cobalt.std;
|
||||||
|
using cobalt.math;
|
||||||
|
|
||||||
|
// Comments
|
||||||
|
|
||||||
|
/*
|
||||||
|
Block Comments
|
||||||
|
*/
|
||||||
|
|
||||||
|
public class MyProgram {
|
||||||
|
|
||||||
|
// *** Instance Variables ***
|
||||||
|
|
||||||
|
// (Option 1)
|
||||||
|
// Since mutable variables are frequently used, you should have to specify if the variable is intended
|
||||||
|
// to be immutable/constant
|
||||||
|
|
||||||
|
let x: int = 0; // Mutable type
|
||||||
|
let y: const int = 0; // Immutable type
|
||||||
|
|
||||||
|
|
||||||
|
// (Option 2)
|
||||||
|
// Assume all variables are immutable, and only allow them to be mutable if specified. Helpful in compiled, memory
|
||||||
|
// safe languages, but probably not for an interpreted language that sits in memory?
|
||||||
|
|
||||||
|
let mut x: int = 0; // Mutable type
|
||||||
|
let y: int = 0; // Immutable type
|
||||||
|
|
||||||
|
|
||||||
|
// *** Main and Declaring functions ***
|
||||||
|
|
||||||
|
// I think scripts should work similar to python, but not be as
|
||||||
|
// funky/verbose with function names
|
||||||
|
|
||||||
|
// If there is no main function defined within the script, the interpreter should
|
||||||
|
// process the file sequentially like Python,
|
||||||
|
// Otherwise, the main function is ran and operates like any normal program
|
||||||
|
|
||||||
|
// Best practice would have main return an integer, however it could be
|
||||||
|
// void or return any other type
|
||||||
|
|
||||||
|
|
||||||
|
// (Option 1, C/C++/Java style)
|
||||||
|
private int main1() {
|
||||||
|
// Some code
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// (Option 2, Swift/Rust style)
|
||||||
|
private func main() => int {
|
||||||
|
// Some code
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// (Option 3, Ada style)
|
||||||
|
private func main() returns int {
|
||||||
|
// Some code
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
// *** Handling Multiple Main Methods ***
|
||||||
|
|
||||||
|
// I feel that classes should be able to have their own main methods, and it could be
|
||||||
|
// determined which one is the entry point by requiring the user to provide an entry point
|
||||||
|
//
|
||||||
|
// (Example)
|
||||||
|
// -Multiple classes in one file, each with their own main method
|
||||||
|
// -Specify the script along with the class when opening with Cobalt, and it will run that specific class
|
||||||
|
// -Attempt to run the class's main method. Error if it doesn't exist
|
||||||
|
// -If there are multiple mains and one isn't specific, just error
|
||||||
|
//
|
||||||
|
// Ex: cobalt script.cblt --main MyClass
|
||||||
|
//
|
||||||
|
// This allows for multiple "sub programs" within a single Cobalt script
|
||||||
|
}
|
||||||
Loading…
Reference in New Issue
Block a user