import { Lexer, Token } from "@helpers/Lexer";

/**
 * The kinds of token the formula lexer in this file can emit.
 * Each member is the string returned by one of the lexer's rules.
 */
type TokenType =
	// digits with an optional decimal point and an optional leading "-"
	| "number"
	// one of & * - + / > < ^
	| "operator"
	// a run of one or more spaces
	| "whitespace"
	// the text found between a start quote and the next double quote (or end of input)
	| "string"
	// a double quote that opens a string
	| "start quote"
	// a double quote that follows a string or a start quote
	| "end quote"
	// one of ( ) [ ]
	| "bracket"
	// a single letter followed by digits, e.g. A1
	| "operand"
	// two cell operands joined by ":", e.g. A1:A5
	| "range-operand"
	// text starting with a letter that no earlier lexer rule matched
	| "column-operand"
	| "comma"
	// an identifier immediately followed by "("
	| "function-name"
	// the text between "[" and "]"
	| "reference-name"
	// true, false, TRUE or FALSE
	| "boolean"
	// any single character no other rule recognised
	| "error";

/**
 * This is a lexer for spreadsheet formulas. It takes a string and returns an array of tokens.
 *
 * See: https://andrewstevens.dev/posts/formula-parser-in-javascript/
 *
 * Example:
 *
 * Input "=AVERAGE(A1:A5)" (the leading "=" is removed first). Each step lexes one
 * token from the start of the remaining input:
 * 1: "AVERAGE(A1:A5)" => {type: "function-name", value: "AVERAGE"}
 * 2: "(A1:A5)" => {type: "bracket", value: "("}
 * 3: "A1:A5)" => {type: "range-operand", value: "A1:A5"}
 * 4: ")" => {type: "bracket", value: ")"}
 * 5: Done
 * returns [
 *   {type: "function-name", value: "AVERAGE"},
 *   {type: "bracket", value: "("},
 *   {type: "range-operand", value: "A1:A5"},
 *   {type: "bracket", value: ")"}
 * ]
 */
export const LexFormula = (formula: string) => {
	// A leading "=" only marks the text as a formula; it is dropped before lexing.
	// The optional chain keeps a missing formula from throwing here; it is passed on unchanged.
	const body = formula?.startsWith("=") ? formula.slice(1) : formula;
	return formulaLexer(body);
};

/**
 * Named constants for every token kind the formula lexer can emit.
 *
 * Declared `as const` so each property keeps its string-literal type
 * (e.g. `TokenTypes.OPERAND` is `"operand"`, not `string`) and can be used
 * wherever a token type is expected. The object is also read-only at the
 * type level.
 */
export const TokenTypes = {
	NUMBER: "number",
	OPERATOR: "operator",
	WHITESPACE: "whitespace",
	STRING: "string",
	START_QUOTE: "start quote",
	END_QUOTE: "end quote",
	BRACKET: "bracket",
	OPERAND: "operand",
	RANGE_OPERAND: "range-operand",
	COLUMN_OPERAND: "column-operand",
	COMMA: "comma",
	FUNCTION_NAME: "function-name",
	REFERENCE_NAME: "reference-name",
	BOOLEAN: "boolean",
	ERROR: "error",
} as const;

/**
 * Reports whether a token type is one of the operand kinds, i.e. its name
 * contains "operand" ("operand", "range-operand" or "column-operand").
 *
 * @param type - The token type to test. A missing value (null/undefined, as
 *   can arrive from untyped callers) is tolerated.
 * @returns true for operand kinds; false for every other kind and for a
 *   missing type. The result is always a real boolean.
 */
export const isOperand = (type: TokenType): boolean =>
	typeof type === "string" && type.includes("operand");

/** The `Token` type from `@helpers/Lexer`, specialised to the formula token kinds in `TokenType`. */
export type FormulaToken = Token<TokenType>;

// Patterns used by the lexer rules below. Each starts with "^"; the rules
// presumably rely on the stream matching at its current position — confirm in Lexer.
const QUOTED_TEXT_BEFORE_QUOTE = /^[^"]+(?=")/;
const QUOTED_TEXT_TO_END = /^[^"]+/;
const NUMBER_LITERAL = /^[-]?\d*\.?\d+/;
const CELL_RANGE = /^[a-zA-Z]\d+:[a-zA-Z]\d+/;
const CELL_REFERENCE = /^[a-zA-Z]\d+/;
const BRACKETED_NAME = /^[^[\]]+(?=\])/;
const FUNCTION_CALL_NAME = /^[a-zA-Z_]\w*(?=\()/;
const SPACE_RUN = /^ +/;
const BOOLEAN_LITERAL = /^(?:true|false|TRUE|FALSE)/;
// NOTE(review): besides the letter, this also consumes every following
// non-letter character (e.g. ":" or ")") — confirm that is intended.
const COLUMN_LETTER = /^[a-zA-Z][^a-zA-Z]*/;

const OPERATOR_CHARS = new Set(["&", "*", "-", "+", "/", ">", "<", "^"]);
const BRACKET_CHARS = new Set([")", "]", "(", "["]);

/**
 * Lexer rule set for formulas: consumes one token's worth of input from the
 * stream and returns that token's type.
 *
 * The rules are tried in a fixed order and the first one that applies wins,
 * so the order below is significant (e.g. ranges are tried before single
 * cell operands, and numbers before operators so "-5" can be one token).
 */
const formulaLexer = Lexer<TokenType>((stream) => {
	const nextChar = stream.peek() as string;
	const prev = stream.getPreviousToken();
	const prevType = prev ? prev.type : null;

	// Double quote: closes a string if we are just after its opening quote or
	// its text, otherwise opens a new one.
	if (nextChar === '"') {
		stream.next();
		const closesString = prevType === "string" || prevType === "start quote";
		return closesString ? "end quote" : "start quote";
	}

	// Text inside quotes (nextChar cannot be a quote here; that case returned above).
	if (prevType === "start quote" && prev?.value === '"') {
		// Prefer stopping right before the closing quote; with no closing
		// quote, take everything up to the end of the input.
		if (!stream.match(QUOTED_TEXT_BEFORE_QUOTE, true)) {
			stream.match(QUOTED_TEXT_TO_END, true);
		}
		return "string";
	}

	// Numbers, including a leading "-" sign.
	if (stream.match(NUMBER_LITERAL, false)) {
		// A "-" straight after a value is a subtraction, not a sign: emit just
		// the "-" as an operator and leave the digits for the next call.
		const minusIsBinary =
			nextChar === "-" && prevType !== "operator" && tokenIsValue(prev);
		if (minusIsBinary) {
			stream.next();
			return "operator";
		}
		stream.match(NUMBER_LITERAL, true);
		return "number";
	}

	// Cell ranges such as A1:A5 (must be tried before single cells).
	if (stream.match(CELL_RANGE, true)) {
		return "range-operand";
	}

	// Single cells such as A1.
	if (stream.match(CELL_REFERENCE, true)) {
		return "operand";
	}

	// Single-character operators.
	if (OPERATOR_CHARS.has(nextChar)) {
		stream.next();
		return "operator";
	}

	// Reference names: the text between "[" and "]". The match consumes input,
	// so it stays last in the condition.
	if (
		prevType === "bracket" &&
		prev?.value === "[" &&
		stream.match(BRACKETED_NAME, true)
	) {
		return "reference-name";
	}

	// Function names: an identifier immediately followed by "(".
	if (stream.match(FUNCTION_CALL_NAME, true)) {
		return "function-name";
	}

	// Brackets.
	if (BRACKET_CHARS.has(nextChar)) {
		stream.next();
		return "bracket";
	}

	// Argument separator.
	if (nextChar === ",") {
		stream.next();
		return "comma";
	}

	// Runs of spaces.
	if (stream.match(SPACE_RUN, true)) {
		return "whitespace";
	}

	// Boolean literals.
	if (stream.match(BOOLEAN_LITERAL, true)) {
		return "boolean";
	}

	// Full-column references.
	if (stream.match(COLUMN_LETTER, true)) {
		return "column-operand";
	}

	// Nothing matched: consume one character and flag it.
	stream.next();
	return "error";
});

/**
 * Reports whether `token` completes a value, which means a "-" directly after
 * it must be a binary minus rather than the sign of a negative number.
 *
 * Value-ending tokens are numbers, booleans, cell / range / column operands,
 * the closing quote of a string, and a closing ")" or "]" bracket. Operands
 * and booleans are included so that input such as "A1-5" lexes as
 * operand, operator, number instead of operand followed by the number -5.
 *
 * @param token - The previously lexed token, or null when there is none.
 * @returns true when the token completes a value; false otherwise,
 *   including when there is no token.
 */
function tokenIsValue(token: FormulaToken | null): boolean {
	if (!token) {
		return false;
	}
	switch (token.type) {
		case "number":
		case "boolean":
		case "operand":
		case "range-operand":
		case "column-operand":
		case "end quote":
			return true;
		case "bracket":
			// Only a closing bracket ends a value; an opening one starts one.
			return token.value === ")" || token.value === "]";
		default:
			return false;
	}
}
