I am currently trying to make my own little programming language for the first time. I am, as of now, creating the basic lexer. I am trying to allow floating point values in my code, but alas, it gets split into an identifier and a float value (see output below)
Question: How do I fix my lexer to properly handle floats?
Output:let value = 7.24const pi = 3.14
Code:
function lexer(input) { const tokens = []; const keywords = new Set(['let', 'var', 'const', 'def', 'float', 'floater', 'double', 'int', 'integer', 'bool', 'boolean', 'string', 'char']); const alphaNumericRegex = /[a-zA-Z0-9]/; const digitRegex = /\d/; let current = 0; let length = input.length; while (current < length) { let char = input[current]; if (char === '' || char === '\n') { current++; continue; } if (alphaNumericRegex.test(char)) { let wordStart = current; while (alphaNumericRegex.test(char)) { char = input[++current]; } let word = input.slice(wordStart, current); if (keywords.has(word)) { tokens.push({ type: 'keyword', value: word }); } else { tokens.push({ type: 'identifier', value: word }); } continue; } if (digitRegex.test(char) || char === '.') { let numStart = current; let hasDecimal = false; while (digitRegex.test(char) || (!hasDecimal && char === '.')) { if (char === '.') { hasDecimal = true; } char = input[++current]; } let numStr = input.slice(numStart, current); let num = parseFloat(numStr); if (isNaN(num)) { throw new SyntaxError("Invalid number"); } if (Number.isInteger(num)) { tokens.push({ type: 'number', value: num }); } else { tokens.push({ type: 'number', value: parseFloat(numStr) }); } continue; } if (char === '"') { let strStart = ++current; while (input[current] !== '"') { if (++current >= length) throw new SyntaxError("Unterminated string literal"); } let str = input.slice(strStart, current++); tokens.push({ type: 'string', value: str }); continue; } if (char === "'") { let charValue = input[++current]; if (input[++current] === "'") { tokens.push({ type: 'char', value: charValue }); current++; } else { throw new SyntaxError("Invalid character literal"); } continue; } if (char === '=') { tokens.push({ type: 'assign' }); current++; continue; } if (char === ';') { tokens.push({ type: 'semicolon' }); current++; continue; } if (char === '.') { tokens.push({ type: 'dot' }); current++; continue; } current++; } return tokens;}const code = `let value = 7.24;var count = 5;const pi = 3.14;bool isTrue = true;string message = "Hello";char initial = 'A';`;console.log(JSON.stringify(lexer(code), null, 2));
I'm still somewhat new to JavaScript and completely new to lexers, so I tried getting help from ai tools such as ChatGPT and AskCodi. They attempted to fix the problem and any changes I made from their recommendations made no difference.