1. #include <cstdio>
  2. #include <cstdlib>
  3. #include <map>
  4. #include <string>
  5. #include <vector>
  6.  
  7. //===----------------------------------------------------------------------===//
  8. // Lexer
  9. //===----------------------------------------------------------------------===//
  10.  
  11. // The lexer returns tokens [0-255] if it is an unknown character, otherwise one
  12. // of these for known things.
  13. enum Token {
  14. tok_eof = -1,
  15.  
  16. // commands
  17. tok_def = -2, tok_extern = -3,
  18.  
  19. // primary
  20. tok_identifier = -4, tok_number = -5
  21. };
  22.  
  23. static std::string IdentifierStr; // Filled in if tok_identifier
  24. static double NumVal; // Filled in if tok_number
  25.  
  26. /// gettok - Return the next token from standard input.
  27. static int gettok() {
  28. static int LastChar = ' ';
  29.  
  30. // Skip any whitespace.
  31. while (isspace(LastChar))
  32. LastChar = getchar();
  33.  
  34. if (isalpha(LastChar)) { // identifier: [a-zA-Z][a-zA-Z0-9]*
  35. IdentifierStr = LastChar;
  36. while (isalnum((LastChar = getchar())))
  37. IdentifierStr += LastChar;
  38.  
  39. if (IdentifierStr == "def") return tok_def;
  40. if (IdentifierStr == "extern") return tok_extern;
  41. return tok_identifier;
  42. }
  43.  
  44. if (isdigit(LastChar) || LastChar == '.') { // Number: [0-9.]+
  45. std::string NumStr;
  46. do {
  47. NumStr += LastChar;
  48. LastChar = getchar();
  49. } while (isdigit(LastChar) || LastChar == '.');
  50.  
  51. NumVal = strtod(NumStr.c_str(), 0);
  52. return tok_number;
  53. }
  54.  
  55. if (LastChar == '#') {
  56. // Comment until end of line.
  57. do LastChar = getchar();
  58. while (LastChar != EOF && LastChar != '\n' && LastChar != '\r');
  59.  
  60. if (LastChar != EOF)
  61. return gettok();
  62. }
  63.  
  64. // Check for end of file. Don't eat the EOF.
  65. if (LastChar == EOF)
  66. return tok_eof;
  67.  
  68. // Otherwise, just return the character as its ascii value.
  69. int ThisChar = LastChar;
  70. LastChar = getchar();
  71. return ThisChar;
  72. }
  73.  
  74. //===----------------------------------------------------------------------===//
  75. // Abstract Syntax Tree (aka Parse Tree)
  76. //===----------------------------------------------------------------------===//
  77. namespace {
  78. /// ExprAST - Base class for all expression nodes.
  79. class ExprAST {
  80. public:
  81. virtual ~ExprAST() {}
  82. };
  83.  
  84. /// NumberExprAST - Expression class for numeric literals like "1.0".
  85. class NumberExprAST : public ExprAST {
  86. public:
  87. NumberExprAST(double val) {}
  88. };
  89.  
  90. /// VariableExprAST - Expression class for referencing a variable, like "a".
  91. class VariableExprAST : public ExprAST {
  92. std::string Name;
  93. public:
  94. VariableExprAST(const std::string &name) : Name(name) {}
  95. };
  96.  
  97. /// BinaryExprAST - Expression class for a binary operator.
  98. class BinaryExprAST : public ExprAST {
  99. public:
  100. BinaryExprAST(char op, ExprAST *lhs, ExprAST *rhs) {}
  101. };
  102.  
  103. /// CallExprAST - Expression class for function calls.
  104. class CallExprAST : public ExprAST {
  105. std::string Callee;
  106. std::vector<ExprAST*> Args;
  107. public:
  108. CallExprAST(const std::string &callee, std::vector<ExprAST*> &args)
  109. : Callee(callee), Args(args) {}
  110. };
  111.  
  112. /// PrototypeAST - This class represents the "prototype" for a function,
  113. /// which captures its name, and its argument names (thus implicitly the number
  114. /// of arguments the function takes).
  115. class PrototypeAST {
  116. std::string Name;
  117. std::vector<std::string> Args;
  118. public:
  119. PrototypeAST(const std::string &name, const std::vector<std::string> &args)
  120. : Name(name), Args(args) {}
  121.  
  122. };
  123.  
  124. /// FunctionAST - This class represents a function definition itself.
  125. class FunctionAST {
  126. public:
  127. FunctionAST(PrototypeAST *proto, ExprAST *body) {}
  128. };
  129. } // end anonymous namespace
  130. //===----------------------------------------------------------------------===//
  131. // Parser
  132. //===----------------------------------------------------------------------===//
  133.  
  134. /// CurTok/getNextToken - Provide a simple token buffer. CurTok is the current
  135. /// token the parser is looking at. getNextToken reads another token from the
  136. /// lexer and updates CurTok with its results.
  137. static int CurTok;
  138. static int getNextToken() {
  139. return CurTok = gettok();
  140. }
  141.  
  142. /// BinopPrecedence - This holds the precedence for each binary operator that is
  143. /// defined.
  144. static std::map<char, int> BinopPrecedence;
  145.  
  146. /// GetTokPrecedence - Get the precedence of the pending binary operator token.
  147. static int GetTokPrecedence() {
  148. if (!isascii(CurTok))
  149. return -1;
  150.  
  151. // Make sure it's a declared binop.
  152. int TokPrec = BinopPrecedence[CurTok];
  153. if (TokPrec <= 0) return -1;
  154. return TokPrec;
  155. }
  156.  
  157. /// Error* - These are little helper functions for error handling.
  158. ExprAST *Error(const char *Str) { fprintf(stderr, "Error: %s\n", Str);return 0;}
  159. PrototypeAST *ErrorP(const char *Str) { Error(Str); return 0; }
  160.  
  161. static ExprAST *ParseExpression();
  162.  
  163. /// identifierexpr
  164. /// ::= identifier
  165. /// ::= identifier '(' expression* ')'
  166. static ExprAST *ParseIdentifierExpr() {
  167. std::string IdName = IdentifierStr;
  168.  
  169. getNextToken(); // eat identifier.
  170.  
  171. if (CurTok != '(') // Simple variable ref.
  172. return new VariableExprAST(IdName);
  173.  
  174. // Call.
  175. getNextToken(); // eat (
  176. std::vector<ExprAST*> Args;
  177. if (CurTok != ')') {
  178. while (1) {
  179. ExprAST *Arg = ParseExpression();
  180. if (!Arg) return 0;
  181. Args.push_back(Arg);
  182.  
  183. if (CurTok == ')') break;
  184.  
  185. if (CurTok != ',')
  186. return Error("Expected ')' or ',' in argument list");
  187. getNextToken();
  188. }
  189. }
  190.  
  191. // Eat the ')'.
  192. getNextToken();
  193.  
  194. return new CallExprAST(IdName, Args);
  195. }
  196.  
  197. /// numberexpr ::= number
  198. static ExprAST *ParseNumberExpr() {
  199. ExprAST *Result = new NumberExprAST(NumVal);
  200. getNextToken(); // consume the number
  201. return Result;
  202. }
  203.  
  204. /// parenexpr ::= '(' expression ')'
  205. static ExprAST *ParseParenExpr() {
  206. getNextToken(); // eat (.
  207. ExprAST *V = ParseExpression();
  208. if (!V) return 0;
  209.  
  210. if (CurTok != ')')
  211. return Error("expected ')'");
  212. getNextToken(); // eat ).
  213. return V;
  214. }
  215.  
  216. /// primary
  217. /// ::= identifierexpr
  218. /// ::= numberexpr
  219. /// ::= parenexpr
  220. static ExprAST *ParsePrimary() {
  221. switch (CurTok) {
  222. default: return Error("unknown token when expecting an expression");
  223. case tok_identifier: return ParseIdentifierExpr();
  224. case tok_number: return ParseNumberExpr();
  225. case '(': return ParseParenExpr();
  226. }
  227. }
  228.  
  229. /// binoprhs
  230. /// ::= ('+' primary)*
  231. static ExprAST *ParseBinOpRHS(int ExprPrec, ExprAST *LHS) {
  232. // If this is a binop, find its precedence.
  233. while (1) {
  234. int TokPrec = GetTokPrecedence();
  235.  
  236. // If this is a binop that binds at least as tightly as the current binop,
  237. // consume it, otherwise we are done.
  238. if (TokPrec < ExprPrec)
  239. return LHS;
  240.  
  241. // Okay, we know this is a binop.
  242. int BinOp = CurTok;
  243. getNextToken(); // eat binop
  244.  
  245. // Parse the primary expression after the binary operator.
  246. ExprAST *RHS = ParsePrimary();
  247. if (!RHS) return 0;
  248.  
  249. // If BinOp binds less tightly with RHS than the operator after RHS, let
  250. // the pending operator take RHS as its LHS.
  251. int NextPrec = GetTokPrecedence();
  252. if (TokPrec < NextPrec) {
  253. RHS = ParseBinOpRHS(TokPrec+1, RHS);
  254. if (RHS == 0) return 0;
  255. }
  256.  
  257. // Merge LHS/RHS.
  258. LHS = new BinaryExprAST(BinOp, LHS, RHS);
  259. }
  260.  
  261. /// expression
  262. /// ::= primary binoprhs
  263. ///
  264. static ExprAST *ParseExpression() {
  265. ExprAST *LHS = ParsePrimary();
  266. if (!LHS) return 0;
  267.  
  268. return ParseBinOpRHS(0, LHS);
  269. }
  270.  
  271. /// prototype
  272. /// ::= id '(' id* ')'
  273. static PrototypeAST *ParsePrototype() {
  274. if (CurTok != tok_identifier)
  275. return ErrorP("Expected function name in prototype");
  276.  
  277. std::string FnName = IdentifierStr;
  278. getNextToken();
  279.  
  280. if (CurTok != '(')
  281. return ErrorP("Expected '(' in prototype");
  282.  
  283. std::vector<std::string> ArgNames;
  284. while (getNextToken() == tok_identifier)
  285. ArgNames.push_back(IdentifierStr);
  286. if (CurTok != ')')
  287. return ErrorP("Expected ')' in prototype");
  288.  
  289. // success.
  290. getNextToken(); // eat ')'.
  291.  
  292. return new PrototypeAST(FnName, ArgNames);
  293. }
  294.  
  295. /// definition ::= 'def' prototype expression
  296. static FunctionAST *ParseDefinition() {
  297. getNextToken(); // eat def.
  298. PrototypeAST *Proto = ParsePrototype();
  299. if (Proto == 0) return 0;
  300.  
  301. if (ExprAST *E = ParseExpression())
  302. return new FunctionAST(Proto, E);
  303. return 0;
  304. }
  305.  
  306. /// toplevelexpr ::= expression
  307. static FunctionAST *ParseTopLevelExpr() {
  308. if (ExprAST *E = ParseExpression()) {
  309. // Make an anonymous proto.
  310. PrototypeAST *Proto = new PrototypeAST("", std::vector<std::string>());
  311. return new FunctionAST(Proto, E);
  312. }
  313. return 0;
  314. }
  315.  
  316. /// external ::= 'extern' prototype
  317. static PrototypeAST *ParseExtern() {
  318. getNextToken(); // eat extern.
  319. return ParsePrototype();
  320. }
  321.  
  322. //===----------------------------------------------------------------------===//
  323. // Top-Level parsing
  324. //===----------------------------------------------------------------------===//
  325.  
  326. static void HandleDefinition() {
  327. if (ParseDefinition()) {
  328. fprintf(stderr, "Parsed a function definition.\n");
  329. } else {
  330. // Skip token for error recovery.
  331. getNextToken();
  332. }
  333. }
  334.  
  335. static void HandleExtern() {
  336. if (ParseExtern()) {
  337. fprintf(stderr, "Parsed an extern\n");
  338. } else {
  339. // Skip token for error recovery.
  340. getNextToken();
  341. }
  342. }
  343.  
  344. static void HandleTopLevelExpression() {
  345. // Evaluate a top-level expression into an anonymous function.
  346. if (ParseTopLevelExpr()) {
  347. fprintf(stderr, "Parsed a top-level expr\n");
  348. } else {
  349. // Skip token for error recovery.
  350. getNextToken();
  351. }
  352. }
  353.  
  354. /// top ::= definition | external | expression | ';'
  355. static void MainLoop() {
  356. while (1) {
  357. fprintf(stderr, "ready> ");
  358. switch (CurTok) {
  359. case tok_eof: return;
  360. case ';': getNextToken(); break; // ignore top-level semicolons.
  361. case tok_def: HandleDefinition(); break;
  362. case tok_extern: HandleExtern(); break;
  363. default: HandleTopLevelExpression(); break;
  364. }
  365. }
  366. }
  367.  
  368. //===----------------------------------------------------------------------===//
  369. // Main driver code.
  370. //===----------------------------------------------------------------------===//
  371.  
  372. int main() {
  373. // Install standard binary operators.
  374. // 1 is lowest precedence.
  375. BinopPrecedence['<'] = 10;
  376. BinopPrecedence['+'] = 20;
  377. BinopPrecedence['-'] = 20;
  378. BinopPrecedence['*'] = 40; // highest.
  379.  
  380. // Prime the first token.
  381. fprintf(stderr, "ready> ");
  382. getNextToken();
  383.  
  384. // Run the main "interpreter loop" now.
  385. MainLoop();
  386.  
  387. return 0;