Implement a grammar and lexer with Antlr4 capable of actually parsing an example

This of course comes with no actual implementation, but it at least provides a
grammar to start with. The rules likely need some cleanup and there are edge
cases still missing, but it is a start.
R Tyler Croy 2019-07-04 17:33:19 -07:00
parent cc2a9fdedc
commit 0df23d721e
7 changed files with 808 additions and 253 deletions
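For reference, a minimal pipeline that the new grammar should accept looks roughly like this (a sketch pieced together from the parser rules in Otto.g4 below, not taken from examples/webapp.otto; the step name "sh" is arbitrary, since a step is just an identifier followed by a string):

use {
    stdlib
}

pipeline {
    stages {
        stage('Build') {
            steps {
                sh 'make build'
            }
        }
    }
}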

Makefile

@@ -1,22 +1,32 @@
##
# This Makefile provides the main development interface for working with Otto,
# and helps organize the various tasks for preparation, compilation, and
# testing.
#
# Execute `make` to get help for the various targets
################################################################################
# Set the PATH so we can automatically include our node binstubs
export PATH:=./node_modules/.bin:${PATH}
ANTLR_BIN=antlr-4.7.2-complete.jar
DREDD=./node_modules/.bin/dredd
ANTLR=contrib/$(ANTLR_BIN)
-GRAMMAR=Otto.g4
+GRAMMAR=Otto.g4 OttoLexer.g4
################################################################################
## Phony targets
all: help
-build: ## Build all components
+build: depends ## Build all components
tsc
-check: ## Run validation tests
+check: depends build ## Run validation tests
#dredd
node parse-test.js
swagger: depends ## Generate the swagger stubs based on apispecs
-depends: prereqs $(ANTLR) $(DREDD) ## Download all dependencies
+depends: prereqs $(ANTLR) ## Download all dependencies
prereqs: scripts/prereqs.sh ## Check that this system has the necessary tools to build otto
@sh scripts/prereqs.sh
@@ -24,14 +34,11 @@ prereqs: scripts/prereqs.sh ## Check that this system has the necessary tools to
clean: ## Clean all temporary/working files
rm -f $(ANTLR)
-dredd: $(DREDD)
-  $(DREDD)
parser: depends $(GRAMMAR) ## Generate the parser code
-@for target in JavaScript Go Cpp; do \
+@for target in Java JavaScript; do \
java -cp $(ANTLR) org.antlr.v4.Tool \
-Dlanguage=$$target \
-  -o build/$$target \
+  -o build/parser/$$target \
$(GRAMMAR); \
echo "--> Generated $$target stubs"; \
done;
@@ -41,9 +48,6 @@ parser: depends $(GRAMMAR) ## Generate the parser code
$(ANTLR): ## Download the latest ANTLR4 binary
(cd contrib && wget https://www.antlr.org/download/$(ANTLR_BIN))
-$(DREDD):
-  npm i dredd
################################################################################
# Cute hack thanks to:
@@ -51,4 +55,4 @@ $(DREDD):
help: ## Display this help text
@grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
-.PHONY: all build check clean depends
+.PHONY: all build check clean depends parser

Otto.g4

@@ -1,4 +1,228 @@
-grammar Otto;
-r : 'hello' ID ; // match keyword hello followed by an identifier
-ID : [a-z]+ ; // match lower-case identifiers
-WS : [ \t\r\n]+ -> skip ; // skip spaces, tabs, newlines
parser grammar Otto;
options {
tokenVocab=OttoLexer;
}
// Start rule to parse the .otto pipeline declaration
pipeline
: use_block?
configure_block?
envs_block?
pipeline_block
;
/*
* The use {} block helps bring user defined libraries into scope for the
* runtime of the pipeline, but does not influence parse time
*
* Example:
use {
stdlib
}
*
*/
use_block
: USE BEGIN statements? END
;
/*
* The configure {} block allows the user to configure libraries or other
* pipeline-specific settings.
*
* Example:
configure {
slack {
channel = '#otto'
}
}
*/
configure_block
: CONFIGURE BEGIN setting_block+ END
;
/* The environments {} block allows the definition of logical environments for
* the pipeline to deliver into
*
* Example:
environments {
preprod {
settings {
HOSTNAME = "preprod-ottoapp.herokuapp.com"
}
}
}
*/
envs_block
: ENVIRONMENTS BEGIN env_block+ END
;
/*
* Handling an identified environment block.
*
* This block is typically responsible for configuring a single target
* environment for the delivery of this pipeline.
*
* Example:
preprod {
settings {
HOSTNAME = "preprod-ottoapp.herokuapp.com"
}
}
*/
env_block
: ID BEGIN settings_block? END
;
settings_block
: SETTINGS BEGIN settings? END
;
/*
* Set settings for an identified subcomponent
*
* Example:
slack {
channel = '#otto'
}
*
* The identified subcomponent is not expected to be known at parse time, but
* should be looked up when the parsed .otto file has been processed to ensure
* that a corresponding subcomponent is available
*/
setting_block
: ID BEGIN settings? END
;
settings
: setting+
;
setting
: ID ASSIGN (StringLiteral | array)
;
array
: ARRAY_START (StringLiteral COMMA?)+ ARRAY_END
;
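// Example setting with an array value (identifiers illustrative): channels = ['#otto', '#dev']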
/*
* The pipeline {} block contains the main execution definition of the
* pipeline. Roughly modeled after the Jenkins Pipeline declarative syntax.
*/
pipeline_block
: PIPELINE BEGIN stages_block END
;
stages_block
: STAGES BEGIN stages+ END
;
stages
: STAGE OPEN StringLiteral CLOSE BEGIN stageStatements* END
;
stageStatements
: steps
| runtime
| cache
| when
| deployExpr
| notify
| feedback
| before
// And finally, allow nesting our stages!
| stages+
;
steps
: STEPS BEGIN statements+ END
;
cache
: CACHE BEGIN
(
(setting+)
| fromExpr
)
END
;
runtime
: RUNTIME BEGIN
(
setting_block
| fromExpr
)
END
;
when
: WHEN BEGIN whenExpr* END
;
whenExpr
: (BRANCH EQUALS StringLiteral)
| fromExpr
;
/*
* A "deployment expression" signifies that the output of the given context
* will result in binaries or some form of delivery to the environment being
* pointed to
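*
* Example (illustrative): environment -> preprod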
*/
deployExpr
: ENVIRONMENT TO ID
;
notify
: NOTIFY BEGIN
(
(SUCCESS | FAILURE | COMPLETE)
BEGIN
statements+
END
)+
END
;
feedback
: FEEDBACK BEGIN
(
statements
| setting_block
)+
END
;
before
: BEFORE BEGIN statements+ END
;
/*
* A "from" expression is a shorthand in the syntax for coping the contents of
* another block of "this" type, from another stage or location
*
* For example, if one stage in the pipeline has a `cache` configuration
* defined, a later stage can use: cache { from 'StageA' } to copy the settings
* over verbatim
*/
fromExpr
: FROM StringLiteral
;
statements
: statement+
;
statement
: keyword
| step
| StringLiteral
;
step
: ID StringLiteral
;
/*
* Keywords are expected to be semantically important after parse time and
* effectively represent reserved words in the .otto language
*/
keyword
: STDLIB
;

OttoLexer.g4 (new file)

@@ -0,0 +1,113 @@
lexer grammar OttoLexer;
USE : 'use';
CONFIGURE : 'configure';
ENVIRONMENTS : 'environments';
ENVIRONMENT : 'environment';
SETTINGS : 'settings';
PIPELINE : 'pipeline';
STAGES : 'stages';
STAGE : 'stage';
STEPS : 'steps';
CACHE : 'cache';
RUNTIME : 'runtime';
NOTIFY : 'notify';
SUCCESS : 'success';
FAILURE : 'failure';
COMPLETE : 'complete';
FEEDBACK : 'feedback';
BEFORE : 'before';
WHEN : 'when';
BRANCH : 'branch';
EQUALS : '==';
/*
* The "to" token helps signify the output of the current context going "to" a
* designated environment
*/
TO : '->';
FROM : 'from';
// Keyword tokens
STDLIB: 'stdlib';
// Begin block
BEGIN : '{';
// End block
END : '}';
OPEN : '(';
CLOSE : ')';
ARRAY_START : '[';
ARRAY_END : ']';
COMMA : ',';
ASSIGN : '=';
StringLiteral: ('"' DoubleStringCharacter* '"'
| '\'' SingleStringCharacter* '\'')
;
fragment DoubleStringCharacter
: ~["\\\r\n]
| '\\' EscapeSequence
| LineContinuation
;
fragment SingleStringCharacter
: ~['\\\r\n]
| '\\' EscapeSequence
| LineContinuation
;
fragment EscapeSequence
: CharacterEscapeSequence
| '0' // no digit ahead! TODO
| HexEscapeSequence
| UnicodeEscapeSequence
| ExtendedUnicodeEscapeSequence
;
fragment CharacterEscapeSequence
: SingleEscapeCharacter
| NonEscapeCharacter
;
fragment HexEscapeSequence
: 'x' HexDigit HexDigit
;
fragment UnicodeEscapeSequence
: 'u' HexDigit HexDigit HexDigit HexDigit
;
fragment ExtendedUnicodeEscapeSequence
: 'u' '{' HexDigit+ '}'
;
fragment HexDigit
: [0-9a-fA-F]
;
fragment SingleEscapeCharacter
: ['"\\bfnrtv]
;
fragment NonEscapeCharacter
: ~['"\\bfnrtv0-9xu\r\n]
;
fragment EscapeCharacter
: SingleEscapeCharacter
| [0-9]
| [xu]
;
fragment LineContinuation
: '\\' [\r\n\u2028\u2029]
;
ID : [a-zA-Z_]+ ;
// skip spaces, tabs, newlines
WS : [ \t\r\n]+ -> skip ;
MultiLineComment: '/*' .*? '*/' -> channel(HIDDEN);
SingleLineComment: '//' ~[\r\n\u2028\u2029]* -> channel(HIDDEN);

examples/webapp.otto

@@ -168,7 +168,7 @@ pipeline {
* finished"
*/
webhook {
-description 'Pingdom health check'
+description = 'Pingdom health check'
}
}
}

package-lock.json (generated)

File diff suppressed because it is too large.

package.json

@@ -18,16 +18,17 @@
"cors": "^2.8.5",
"dredd": "^11.2.9",
"helmet": "^3.18.0",
"jest": "^24.8.0",
"module-alias": "^2.2.0",
"nodemon": "^1.19.1",
"ts-jest": "^24.0.2",
"tslib": "^1.10.0",
"typescript": "^3.5.2",
"uuid": "^3.3.2",
"winston": "^3.2.1"
},
"devDependencies": {},
"devDependencies": {
"jest": "^24.8.0",
"ts-jest": "^24.0.2"
},
"scripts": {
"build": "tsc",
"test": "jest"

parse-test.js (new file)

@@ -0,0 +1,50 @@
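// Smoke test: lex and parse the example pipeline with the generated JavaScript stubs.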
const fs = require('fs');
const antlr = require('antlr4');
const Lexer = require('./build/parser/JavaScript/OttoLexer').OttoLexer;
const Parser = require('./build/parser/JavaScript/Otto').Otto;
const OttoListener = require('./build/parser/JavaScript/OttoListener').OttoListener;
const input = fs.readFileSync('./examples/webapp.otto', 'utf8');
let chars = new antlr.InputStream(input);
let lexer = new Lexer(chars);
let tokens = new antlr.CommonTokenStream(lexer);
let parser = new Parser(tokens);
parser.buildParseTrees = true;
let tree = parser.pipeline();
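// Ad-hoc visitor: recursively walks the parse tree and returns the text of leaf nodes.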
class Visitor {
visitChildren(ctx) {
if (!ctx) {
console.log('noctx');
return;
}
if (ctx.children) {
return ctx.children.map(child => {
if (child.children && child.children.length != 0) {
return child.accept(this);
} else {
return child.getText();
}
});
}
}
}
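// Listener that logs entry into a few rules; driven by the ParseTreeWalker call
// that is currently commented out at the bottom of this file.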
class L extends OttoListener {
enterPipeline(ctx) {
console.log('entering pipeline');
}
enterConfigure_block(ctx) {
console.log('enter config');
}
enterUse_block(ctx) {
console.log('enter use');
}
}
tree.accept(new Visitor());
//antlr.tree.ParseTreeWalker.DEFAULT.walk(new L(), tree);
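To run this script by hand, generate the JavaScript stubs first with make parser; they are emitted under build/parser/JavaScript, which is where the require() calls above expect them. The Makefile's check target then invokes it via node parse-test.js.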