Language & Syntax

The ETK assembly language takes inspiration from NASM and other similar assemblers, but has its own particular flavor.

Syntax

Friendly Example

This example increments a value on the stack from 0 to 255, then halts execution.


#![allow(unused)]
fn main() {
    // Link the assembler crate explicitly for this standalone example.
    extern crate etk_asm;

    // The ETK assembly program handed to the assembler below.
    let program = r#"
push1 0x00

loop:
    jumpdest
    push1 0x01
    add
    dup1
    push1 0xFF
    gt
    push1 loop
    jumpi

pop
stop                # This halts execution
"#;

    // Construct an `Ingest` around an empty `Vec` and feed it the program text,
    // labelled with this file's name. `unwrap` makes the example panic if
    // ingestion reports an error.
    let mut assembler = etk_asm::ingest::Ingest::new(Vec::new());
    assembler.ingest(file!(), program).unwrap();
}

The first line—push1 0x00—is a push instruction with a one-byte operand whose value is 0. When assembled, this line becomes the two bytes 0x6000.

Next, we have loop:, which introduces a label named loop. Labels can be used as arguments to push instructions, usually for jumps or subroutines.

Finally, we have # This halts execution, which is a comment. Comments are introduced with # and continue to the end of the line. Comments are ignored as far as the assembler is concerned.

There are a couple of other features, like macros, which will be covered in later chapters.

Formal Syntax

For the language nerds, the ETK assembly language syntax is defined by the following Pest grammar:

///////////////////////
// program structure //
///////////////////////
// Entry rule: the whole input, anchored at its start (SOI) and end (EOI).
// Silent (`_{}`), so it produces no token pair of its own.
program = _{ SOI ~ inner ~ EOI }
// Optional leading blank lines, then zero or more statements each terminated
// by one or more newlines or a ";". A final statement may omit its terminator.
inner = _{ NEWLINE* ~ (stmt ~ (NEWLINE+|";"))* ~ stmt? }
// One statement. Alternatives are tried in the order written and the first
// match wins, so builtins are recognised before user-defined macros, and
// `push` before the plain opcode list.
stmt = _{ label_definition | builtin | local_macro | push | op }

//////////////////////
// opcode mnemonics //
//////////////////////
// Every operand-less instruction mnemonic. The rule is atomic (`@{}`): no
// implicit whitespace is skipped inside it and inner rules yield no tokens.
//
// Order matters: choices are tried left to right and the first match wins, so
// wherever one mnemonic is a prefix of another the longer one is listed first
// (e.g. "mulmod" before "mul", "address"/"addmod" before "add", "mstore8"
// before "mstore", "callcode" before "call", "create2" before "create").
op = @{
	"origin" | "stop" | "mulmod" | "mul" | "sub" | "div" | "sdiv" | "mod" | "smod" |
	"addmod" | "exp" | "signextend" | "lt" | "gt" | "slt" |
	"sgt" | "eq" | "iszero" | "and" | "or" | "xor" | "not" | "shl" | "shr" |
	"sar" | "keccak256" | "address" | "add" | "balance" | "caller" |
	"callvalue" | "calldataload" | "calldatasize" | "calldatacopy" |
	"codesize" | "codecopy" | "gasprice" | "extcodesize" | "extcodecopy" |
	"returndatasize" | "returndatacopy" | "extcodehash" | "blockhash" |
	"coinbase" | "timestamp" | "number" | "difficulty" | "gaslimit" |
	"pop" | "mload" | "mstore8" | "mstore" | "sload" | "sstore" | "jumpdest" |
	"jumpi" | "jump" | "pc" | "msize" | "gas" | swap | dup | log |
	"create2" | "callcode" | "call" | "return" | "delegatecall" | "create" |
	"staticcall" | "revert" | "selfdestruct" | "byte" | "chainid" | "selfbalance" |
	"basefee" | "invalid"
}
// "push" immediately followed by its size, then mandatory whitespace, then the
// operand expression. Compound-atomic (`${}`): nothing is skipped implicitly
// between the parts, which is why WHITESPACE is spelled out explicitly.
push = ${ "push" ~  word_size ~ WHITESPACE ~ expression }
// "swap" / "dup" immediately followed by a size from 1 to 16.
swap = @{ "swap" ~ half_word_size }
dup  = @{ "dup" ~ half_word_size }
// "log" immediately followed by a single digit from 0 to 4.
log = @{ "log" ~ '0'..'4' }

// Decimal 1..32. The two-digit alternatives (10-29, then 30-32) come before
// the single-digit one so that e.g. "12" is not consumed as just "1".
word_size = @{ ('1'..'2' ~ '0'..'9') | ("3" ~ '0'..'2') | '1'..'9' }
// Decimal 1..16, again trying the two-digit form (10-16) first.
half_word_size = @{ ("1" ~ '0'..'6') | '1'..'9' }

////////////////////////
// instruction macros //
////////////////////////
// "%macro" followed by a declaration, optional blank lines, a body in which
// every statement must be followed by at least one newline, and "%end".
instruction_macro_definition = { "%macro" ~ function_declaration ~ NEWLINE* ~ (instruction_macro_stmt ~ NEWLINE+)* ~ "%end" }
// Statements permitted inside a macro body. Of the builtins, only the push
// macro ("%" ~ push_macro) is accepted here.
instruction_macro_stmt = _{ label_definition | "%" ~ push_macro | local_macro | push | op }
// A macro parameter reference: "$" immediately followed by the parameter name.
instruction_macro_variable = @{ "$" ~ function_parameter }
// A macro invocation: "%" followed by a call. Non-atomic (`!{}`), so implicit
// whitespace handling applies inside it even when reached from an atomic rule.
instruction_macro = !{ "%" ~ function_invocation }

// A user-level macro construct. The negative lookahead `!builtin` rejects
// input that would parse as a builtin, so builtins are never treated as
// invocations of user macros.
local_macro = { !builtin ~ (instruction_macro_definition | instruction_macro  | expression_macro_definition) }
// "%" immediately followed by a builtin name. Compound-atomic, so no
// whitespace is allowed between "%" and the name.
builtin = ${ "%" ~ ( import | include | include_hex | push_macro ) }

// The individual builtins. Each is non-atomic (`!{}`) so that implicit
// whitespace is allowed again inside its argument list.
import = !{ "import" ~ arguments }
include = !{ "include" ~ arguments }
include_hex = !{ "include_hex" ~ arguments }
push_macro = !{ "push" ~ arguments }

// A parenthesised, possibly empty argument list.
arguments = _{ "(" ~ arguments_list? ~ ")" }
// Comma-separated arguments; as written, a trailing comma is accepted.
arguments_list = _{ ( argument ~ "," )* ~ argument? }
// An argument is a string literal or an expression; string is tried first.
argument = _{ string | expression }

// A double-quoted string literal, matched atomically.
string = @{ "\"" ~ string_char* ~ "\"" }
// One string character: an escaped backslash, an escaped double quote, or any
// character that is neither a backslash nor a double quote.
string_char = _{ "\\\\" | "\\\"" | (!"\\" ~ !"\"" ~ ANY) }

///////////////////////
// expression macros //
///////////////////////
// "%def" followed by a declaration, a single NEWLINE, one expression, a single
// NEWLINE, and "%end". Unlike instruction macros, the body is one expression.
expression_macro_definition = !{ "%def" ~ function_declaration ~ NEWLINE ~ expression ~ NEWLINE ~ "%end" }
// Use of an expression macro inside an expression: a plain call, with no "%".
expression_macro = { function_invocation }

// `selector("...")` and `topic("...")`. Compound-atomic: the opening
// `name("` and closing `")` are literal, with no whitespace allowed around
// the quoted declaration.
selector = ${ "selector(\"" ~ selector_function_declaration ~ "\")" }
topic = ${ "topic(\"" ~ selector_function_declaration ~ "\")" }
// The quoted declaration inside selector/topic. Atomic, so no whitespace may
// appear anywhere within it.
selector_function_declaration = @{ function_name ~ "(" ~ function_parameter* ~ ("," ~ function_parameter)* ~ ")" }
// Name plus parenthesised parameter list, used by macro definitions.
function_declaration = { function_name ~ "(" ~ function_parameter* ~ ("," ~ function_parameter)* ~ ")" }
// Name plus parenthesised argument expressions, used by macro invocations.
function_invocation = _{ function_name ~ "(" ~ expression* ~ ("," ~ expression)* ~ ")" }
// A letter or underscore, followed by any number of letters, digits or
// underscores.
function_name = @{ ( ASCII_ALPHA | "_" ) ~ ( ASCII_ALPHANUMERIC | "_" )* }
// A letter followed by any number of letters or digits (no underscores).
function_parameter = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }

//////////////
// operands //
//////////////
// A non-negative numeric literal. The prefixed forms are tried before
// `decimal`, so "0x10" is read as hex rather than as the decimal "0".
number = _{ binary | octal | hex | decimal }

// "0b" followed by one or more binary digits.
binary = @{ "0b" ~ ASCII_BIN_DIGIT+ }
// "0o" followed by one or more octal digits.
octal = @{ "0o" ~ ASCII_OCT_DIGIT+ }
// One or more decimal digits.
decimal = @{ ASCII_DIGIT+ }
// "0x" followed by at least two hexadecimal digits, as written.
hex = @{ "0x" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT+ }

// A label name: a letter followed by any number of letters, digits or
// underscores.
label = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// A label followed by ":" introduces (defines) that label.
label_definition = { label ~ ":" }

////////////////
// infix math //
////////////////
// A flat sequence of terms separated by binary operators. The grammar itself
// encodes no operator precedence or associativity.
// NOTE(review): precedence is presumably resolved by whatever consumes the
// parse tree - confirm against the parser code.
expression = !{ term ~ (operation ~ term)* }
// One operand of an expression, tried in the order listed; a parenthesised
// sub-expression is the last alternative.
term = _{ instruction_macro_variable | selector | topic | expression_macro | label | number | negative_decimal | "(" ~ expression ~ ")" }
// "-" immediately followed by one or more decimal digits.
negative_decimal = @{ "-" ~ ASCII_DIGIT+ }
// The binary operators. `operation` is silent, so the specific operator rule
// (plus, minus, times, divide) is what appears in the parse result.
operation = _{ plus | minus | times | divide }
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }

///////////////
// overrides //
///////////////
// Pest's implicit-whitespace rule: spaces and tabs are skipped between tokens
// in non-atomic rules. Newlines are deliberately not included, so they stay
// significant as statement terminators.
WHITESPACE = _{ " " | "\t" }
// Pest's implicit-comment rule: "#" and everything up to, but not including,
// the next newline.
COMMENT = _{ "#" ~ (!NEWLINE ~ ANY)* }