Language & Syntax
The ETK assembly language takes inspiration from NASM and other similar assemblers, but has its own particular flavor.
Syntax
Friendly Example
This example increments a value on the stack from 0 to 255, then halts execution.
// Documentation example: assembles a short ETK program that counts from 0 to
// 255 on the stack and then stops.
// NOTE(review): the blanket `allow(unused)` looks like doc-snippet boilerplate — confirm it is still wanted.
#![allow(unused)]
fn main() {
    extern crate etk_asm;

    // The listing keeps one statement per line: the grammar separates
    // statements with newlines (or ";"), and a `#` comment runs to the end of
    // its line.
    let src = r#"
push1 0x00

loop:
    jumpdest
    push1 0x01
    add
    dup1
    push1 0xFF
    gt
    push1 loop
    jumpi

pop
stop                # This halts execution
"#;

    // Feed the listing to the assembler front end. `file!()` is passed as the
    // source name; `unwrap` makes the example panic if ingestion fails.
    // NOTE(review): the `Vec` is presumably the in-memory output buffer — confirm.
    let mut ingest = etk_asm::ingest::Ingest::new(Vec::new());
    ingest.ingest(file!(), src).unwrap();
}
The first line—`push1 0x00`—describes a push instruction of length one, with a value of `0`. When assembled, this line would become `0x6000`.
Next, we have `loop:`, which introduces a label named `loop`. Labels can be used as arguments to push instructions, usually for jumps or subroutines.
Finally, we have `# This halts execution`, which is a comment. Comments are introduced with `#` and continue to the end of the line. The assembler ignores comments entirely.
There are a couple of other features, like macros, which are covered in later chapters.
Formal Syntax
For the language nerds, the ETK assembly language syntax is defined by the following Pest grammar:
///////////////////////
// program structure //
///////////////////////
// Entry rule: a program must span the whole input, from SOI to EOI.
// Silent (`_`), so it produces no token pair of its own.
program = _{ SOI ~ inner ~ EOI }
// Optional leading blank lines, then zero or more statements each terminated
// by one or more newlines or a ";". The last statement may omit its terminator.
inner = _{ NEWLINE* ~ (stmt ~ (NEWLINE+|";"))* ~ stmt? }
// One statement. Alternatives are tried left to right (PEG ordered choice),
// so a label definition is attempted first and a plain opcode last.
stmt = _{ label_definition | builtin | local_macro | push | op }
//////////////////////
// opcode mnemonics //
//////////////////////
// Instruction mnemonics that are written without an operand. Atomic (`@`):
// no implicit whitespace or comments are skipped inside a mnemonic.
// Choice is ordered, so a longer mnemonic must appear before any shorter one
// that is its prefix (e.g. "mulmod" before "mul", "mstore8" before "mstore",
// "jumpi" before "jump", "create2" before "create", "callcode" before "call").
// Keep that property when adding new entries.
op = @{
"origin" | "stop" | "mulmod" | "mul" | "sub" | "div" | "sdiv" | "mod" | "smod" |
"addmod" | "exp" | "signextend" | "lt" | "gt" | "slt" |
"sgt" | "eq" | "iszero" | "and" | "or" | "xor" | "not" | "shl" | "shr" |
"sar" | "keccak256" | "address" | "add" | "balance" | "caller" |
"callvalue" | "calldataload" | "calldatasize" | "calldatacopy" |
"codesize" | "codecopy" | "gasprice" | "extcodesize" | "extcodecopy" |
"returndatasize" | "returndatacopy" | "extcodehash" | "blockhash" |
"coinbase" | "timestamp" | "number" | "difficulty" | "gaslimit" |
"pop" | "mload" | "mstore8" | "mstore" | "sload" | "sstore" | "jumpdest" |
"jumpi" | "jump" | "pc" | "msize" | "gas" | swap | dup | log |
"create2" | "callcode" | "call" | "return" | "delegatecall" | "create" |
"staticcall" | "revert" | "selfdestruct" | "byte" | "chainid" | "selfbalance" |
"basefee" | "invalid" | "push0" | "mcopy"
}
// Sized push: "push" immediately followed by its size, then an explicit
// WHITESPACE match, then the operand expression. Compound-atomic (`$`), so
// nothing is skipped implicitly between these parts.
push = ${ "push" ~ word_size ~ WHITESPACE ~ expression }
// "swap" / "dup" immediately followed by a size in 1..=16.
swap = @{ "swap" ~ half_word_size }
dup = @{ "dup" ~ half_word_size }
// "log" immediately followed by a single digit in 0..=4.
log = @{ "log" ~ '0'..'4' }
// Decimal size in 1..=32. The two-digit alternatives (10-29, then 30-32) are
// tried before the single digit so that e.g. "12" is not cut short at "1".
word_size = @{ ('1'..'2' ~ '0'..'9') | ("3" ~ '0'..'2') | '1'..'9' }
// Decimal size in 1..=16: 10-16 is tried first, then a single digit 1-9.
half_word_size = @{ ("1" ~ '0'..'6') | '1'..'9' }
////////////////////////
// instruction macros //
////////////////////////
// "%macro" plus a declaration, then body statements each followed by at least
// one newline, closed by "%end".
instruction_macro_definition = { "%macro" ~ function_declaration ~ NEWLINE* ~ (instruction_macro_stmt ~ NEWLINE+)* ~ "%end" }
// Statements allowed inside a macro body. Unlike `stmt`, the full `builtin`
// rule is not accepted here; only the push macro is, spelled as "%" ~ push_macro.
instruction_macro_stmt = _{ label_definition | "%" ~ push_macro | local_macro | push | op }
// "$" immediately followed by a parameter name; usable as an expression term.
instruction_macro_variable = @{ "$" ~ function_parameter }
// Invocation of an instruction macro: "%" followed by name(arguments...).
// Non-atomic (`!`), so implicit whitespace is skipped between its parts.
instruction_macro = !{ "%" ~ function_invocation }
// Macro definitions and invocations. The `!builtin` negative lookahead
// rejects anything that would parse as a builtin, so builtin names cannot be
// taken for user macro invocations.
local_macro = { !builtin ~ (instruction_macro_definition | instruction_macro | expression_macro_definition) }
// "%" immediately followed by one of the builtin names and its arguments.
builtin = ${ "%" ~ ( import | include | include_hex | push_macro ) }
// Each builtin is its keyword followed by a parenthesized argument list.
// "include" is tried before "include_hex"; on "include_hex(" it fails at the
// "_" (an opening parenthesis is required) and the parser falls through to
// `include_hex`.
import = !{ "import" ~ arguments }
include = !{ "include" ~ arguments }
include_hex = !{ "include_hex" ~ arguments }
push_macro = !{ "push" ~ arguments }
// Parenthesized, optional argument list.
arguments = _{ "(" ~ arguments_list? ~ ")" }
// Comma-separated arguments; a trailing comma is accepted.
arguments_list = _{ ( argument ~ "," )* ~ argument? }
// `string` is tried before `expression`.
argument = _{ string | expression }
// Double-quoted string literal; atomic, so its contents are taken verbatim.
string = @{ "\"" ~ string_char* ~ "\"" }
// One string element: an escaped backslash (\\), an escaped quote (\"), or
// any single character that is neither a backslash nor a double quote.
string_char = _{ "\\\\" | "\\\"" | (!"\\" ~ !"\"" ~ ANY) }
///////////////////////
// expression macros //
///////////////////////
// "%def" plus a declaration, a newline, a single expression, a newline, then
// "%end".
expression_macro_definition = !{ "%def" ~ function_declaration ~ NEWLINE ~ expression ~ NEWLINE ~ "%end" }
// Use of an expression macro inside an expression: name(arguments...).
expression_macro = { function_invocation }
// Literal forms selector("...") and topic("..."). Compound-atomic, so no
// whitespace is skipped between the keyword, the quotes and the declaration.
// NOTE(review): presumably these evaluate to a function selector / event
// topic derived from the quoted signature — confirm against the assembler.
selector = ${ "selector(\"" ~ selector_function_declaration ~ "\")" }
topic = ${ "topic(\"" ~ selector_function_declaration ~ "\")" }
// Same shape as `function_declaration`, but atomic: no implicit whitespace is
// skipped anywhere inside the quoted signature.
selector_function_declaration = @{ function_name ~ "(" ~ function_parameter* ~ ("," ~ function_parameter)* ~ ")" }
// Name followed by a parenthesized parameter list: zero or more parameters,
// then zero or more ","-prefixed parameters.
function_declaration = { function_name ~ "(" ~ function_parameter* ~ ("," ~ function_parameter)* ~ ")" }
// Same shape as a declaration, with expressions in place of parameter names.
// Silent, so only the enclosing rule appears in the parse result.
function_invocation = _{ function_name ~ "(" ~ expression* ~ ("," ~ expression)* ~ ")" }
// A letter or "_" followed by letters, digits or "_".
function_name = @{ ( ASCII_ALPHA | "_" ) ~ ( ASCII_ALPHANUMERIC | "_" )* }
// A letter followed by letters or digits; underscores are not accepted.
function_parameter = @{ ASCII_ALPHA ~ ASCII_ALPHANUMERIC* }
//////////////
// operands //
//////////////
// Numeric literal. The prefixed forms are tried before `decimal`; otherwise
// the leading "0" of "0x.." / "0b.." / "0o.." would match as a decimal.
number = _{ binary | octal | hex | decimal }
// "0b" followed by one or more binary digits.
binary = @{ "0b" ~ ASCII_BIN_DIGIT+ }
// "0o" followed by one or more octal digits.
octal = @{ "0o" ~ ASCII_OCT_DIGIT+ }
// One or more decimal digits, no sign.
decimal = @{ ASCII_DIGIT+ }
// "0x" followed by at least two hexadecimal digits.
hex = @{ "0x" ~ ASCII_HEX_DIGIT ~ ASCII_HEX_DIGIT+ }
// A letter followed by letters, digits or "_".
label = @{ ASCII_ALPHA ~ (ASCII_ALPHANUMERIC | "_")* }
// A label followed by ":" declares that label.
label_definition = { label ~ ":" }
////////////////
// infix math //
////////////////
// A flat sequence: a term followed by any number of (operator, term) pairs.
// The grammar itself encodes no operator precedence.
// NOTE(review): precedence is presumably resolved by the code that consumes
// the parse result — confirm.
expression = !{ term ~ (operation ~ term)* }
// One operand. Alternatives are tried in order, so `expression_macro`
// (name followed by "(") is attempted before a bare `label`. A parenthesized
// sub-expression is the last alternative.
term = _{ instruction_macro_variable | selector | topic | expression_macro | label | number | negative_decimal | "(" ~ expression ~ ")" }
// "-" immediately followed by decimal digits.
negative_decimal = @{ "-" ~ ASCII_DIGIT+ }
// Binary operators. `operation` is silent, so the specific operator rule
// (plus / minus / times / divide) is what appears in the parse result.
operation = _{ plus | minus | times | divide }
plus = { "+" }
minus = { "-" }
times = { "*" }
divide = { "/" }
///////////////
// overrides //
///////////////
// Pest's special implicit-skip rules: they are matched automatically between
// the elements of non-atomic rules.
// Only space and tab count as whitespace; newlines are not included, so line
// breaks stay significant to the rules above.
WHITESPACE = _{ " " | "\t" }
// A comment starts at "#" and runs up to, but not including, the end of line.
COMMENT = _{ "#" ~ (!NEWLINE ~ ANY)* }