Compare commits
89 Commits
d1a7d55051
..
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 28f7d98fcd | |||
| e95c8ba791 | |||
| 7d21506d7b | |||
| b06f9b65c2 | |||
| b57ddaf8dc | |||
| b533ed4119 | |||
| b2e251cfdd | |||
| 1119aca5ae | |||
| 715476bcc9 | |||
| 7234e21caa | |||
| 0a1bdde25f | |||
| 9905399edb | |||
| ec950551fd | |||
| 280f09c60d | |||
| 0fda7e6fe8 | |||
| 5dccc1ac9e | |||
| 2834c437cf | |||
| f9af39ba94 | |||
| ea34b7cb52 | |||
| 73bcee960b | |||
| 321f001232 | |||
| 7780ea3ab3 | |||
| 2cb47dc7cf | |||
| 6118ddc53c | |||
| 2994486191 | |||
| a688c1c6c2 | |||
| c48829371e | |||
| 20339b4909 | |||
| 08b2eced2d | |||
| 7344d3a8d7 | |||
| b1f7a5cc49 | |||
| 4cc71666d5 | |||
| 14fec05784 | |||
| 36a177a39a | |||
| 806d7b3094 | |||
| a486bc1379 | |||
| 112b409f14 | |||
| 028599790a | |||
| 2087c62abb | |||
| 48769aef6e | |||
| 533ef2d223 | |||
| 57f5f66704 | |||
| 41df5f90d0 | |||
| 7ec1d3692f | |||
| 6673614b54 | |||
| b8c9f1f9f9 | |||
| 357bbc2ee9 | |||
| 8f2c70e6f4 | |||
| d0991c58f6 | |||
| b2378e34be | |||
| d30670ebf7 | |||
| 00b0e87fb3 | |||
| bc4120a713 | |||
| 00efec2cf2 | |||
| d3b4382440 | |||
| b40725615d | |||
| 4d2e3957c3 | |||
| 1482d7513e | |||
| db6292add6 | |||
| 3a0f328f90 | |||
| 321903831d | |||
| 22373d89af | |||
| c9bf61aeec | |||
| 6c60cbb741 | |||
| ef79b28e45 | |||
| f3bc270904 | |||
| dea3a32c33 | |||
| 759142ea75 | |||
| 2b43a36a83 | |||
| 0a9be743a1 | |||
| 35830fd986 | |||
| b2cf289c36 | |||
| 282f884a3d | |||
| 2c1f7fb3af | |||
| eb79c40c69 | |||
| 3e7f92b7ef | |||
| f80c612835 | |||
| 8bfdd966ea | |||
| f99f9d5290 | |||
| 2c74222193 | |||
| 6d3b7c5a89 | |||
| cb270c8765 | |||
| 1d204c0a86 | |||
| fb1395c740 | |||
| 1fd8f7196e | |||
| 5eee0d1810 | |||
| d22a0a5756 | |||
| b8993f556e | |||
| 683281363d |
@@ -57,3 +57,9 @@ ci: fmt clippy deny test
|
||||
# Check compilation without running
|
||||
check:
|
||||
cargo check --workspace
|
||||
|
||||
# Install bat syntax highlighting for WAFER / Forth
|
||||
install-syntax:
|
||||
mkdir -p ~/.config/bat/syntaxes
|
||||
cp tools/editor-support/bat/WAFER.sublime-syntax ~/.config/bat/syntaxes/
|
||||
bat cache --build
|
||||
|
||||
@@ -310,3 +310,39 @@
|
||||
\ State-smart string literal for the next whitespace-delimited token.
|
||||
\ Handled in Rust (outer.rs interpret_token_immediate / compile_token)
|
||||
\ so the string survives REFILL in interpret mode.
|
||||
|
||||
\ ---------------------------------------------------------------
|
||||
\ Structures (Forth 2012 Facility-ext 10.6.2.0935 family)
|
||||
\ ---------------------------------------------------------------
|
||||
\ Usage:
|
||||
\ BEGIN-STRUCTURE POINT FIELD: P.X FIELD: P.Y END-STRUCTURE
|
||||
\ CREATE ORIGIN POINT ALLOT
|
||||
\ 1 ORIGIN P.X ! 2 ORIGIN P.Y !
|
||||
|
||||
\ Each defining word factored inline (CREATE .. DOES>). WAFER dispatches
|
||||
\ DOES>-defining words only at the outer interpreter, so they can't be
|
||||
\ factored through other compiled words (FIELD: -> +FIELD would no-op).
|
||||
|
||||
: BEGIN-STRUCTURE ( "name" -- struct-sys 0 )
|
||||
CREATE HERE 0 0 , DOES> @ ;
|
||||
|
||||
: END-STRUCTURE ( struct-sys +n -- )
|
||||
SWAP ! ;
|
||||
|
||||
: +FIELD ( n1 "name" n2 -- n3 )
|
||||
CREATE OVER , + DOES> @ + ;
|
||||
|
||||
: FIELD: ( n1 "name" -- n2 )
|
||||
CREATE ALIGNED DUP , 1 CELLS + DOES> @ + ;
|
||||
|
||||
: CFIELD: ( n1 "name" -- n2 )
|
||||
CREATE DUP , 1 CHARS + DOES> @ + ;
|
||||
|
||||
: FFIELD: ( n1 "name" -- n2 )
|
||||
CREATE FALIGNED DUP , 1 FLOATS + DOES> @ + ;
|
||||
|
||||
: SFFIELD: ( n1 "name" -- n2 )
|
||||
CREATE SFALIGNED DUP , 1 SFLOATS + DOES> @ + ;
|
||||
|
||||
: DFFIELD: ( n1 "name" -- n2 )
|
||||
CREATE DFALIGNED DUP , 1 DFLOATS + DOES> @ + ;
|
||||
|
||||
+82
-28
@@ -229,6 +229,9 @@ fn bool_to_forth_flag(f: &mut Function, tmp: u32) {
|
||||
struct EmitCtx {
|
||||
f64_local_0: u32,
|
||||
f64_local_1: u32,
|
||||
/// Base WASM local index for float-typed Forth locals (`F:` in `{: ... :}`).
|
||||
/// Float local N maps to WASM local `forth_f_local_base + N` (f64 type).
|
||||
forth_f_local_base: u32,
|
||||
/// Base WASM local index for Forth locals ({: ... :}).
|
||||
/// Forth local N maps to WASM local `forth_local_base + N`.
|
||||
forth_local_base: u32,
|
||||
@@ -691,6 +694,14 @@ fn emit_op(f: &mut Function, op: &IrOp, ctx: &mut EmitCtx) {
|
||||
IrOp::ForthLocalSet(n) => {
|
||||
pop_to(f, ctx.forth_local_base + n);
|
||||
}
|
||||
IrOp::ForthFLocalGet(n) => {
|
||||
f.instruction(&Instruction::LocalGet(ctx.forth_f_local_base + n));
|
||||
fpush_via_local(f, ctx.f64_local_0);
|
||||
}
|
||||
IrOp::ForthFLocalSet(n) => {
|
||||
fpop(f);
|
||||
f.instruction(&Instruction::LocalSet(ctx.forth_f_local_base + n));
|
||||
}
|
||||
|
||||
// -- Return stack ---------------------------------------------------
|
||||
IrOp::ToR => {
|
||||
@@ -1125,6 +1136,7 @@ fn is_promotable_body(ops: &[IrOp]) -> bool {
|
||||
IrOp::Call(_) | IrOp::TailCall(_) | IrOp::Execute | IrOp::SpFetch => return false,
|
||||
IrOp::ToR | IrOp::FromR | IrOp::Exit => return false,
|
||||
IrOp::ForthLocalGet(_) | IrOp::ForthLocalSet(_) => return false,
|
||||
IrOp::ForthFLocalGet(_) | IrOp::ForthFLocalSet(_) => return false,
|
||||
IrOp::Emit | IrOp::Dot | IrOp::Cr | IrOp::Type => return false,
|
||||
IrOp::PushI64(_) | IrOp::PushF64(_) => return false,
|
||||
IrOp::FDup
|
||||
@@ -2000,14 +2012,12 @@ fn emit_promoted_op(f: &mut Function, op: &IrOp, sim: &mut StackSim) {
|
||||
// Outside loops, RFetch shouldn't appear in promoted code
|
||||
}
|
||||
|
||||
IrOp::LoopJ => {
|
||||
if sim.loop_index_stack.len() >= 2 {
|
||||
let (outer_index, _) = sim.loop_index_stack[sim.loop_index_stack.len() - 2];
|
||||
let result = sim.alloc();
|
||||
f.instruction(&Instruction::LocalGet(outer_index));
|
||||
f.instruction(&Instruction::LocalSet(result));
|
||||
sim.push(result);
|
||||
}
|
||||
IrOp::LoopJ if sim.loop_index_stack.len() >= 2 => {
|
||||
let (outer_index, _) = sim.loop_index_stack[sim.loop_index_stack.len() - 2];
|
||||
let result = sim.alloc();
|
||||
f.instruction(&Instruction::LocalGet(outer_index));
|
||||
f.instruction(&Instruction::LocalSet(result));
|
||||
sim.push(result);
|
||||
}
|
||||
|
||||
IrOp::Exit => {
|
||||
@@ -2135,15 +2145,15 @@ fn needs_f64_locals(ops: &[IrOp]) -> bool {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
if needs_f64_locals(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body }
|
||||
if needs_f64_locals(body) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
if needs_f64_locals(test) || needs_f64_locals(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body }
|
||||
if needs_f64_locals(test) || needs_f64_locals(body) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginDoubleWhileRepeat {
|
||||
outer_test,
|
||||
@@ -2197,15 +2207,15 @@ fn body_needs_return_stack(ops: &[IrOp]) -> bool {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
if body_needs_return_stack(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body }
|
||||
if body_needs_return_stack(body) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
if body_needs_return_stack(test) || body_needs_return_stack(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body }
|
||||
if body_needs_return_stack(test) || body_needs_return_stack(body) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginDoubleWhileRepeat {
|
||||
outer_test,
|
||||
@@ -2360,6 +2370,34 @@ fn count_forth_locals(ops: &[IrOp]) -> u32 {
|
||||
max
|
||||
}
|
||||
|
||||
fn count_forth_f_locals(ops: &[IrOp]) -> u32 {
|
||||
let mut max: u32 = 0;
|
||||
for op in ops {
|
||||
match op {
|
||||
IrOp::ForthFLocalGet(n) | IrOp::ForthFLocalSet(n) => max = max.max(*n + 1),
|
||||
IrOp::If {
|
||||
then_body,
|
||||
else_body,
|
||||
} => {
|
||||
max = max.max(count_forth_f_locals(then_body));
|
||||
if let Some(eb) = else_body {
|
||||
max = max.max(count_forth_f_locals(eb));
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
max = max.max(count_forth_f_locals(body));
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
max = max
|
||||
.max(count_forth_f_locals(test))
|
||||
.max(count_forth_f_locals(body));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
max
|
||||
}
|
||||
|
||||
/// Generate a complete WASM module for a single compiled word.
|
||||
///
|
||||
/// This is the JIT path: each word gets its own module that imports
|
||||
@@ -2467,8 +2505,14 @@ pub fn compile_word(
|
||||
} else {
|
||||
1 + scratch_count + forth_local_count + loop_local_count
|
||||
};
|
||||
let has_floats = needs_f64_locals(body);
|
||||
let num_f64: u32 = if has_floats { 2 } else { 0 };
|
||||
let forth_f_local_count = count_forth_f_locals(body);
|
||||
// F: locals need f64 storage, which also implies the f64 scratch pair.
|
||||
let has_floats = needs_f64_locals(body) || forth_f_local_count > 0;
|
||||
let num_f64: u32 = if has_floats {
|
||||
2 + forth_f_local_count
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let mut locals_decl = vec![(num_locals, ValType::I32)];
|
||||
if num_f64 > 0 {
|
||||
locals_decl.push((num_f64, ValType::F64));
|
||||
@@ -2482,9 +2526,12 @@ pub fn compile_word(
|
||||
1 + scratch_count
|
||||
};
|
||||
let loop_local_base = forth_local_base + forth_local_count;
|
||||
// f64 scratch pair first (indices num_locals, num_locals+1), then F: locals.
|
||||
let forth_f_local_base = num_locals + 2;
|
||||
let mut ctx = EmitCtx {
|
||||
f64_local_0: num_locals,
|
||||
f64_local_1: num_locals + 1,
|
||||
forth_f_local_base,
|
||||
forth_local_base,
|
||||
loop_local_base,
|
||||
loop_locals: Vec::new(),
|
||||
@@ -2969,8 +3016,13 @@ fn compile_multi_word_module(
|
||||
} else {
|
||||
1 + scratch_count + forth_local_count + loop_local_count
|
||||
};
|
||||
let has_floats = needs_f64_locals(body);
|
||||
let num_f64: u32 = if has_floats { 2 } else { 0 };
|
||||
let forth_f_local_count = count_forth_f_locals(body);
|
||||
let has_floats = needs_f64_locals(body) || forth_f_local_count > 0;
|
||||
let num_f64: u32 = if has_floats {
|
||||
2 + forth_f_local_count
|
||||
} else {
|
||||
0
|
||||
};
|
||||
let mut locals_decl = vec![(num_locals, ValType::I32)];
|
||||
if num_f64 > 0 {
|
||||
locals_decl.push((num_f64, ValType::F64));
|
||||
@@ -2984,9 +3036,11 @@ fn compile_multi_word_module(
|
||||
1 + scratch_count
|
||||
};
|
||||
let loop_local_base = forth_local_base + forth_local_count;
|
||||
let forth_f_local_base = num_locals + 2;
|
||||
let mut ctx = EmitCtx {
|
||||
f64_local_0: num_locals,
|
||||
f64_local_1: num_locals + 1,
|
||||
forth_f_local_base,
|
||||
forth_local_base,
|
||||
loop_local_base,
|
||||
loop_locals: Vec::new(),
|
||||
|
||||
@@ -80,7 +80,10 @@ mod tests {
|
||||
|
||||
#[test]
|
||||
fn sha1_rfc3174_abc() {
|
||||
assert_eq!(hex(&sha1_hash(b"abc")), "a9993e364706816aba3e25717850c26c9cd0d89d");
|
||||
assert_eq!(
|
||||
hex(&sha1_hash(b"abc")),
|
||||
"a9993e364706816aba3e25717850c26c9cd0d89d"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -131,10 +131,8 @@ pub fn export_module(
|
||||
fn collect_external_calls(ops: &[IrOp], ir_ids: &HashSet<WordId>, host_ids: &mut HashSet<WordId>) {
|
||||
for op in ops {
|
||||
match op {
|
||||
IrOp::Call(id) | IrOp::TailCall(id) => {
|
||||
if !ir_ids.contains(id) {
|
||||
host_ids.insert(*id);
|
||||
}
|
||||
IrOp::Call(id) | IrOp::TailCall(id) if !ir_ids.contains(id) => {
|
||||
host_ids.insert(*id);
|
||||
}
|
||||
IrOp::If {
|
||||
then_body,
|
||||
|
||||
@@ -139,6 +139,10 @@ pub enum IrOp {
|
||||
ForthLocalGet(u32),
|
||||
/// Set Forth local variable N: ( x -- )
|
||||
ForthLocalSet(u32),
|
||||
/// Push float-typed Forth local N: ( F: -- r )
|
||||
ForthFLocalGet(u32),
|
||||
/// Set float-typed Forth local N: ( F: r -- )
|
||||
ForthFLocalSet(u32),
|
||||
|
||||
// -- I/O --
|
||||
/// Output character: ( char -- )
|
||||
|
||||
@@ -50,23 +50,23 @@ pub const DATA_STACK_BASE: u32 = WORD_BUF_BASE + WORD_BUF_SIZE; // 0x0600
|
||||
pub const DATA_STACK_SIZE: u32 = 4096; // 1024 cells
|
||||
|
||||
/// Return stack region. Grows downward.
|
||||
pub const RETURN_STACK_BASE: u32 = DATA_STACK_BASE + DATA_STACK_SIZE; // 0x1540
|
||||
pub const RETURN_STACK_BASE: u32 = DATA_STACK_BASE + DATA_STACK_SIZE; // 0x1600
|
||||
/// Size of return stack region.
|
||||
pub const RETURN_STACK_SIZE: u32 = 4096;
|
||||
|
||||
/// Floating-point stack region (fallback). Grows downward.
|
||||
pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x2540
|
||||
pub const FLOAT_STACK_BASE: u32 = RETURN_STACK_BASE + RETURN_STACK_SIZE; // 0x2600
|
||||
/// Size of float stack region.
|
||||
pub const FLOAT_STACK_SIZE: u32 = 2048; // 256 doubles
|
||||
|
||||
/// Hash scratch region — output buffer for `SHA1`/`SHA256`/`SHA512` and
|
||||
/// other hash host words. Sized for the largest supported digest (SHA512 = 64 B).
|
||||
pub const HASH_SCRATCH_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2D40
|
||||
pub const HASH_SCRATCH_BASE: u32 = FLOAT_STACK_BASE + FLOAT_STACK_SIZE; // 0x2E00
|
||||
/// Size of hash scratch region.
|
||||
pub const HASH_SCRATCH_SIZE: u32 = 128;
|
||||
|
||||
/// Dictionary region start. Grows upward.
|
||||
pub const DICTIONARY_BASE: u32 = HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE; // 0x2DC0
|
||||
pub const DICTIONARY_BASE: u32 = HASH_SCRATCH_BASE + HASH_SCRATCH_SIZE; // 0x2E80
|
||||
|
||||
/// Initial top of data stack (grows down from here).
|
||||
pub const DATA_STACK_TOP: u32 = DATA_STACK_BASE + DATA_STACK_SIZE;
|
||||
|
||||
@@ -591,15 +591,15 @@ fn contains_call_to(ops: &[IrOp], target: WordId) -> bool {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
if contains_call_to(body, target) {
|
||||
return true;
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body }
|
||||
if contains_call_to(body, target) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
if contains_call_to(test, target) || contains_call_to(body, target) {
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body }
|
||||
if contains_call_to(test, target) || contains_call_to(body, target) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginDoubleWhileRepeat {
|
||||
outer_test,
|
||||
@@ -633,7 +633,11 @@ fn contains_call_to(ops: &[IrOp], target: WordId) -> bool {
|
||||
fn contains_exit(ops: &[IrOp]) -> bool {
|
||||
for op in ops {
|
||||
match op {
|
||||
IrOp::Exit | IrOp::ForthLocalGet(_) | IrOp::ForthLocalSet(_) => return true,
|
||||
IrOp::Exit
|
||||
| IrOp::ForthLocalGet(_)
|
||||
| IrOp::ForthLocalSet(_)
|
||||
| IrOp::ForthFLocalGet(_)
|
||||
| IrOp::ForthFLocalSet(_) => return true,
|
||||
IrOp::If {
|
||||
then_body,
|
||||
else_body,
|
||||
@@ -647,15 +651,13 @@ fn contains_exit(ops: &[IrOp]) -> bool {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body } => {
|
||||
if contains_exit(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::DoLoop { body, .. } | IrOp::BeginUntil { body } | IrOp::BeginAgain { body }
|
||||
if contains_exit(body) =>
|
||||
{
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } => {
|
||||
if contains_exit(test) || contains_exit(body) {
|
||||
return true;
|
||||
}
|
||||
IrOp::BeginWhileRepeat { test, body } if contains_exit(test) || contains_exit(body) => {
|
||||
return true;
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
|
||||
+647
-47
@@ -119,6 +119,13 @@ enum PendingAction {
|
||||
CsRoll(u32),
|
||||
/// Compile a control-flow operation (from POSTPONE of compile-time keywords).
|
||||
CompileControl(i32),
|
||||
/// Forth 2012 §13.6.1.0086 `(LOCAL)` non-sentinel: declare a local of the
|
||||
/// given name. Name is already ASCII-uppercased by the host primitive.
|
||||
DeclareLocal(String),
|
||||
/// Forth 2012 §13.6.1.0086 `(LOCAL)` sentinel (`0 0 (LOCAL)`): emit the
|
||||
/// init code for locals declared since the last sentinel (or start of
|
||||
/// the current colon definition).
|
||||
DeclareLocalEnd,
|
||||
}
|
||||
|
||||
// Control-flow action codes for PendingAction::CompileControl
|
||||
@@ -252,6 +259,13 @@ pub struct ForthVM<R: Runtime> {
|
||||
next_block_label: u32,
|
||||
/// Local variable names for the current definition ({: ... :} syntax)
|
||||
compiling_locals: Vec<String>,
|
||||
/// Parallel to `compiling_locals`: kind of each local (Int or Float).
|
||||
compiling_local_kinds: Vec<LocalKind>,
|
||||
/// Forth 2012 §13.6.1.0086 `(LOCAL)` batch base: index into
|
||||
/// `compiling_locals` where the current `(LOCAL)` batch started.
|
||||
/// `None` means no pending batch. Set on the first `DeclareLocal` of a
|
||||
/// batch, cleared on `DeclareLocalEnd`, reset on `finish_colon_def`.
|
||||
local_batch_base: Option<usize>,
|
||||
/// Substitution table for SUBSTITUTE/REPLACES (String word set)
|
||||
substitutions: Arc<Mutex<HashMap<String, Vec<u8>>>>,
|
||||
/// Search order: list of wordlist IDs (first = top of search order).
|
||||
@@ -259,6 +273,57 @@ pub struct ForthVM<R: Runtime> {
|
||||
search_order: Arc<Mutex<Vec<u32>>>,
|
||||
/// Next wordlist ID to allocate (shared).
|
||||
next_wid: Arc<Mutex<u32>>,
|
||||
/// xorshift64 PRNG state for RANDOM / RND-SEED.
|
||||
rng_state: Arc<Mutex<u64>>,
|
||||
/// Stacked compile state for nested definitions (quotations `[: ;]`).
|
||||
compile_frames: Vec<CompileFrame>,
|
||||
/// Dictionary address of the word currently being compiled. Set by
|
||||
/// `start_colon_def` / `start_noname_def` / `start_quotation` so that
|
||||
/// `finish_colon_def` can use `reveal_at` instead of `reveal()` — the
|
||||
/// latter breaks when intermediate dictionary entries (quotations,
|
||||
/// `DOES>` actions) have moved `latest`.
|
||||
compiling_word_addr: u32,
|
||||
}
|
||||
|
||||
/// Snapshot of one compilation context. Pushed by `[:`, popped by `;]`.
|
||||
struct CompileFrame {
|
||||
compiling_name: Option<String>,
|
||||
compiling_word_id: Option<WordId>,
|
||||
compiling_word_addr: u32,
|
||||
compiling_ir: Vec<IrOp>,
|
||||
control_stack: Vec<ControlEntry>,
|
||||
saw_create_in_def: bool,
|
||||
compiling_locals: Vec<String>,
|
||||
compiling_local_kinds: Vec<LocalKind>,
|
||||
local_batch_base: Option<usize>,
|
||||
state: i32,
|
||||
}
|
||||
|
||||
/// Type of a Forth local.
///
/// The kind selects which IR ops access the local and which stack it
/// exchanges values with. The two kinds are numbered independently: their
/// WASM local index spaces do not overlap.
#[derive(Clone, Copy, Debug, PartialEq, Eq)]
pub enum LocalKind {
    /// Integer (cell) local: read and written with `ForthLocalGet` /
    /// `ForthLocalSet`, exchanging values with the data stack.
    Int,
    /// Float local (declared with the `F:` prefix): read and written with
    /// `ForthFLocalGet` / `ForthFLocalSet`, exchanging values with the
    /// float stack.
    Float,
}
|
||||
|
||||
/// Advance past the next `\n` in `buf`, starting at byte offset `from`.
///
/// Returns the byte index of the first character on the next line, or
/// `buf.len()` if no newline remains. A `from` beyond the end of the buffer
/// is clamped, so the result never exceeds `buf.len()`.
///
/// Used by the `\` line-comment handler per Forth 2012 §6.2.2535 so that a
/// comment stops at end-of-line instead of end-of-input when the input
/// buffer spans multiple lines.
fn skip_to_end_of_line(buf: &str, from: usize) -> usize {
    let bytes = buf.as_bytes();
    // Clamp first: slicing with an out-of-range start would panic.
    let start = from.min(bytes.len());
    bytes[start..]
        .iter()
        .position(|&byte| byte == b'\n')
        // `+ 1` steps over the newline itself.
        .map_or(bytes.len(), |offset| start + offset + 1)
}
|
||||
|
||||
impl<R: Runtime> ForthVM<R> {
|
||||
@@ -323,9 +388,24 @@ impl<R: Runtime> ForthVM<R> {
|
||||
conditional_skip_depth: 0,
|
||||
next_block_label: 0,
|
||||
compiling_locals: Vec::new(),
|
||||
compiling_local_kinds: Vec::new(),
|
||||
local_batch_base: None,
|
||||
substitutions: Arc::new(Mutex::new(HashMap::new())),
|
||||
search_order: Arc::new(Mutex::new(vec![1])),
|
||||
next_wid: Arc::new(Mutex::new(2)),
|
||||
rng_state: {
|
||||
use std::time::{SystemTime, UNIX_EPOCH};
|
||||
let seed = SystemTime::now()
|
||||
.duration_since(UNIX_EPOCH)
|
||||
.map_or(0xDEAD_BEEF_CAFE_BABE, |d| d.as_nanos() as u64);
|
||||
Arc::new(Mutex::new(if seed == 0 {
|
||||
0xDEAD_BEEF_CAFE_BABE
|
||||
} else {
|
||||
seed
|
||||
}))
|
||||
},
|
||||
compile_frames: Vec::new(),
|
||||
compiling_word_addr: 0,
|
||||
};
|
||||
|
||||
vm.register_primitives()?;
|
||||
@@ -353,6 +433,9 @@ impl<R: Runtime> ForthVM<R> {
|
||||
self.control_stack.clear();
|
||||
self.compiling_word_id = None;
|
||||
self.compiling_locals.clear();
|
||||
self.compiling_local_kinds.clear();
|
||||
self.local_batch_base = None;
|
||||
self.compile_frames.clear();
|
||||
return Err(e);
|
||||
}
|
||||
}
|
||||
@@ -555,6 +638,15 @@ impl<R: Runtime> ForthVM<R> {
|
||||
return self.finish_colon_def();
|
||||
}
|
||||
|
||||
// Quotations `[: ... ;]` — state-smart anonymous xt, nestable inside
|
||||
// colon definitions via the compile-frame stack.
|
||||
if token_upper == "[:" {
|
||||
return self.start_quotation();
|
||||
}
|
||||
if token_upper == ";]" {
|
||||
return self.finish_quotation();
|
||||
}
|
||||
|
||||
// Words that must be handled in the outer interpreter because they
|
||||
// modify Rust-side VM state that host functions cannot access.
|
||||
match token_upper.as_str() {
|
||||
@@ -694,8 +786,10 @@ impl<R: Runtime> ForthVM<R> {
|
||||
return Ok(());
|
||||
}
|
||||
if token_upper == "\\" {
|
||||
// Line comment -- skip rest of input
|
||||
self.input_pos = self.input_buffer.len();
|
||||
// Forth 2012 §6.2.2535: `\` parses and discards the remainder
|
||||
// of the *line*, not the remainder of the input buffer. Stop
|
||||
// at the first `\n`; fall through to end-of-buffer otherwise.
|
||||
self.input_pos = skip_to_end_of_line(&self.input_buffer, self.input_pos);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -796,6 +890,29 @@ impl<R: Runtime> ForthVM<R> {
|
||||
fn compile_token(&mut self, token: &str) -> anyhow::Result<()> {
|
||||
let token_upper = token.to_ascii_uppercase();
|
||||
|
||||
// Forth 2012 §13.3.3.2 — locals supersede dictionary names (and,
|
||||
// by extension, hardcoded compile-mode shortcuts) within their
|
||||
// declaration scope. Checked here, before any hardcoded token
|
||||
// handling, to keep that precedence uniform — otherwise e.g. a
|
||||
// local named `s` would be hijacked by the `S` string shortcut
|
||||
// below.
|
||||
if let Some(idx) = self
|
||||
.compiling_locals
|
||||
.iter()
|
||||
.position(|n| n.eq_ignore_ascii_case(token))
|
||||
{
|
||||
let kind = self.compiling_local_kinds[idx];
|
||||
let kind_idx = self.compiling_local_kinds[0..idx]
|
||||
.iter()
|
||||
.filter(|k| **k == kind)
|
||||
.count() as u32;
|
||||
match kind {
|
||||
LocalKind::Int => self.push_ir(IrOp::ForthLocalGet(kind_idx)),
|
||||
LocalKind::Float => self.push_ir(IrOp::ForthFLocalGet(kind_idx)),
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Handle string literals in compile mode
|
||||
if token_upper == ".\"" {
|
||||
// Parse until closing quote, emit characters as EMIT calls
|
||||
@@ -859,7 +976,8 @@ impl<R: Runtime> ForthVM<R> {
|
||||
return Ok(());
|
||||
}
|
||||
if token_upper == "\\" {
|
||||
self.input_pos = self.input_buffer.len();
|
||||
// See interpret-mode branch: `\` ends at `\n`, not at `#TIB`.
|
||||
self.input_pos = skip_to_end_of_line(&self.input_buffer, self.input_pos);
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -1104,16 +1222,6 @@ impl<R: Runtime> ForthVM<R> {
|
||||
_ => {}
|
||||
}
|
||||
|
||||
// Check for local variable reference (locals supersede dictionary words)
|
||||
if let Some(idx) = self
|
||||
.compiling_locals
|
||||
.iter()
|
||||
.position(|n| n.eq_ignore_ascii_case(token))
|
||||
{
|
||||
self.push_ir(IrOp::ForthLocalGet(idx as u32));
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
// Look up in dictionary (search order, then fallback to all wordlists)
|
||||
if let Some((_addr, word_id, is_immediate)) = self.dictionary.find(token) {
|
||||
if is_immediate {
|
||||
@@ -1334,8 +1442,15 @@ impl<R: Runtime> ForthVM<R> {
|
||||
*bp = ahead_prefix;
|
||||
}
|
||||
// Emit a first-iteration guard: allocate a local flag.
|
||||
let flag_idx = self.compiling_locals.len() as u32;
|
||||
// This is an Int local; its kind-local-index is the count of
|
||||
// existing Int entries.
|
||||
let flag_idx = self
|
||||
.compiling_local_kinds
|
||||
.iter()
|
||||
.filter(|k| **k == LocalKind::Int)
|
||||
.count() as u32;
|
||||
self.compiling_locals.push("__first_iter__".to_string());
|
||||
self.compiling_local_kinds.push(LocalKind::Int);
|
||||
// Push flag init into the Begin's prefix (before the loop)
|
||||
if let ControlEntry::Begin { body: ref mut bp } = self.control_stack[bi] {
|
||||
bp.push(IrOp::PushI32(1));
|
||||
@@ -1814,6 +1929,7 @@ impl<R: Runtime> ForthVM<R> {
|
||||
.dictionary
|
||||
.create(&name, false)
|
||||
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
||||
self.compiling_word_addr = self.dictionary.latest();
|
||||
// Reveal immediately so it gets an xt but isn't findable by name
|
||||
// (since the name is internal)
|
||||
self.dictionary.reveal();
|
||||
@@ -1848,6 +1964,7 @@ impl<R: Runtime> ForthVM<R> {
|
||||
|
||||
self.compiling_name = Some(name);
|
||||
self.compiling_word_id = Some(word_id);
|
||||
self.compiling_word_addr = self.dictionary.latest();
|
||||
self.compiling_ir.clear();
|
||||
self.control_stack.clear();
|
||||
self.state = -1;
|
||||
@@ -1857,16 +1974,92 @@ impl<R: Runtime> ForthVM<R> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `[:` — start a quotation. Saves the current compile frame (if any)
|
||||
/// and begins compiling an anonymous inner definition. The inner xt is
|
||||
/// produced by `;]`.
|
||||
fn start_quotation(&mut self) -> anyhow::Result<()> {
|
||||
let frame = CompileFrame {
|
||||
compiling_name: self.compiling_name.take(),
|
||||
compiling_word_id: self.compiling_word_id.take(),
|
||||
compiling_word_addr: self.compiling_word_addr,
|
||||
compiling_ir: std::mem::take(&mut self.compiling_ir),
|
||||
control_stack: std::mem::take(&mut self.control_stack),
|
||||
saw_create_in_def: self.saw_create_in_def,
|
||||
compiling_locals: std::mem::take(&mut self.compiling_locals),
|
||||
compiling_local_kinds: std::mem::take(&mut self.compiling_local_kinds),
|
||||
local_batch_base: self.local_batch_base.take(),
|
||||
state: self.state,
|
||||
};
|
||||
self.compile_frames.push(frame);
|
||||
|
||||
let name = format!("_quot_{}_", self.next_table_index);
|
||||
let word_id = self
|
||||
.dictionary
|
||||
.create(&name, false)
|
||||
.map_err(|e| anyhow::anyhow!("{e}"))?;
|
||||
self.compiling_word_addr = self.dictionary.latest();
|
||||
self.dictionary.reveal();
|
||||
|
||||
self.compiling_name = Some(name);
|
||||
self.compiling_word_id = Some(word_id);
|
||||
self.compiling_ir.clear();
|
||||
self.control_stack.clear();
|
||||
self.state = -1;
|
||||
self.saw_create_in_def = false;
|
||||
self.next_table_index = self.next_table_index.max(word_id.0 + 1);
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// `;]` — finish the current quotation. Compiles its body as an anonymous
|
||||
/// word, pops the saved outer frame, and either pushes the new xt on the
|
||||
/// data stack (interpret mode) or emits a literal push into the outer IR
|
||||
/// (compile mode).
|
||||
fn finish_quotation(&mut self) -> anyhow::Result<()> {
|
||||
if self.compile_frames.is_empty() {
|
||||
anyhow::bail!(";]: no matching [:");
|
||||
}
|
||||
let inner_xt = self
|
||||
.compiling_word_id
|
||||
.ok_or_else(|| anyhow::anyhow!(";]: no active quotation"))?
|
||||
.0;
|
||||
self.finish_colon_def()?;
|
||||
|
||||
let frame = self.compile_frames.pop().unwrap();
|
||||
self.compiling_name = frame.compiling_name;
|
||||
self.compiling_word_id = frame.compiling_word_id;
|
||||
self.compiling_word_addr = frame.compiling_word_addr;
|
||||
self.compiling_ir = frame.compiling_ir;
|
||||
self.control_stack = frame.control_stack;
|
||||
self.saw_create_in_def = frame.saw_create_in_def;
|
||||
self.compiling_locals = frame.compiling_locals;
|
||||
self.compiling_local_kinds = frame.compiling_local_kinds;
|
||||
self.local_batch_base = frame.local_batch_base;
|
||||
self.state = frame.state;
|
||||
|
||||
if self.state != 0 {
|
||||
self.push_ir(IrOp::PushI32(inner_xt as i32));
|
||||
} else {
|
||||
self.push_data_stack(inner_xt as i32)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Run all enabled optimization passes on an IR sequence.
|
||||
fn optimize_ir(&self, ir: Vec<IrOp>, bodies: &HashMap<WordId, Vec<IrOp>>) -> Vec<IrOp> {
|
||||
optimize(ir, &self.config.opt, bodies)
|
||||
}
|
||||
|
||||
/// Parse a `{: args | locals -- comment :}` block and compile local initializations.
|
||||
/// Parse a `{: args | locals -- comment :}` block and compile local
|
||||
/// initializations. Supports `F:` prefix (gforth/SwiftForth-style) to
|
||||
/// mark the next local as float-typed. Int locals pop from the data
|
||||
/// stack via `ForthLocalSet`; float locals pop from the float stack
|
||||
/// via `ForthFLocalSet`.
|
||||
fn compile_locals_block(&mut self) -> anyhow::Result<()> {
|
||||
let mut args: Vec<String> = Vec::new();
|
||||
let mut args: Vec<(String, LocalKind)> = Vec::new();
|
||||
let mut uninits: Vec<(String, LocalKind)> = Vec::new();
|
||||
let mut in_comment = false;
|
||||
let mut in_uninit = false;
|
||||
let mut next_is_float = false;
|
||||
|
||||
loop {
|
||||
let tok = self
|
||||
@@ -1875,44 +2068,50 @@ impl<R: Runtime> ForthVM<R> {
|
||||
let tok_upper = tok.to_ascii_uppercase();
|
||||
match tok_upper.as_str() {
|
||||
":}" => break,
|
||||
"--" => {
|
||||
in_comment = true;
|
||||
}
|
||||
"|" => {
|
||||
in_uninit = true;
|
||||
}
|
||||
"--" => in_comment = true,
|
||||
"|" => in_uninit = true,
|
||||
"F:" => next_is_float = true,
|
||||
_ => {
|
||||
if in_comment {
|
||||
continue; // Skip comment tokens
|
||||
continue;
|
||||
}
|
||||
if in_uninit {
|
||||
// Uninitialized local — just add to the map, no stack pop
|
||||
self.compiling_locals.push(tok_upper);
|
||||
let kind = if next_is_float {
|
||||
LocalKind::Float
|
||||
} else {
|
||||
// Stack-initialized arg
|
||||
args.push(tok_upper);
|
||||
LocalKind::Int
|
||||
};
|
||||
next_is_float = false;
|
||||
if in_uninit {
|
||||
uninits.push((tok_upper, kind));
|
||||
} else {
|
||||
args.push((tok_upper, kind));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Add args to locals map (they go first)
|
||||
let base = self.compiling_locals.len();
|
||||
for arg in &args {
|
||||
self.compiling_locals.insert(base, arg.clone());
|
||||
}
|
||||
// Actually, args should be at the start of the locals list
|
||||
// with the first arg having the lowest index
|
||||
let n_args = args.len();
|
||||
let mut new_locals = args;
|
||||
// Append any already-added uninit locals
|
||||
new_locals.extend(self.compiling_locals.drain(base..));
|
||||
self.compiling_locals.splice(base..base, new_locals);
|
||||
|
||||
// Compile: pop args from data stack into locals (in reverse order)
|
||||
// The first arg is deepest on the stack, last arg is on top
|
||||
// Args first (assigned stack→local), then uninits (no init pop).
|
||||
for (name, kind) in args.iter().chain(uninits.iter()) {
|
||||
self.compiling_locals.push(name.clone());
|
||||
self.compiling_local_kinds.push(*kind);
|
||||
}
|
||||
|
||||
// Emit init: pop in reverse declaration order. Rightmost arg is on
|
||||
// the top of its stack, so it's assigned first.
|
||||
for i in (0..n_args).rev() {
|
||||
self.push_ir(IrOp::ForthLocalSet((base + i) as u32));
|
||||
let slot = base + i;
|
||||
let kind = self.compiling_local_kinds[slot];
|
||||
let kind_idx = self.compiling_local_kinds[0..slot]
|
||||
.iter()
|
||||
.filter(|k| **k == kind)
|
||||
.count() as u32;
|
||||
match kind {
|
||||
LocalKind::Int => self.push_ir(IrOp::ForthLocalSet(kind_idx)),
|
||||
LocalKind::Float => self.push_ir(IrOp::ForthFLocalSet(kind_idx)),
|
||||
}
|
||||
}
|
||||
|
||||
Ok(())
|
||||
@@ -1936,6 +2135,8 @@ impl<R: Runtime> ForthVM<R> {
|
||||
}
|
||||
|
||||
self.compiling_locals.clear();
|
||||
self.compiling_local_kinds.clear();
|
||||
self.local_batch_base = None;
|
||||
|
||||
let name = self
|
||||
.compiling_name
|
||||
@@ -1962,8 +2163,13 @@ impl<R: Runtime> ForthVM<R> {
|
||||
// Instantiate and install in the table
|
||||
self.instantiate_and_install(&compiled, word_id)?;
|
||||
|
||||
// Reveal the word
|
||||
self.dictionary.reveal();
|
||||
// Reveal the word by its saved address (not LATEST, which may have
|
||||
// moved due to intermediate dict entries — quotations, DOES> helpers).
|
||||
if self.compiling_word_addr != 0 {
|
||||
self.dictionary.reveal_at(self.compiling_word_addr);
|
||||
} else {
|
||||
self.dictionary.reveal();
|
||||
}
|
||||
// Check if IMMEDIATE was toggled (the word might be immediate)
|
||||
let is_immediate = self.dictionary.find(&name).is_some_and(|(_, _, imm)| imm);
|
||||
self.sync_word_lookup(&name, word_id, is_immediate);
|
||||
@@ -2522,6 +2728,9 @@ impl<R: Runtime> ForthVM<R> {
|
||||
// CS-PICK, CS-ROLL, __CTRL__ for Programming-Tools / POSTPONE of control words
|
||||
self.register_cs_pick_roll()?;
|
||||
|
||||
// (LOCAL) for Forth 2012 §13.6.1.0086 lower-level locals primitive
|
||||
self.register_local_paren()?;
|
||||
|
||||
// Runtime DOES> patch for double-DOES> support
|
||||
self.register_does_patch()?;
|
||||
|
||||
@@ -2580,6 +2789,9 @@ impl<R: Runtime> ForthVM<R> {
|
||||
// UTIME ( -- ud ) microseconds since epoch as double-cell
|
||||
self.register_utime()?;
|
||||
|
||||
// RANDOM ( -- u ), RND-SEED ( u -- )
|
||||
self.register_random()?;
|
||||
|
||||
// HOLDS
|
||||
// HOLDS: defined in boot.fth
|
||||
|
||||
@@ -3189,7 +3401,15 @@ impl<R: Runtime> ForthVM<R> {
|
||||
.iter()
|
||||
.position(|n| n.eq_ignore_ascii_case(&name))
|
||||
{
|
||||
self.push_ir(IrOp::ForthLocalSet(idx as u32));
|
||||
let kind = self.compiling_local_kinds[idx];
|
||||
let kind_idx = self.compiling_local_kinds[0..idx]
|
||||
.iter()
|
||||
.filter(|k| **k == kind)
|
||||
.count() as u32;
|
||||
match kind {
|
||||
LocalKind::Int => self.push_ir(IrOp::ForthLocalSet(kind_idx)),
|
||||
LocalKind::Float => self.push_ir(IrOp::ForthFLocalSet(kind_idx)),
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
|
||||
@@ -4053,6 +4273,8 @@ impl<R: Runtime> ForthVM<R> {
|
||||
let saved_word_id = self.compiling_word_id.take();
|
||||
let saved_control = std::mem::take(&mut self.control_stack);
|
||||
let saved_locals = std::mem::take(&mut self.compiling_locals);
|
||||
let saved_local_kinds = std::mem::take(&mut self.compiling_local_kinds);
|
||||
let saved_local_batch_base = self.local_batch_base.take();
|
||||
|
||||
self.compiling_ir.clear();
|
||||
self.compiling_name = Some("_does_action_".to_string());
|
||||
@@ -4096,6 +4318,8 @@ impl<R: Runtime> ForthVM<R> {
|
||||
self.compiling_word_id = saved_word_id;
|
||||
self.control_stack = saved_control;
|
||||
self.compiling_locals = saved_locals;
|
||||
self.compiling_local_kinds = saved_local_kinds;
|
||||
self.local_batch_base = saved_local_batch_base;
|
||||
|
||||
// Register the defining word as a "does-defining" word.
|
||||
let has_create = self.saw_create_in_def;
|
||||
@@ -4561,6 +4785,45 @@ impl<R: Runtime> ForthVM<R> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register `(LOCAL)` per Forth 2012 §13.6.1.0086.
|
||||
///
|
||||
/// Compile-time `( c-addr u -- )`. When `u > 0`, declare a local named by
|
||||
/// the byte slice at `c-addr`/`u`. When `u = 0`, emit the initialization
|
||||
/// code for all locals declared since the last sentinel (the runtime
|
||||
/// `ForthLocalSet`s that pop args from the data stack in reverse
|
||||
/// declaration order).
|
||||
///
|
||||
/// The word is non-immediate: it runs when its containing immediate word
|
||||
/// (typically user-defined `LOCAL` or `END-LOCALS`) executes during the
|
||||
/// outer compilation loop. Because `HostAccess` cannot reach into the
|
||||
/// outer-interpreter compile state directly, the actual mutation is
|
||||
/// deferred via `PendingAction::DeclareLocal` / `DeclareLocalEnd` and
|
||||
/// processed in `handle_pending_actions` once the immediate word returns.
|
||||
fn register_local_paren(&mut self) -> anyhow::Result<()> {
|
||||
let pending = Arc::clone(&self.pending_actions);
|
||||
|
||||
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
|
||||
// ( c-addr u -- ) — pop both cells.
|
||||
let sp = ctx.get_dsp();
|
||||
let u = ctx.mem_read_i32(sp) as u32;
|
||||
let addr = ctx.mem_read_i32(sp + CELL_SIZE) as u32;
|
||||
ctx.set_dsp(sp + 2 * CELL_SIZE);
|
||||
|
||||
let action = if u == 0 {
|
||||
PendingAction::DeclareLocalEnd
|
||||
} else {
|
||||
let bytes = ctx.mem_read_slice(addr, u as usize);
|
||||
let name = String::from_utf8_lossy(&bytes).to_ascii_uppercase();
|
||||
PendingAction::DeclareLocal(name)
|
||||
};
|
||||
pending.lock().unwrap().push(action);
|
||||
Ok(())
|
||||
});
|
||||
|
||||
self.register_host_primitive("(LOCAL)", false, func)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Register `_does_patch_` as a host function for runtime DOES> patching.
|
||||
/// ( `does_action_id` -- ) Signals the outer interpreter to patch the most
|
||||
/// recently `CREATEd` word with a new DOES> action.
|
||||
@@ -4834,6 +5097,39 @@ impl<R: Runtime> ForthVM<R> {
|
||||
CTRL_AHEAD => self.compile_ahead()?,
|
||||
_ => anyhow::bail!("unknown control code: {code}"),
|
||||
},
|
||||
// Forth 2012 §13.6.1.0086 `(LOCAL)`: append the named local
|
||||
// to the current compile context. Locals declared via
|
||||
// `(LOCAL)` are int-only per spec (float locals are not
|
||||
// covered by this word).
|
||||
PendingAction::DeclareLocal(name) => {
|
||||
if self.state == 0 {
|
||||
anyhow::bail!("(LOCAL): only valid during compilation");
|
||||
}
|
||||
if self.local_batch_base.is_none() {
|
||||
self.local_batch_base = Some(self.compiling_locals.len());
|
||||
}
|
||||
self.compiling_locals.push(name);
|
||||
self.compiling_local_kinds.push(LocalKind::Int);
|
||||
}
|
||||
// Forth 2012 §13.6.1.0086 `(LOCAL)` sentinel: emit init
|
||||
// code for the batch of locals just declared. Pop the
|
||||
// runtime args from the data stack in reverse declaration
|
||||
// order — consistent with `compile_locals_block` at the
|
||||
// `{: ... :}` flow.
|
||||
PendingAction::DeclareLocalEnd => {
|
||||
if let Some(base) = self.local_batch_base.take() {
|
||||
for slot in (base..self.compiling_locals.len()).rev() {
|
||||
let kind_idx = self.compiling_local_kinds[0..slot]
|
||||
.iter()
|
||||
.filter(|k| **k == LocalKind::Int)
|
||||
.count() as u32;
|
||||
self.push_ir(IrOp::ForthLocalSet(kind_idx));
|
||||
}
|
||||
}
|
||||
// No-op if no batch is pending — spec-permissible for
|
||||
// a user that calls `0 0 (LOCAL)` at the top of a
|
||||
// definition before declaring anything.
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@@ -4911,11 +5207,24 @@ impl<R: Runtime> ForthVM<R> {
|
||||
/// Register `\` as an immediate host function that sets >IN to end of input.
|
||||
fn register_backslash(&mut self) -> anyhow::Result<()> {
|
||||
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
|
||||
// Read #TIB (input buffer length)
|
||||
// Forth 2012 §6.2.2535 `\`: "Parse and discard the remainder of
|
||||
// the parse area." The parse area extends to the end of the
|
||||
// current **line**, not the end of the input buffer. Scan from
|
||||
// the current `>IN` forward for the first `\n`, and set `>IN`
|
||||
// to the position after it. If there's no newline, stop at
|
||||
// `#TIB` (end of buffer), matching the single-line case.
|
||||
let b: [u8; 4] = ctx.mem_read_i32(SYSVAR_NUM_TIB as u32).to_le_bytes();
|
||||
let num_tib = u32::from_le_bytes(b);
|
||||
// Set >IN to end of input
|
||||
ctx.mem_write_i32(SYSVAR_TO_IN as u32, num_tib as i32);
|
||||
let b: [u8; 4] = ctx.mem_read_i32(SYSVAR_TO_IN as u32).to_le_bytes();
|
||||
let mut to_in = u32::from_le_bytes(b);
|
||||
while to_in < num_tib {
|
||||
let ch = ctx.mem_read_u8(INPUT_BUFFER_BASE + to_in);
|
||||
to_in += 1;
|
||||
if ch == b'\n' {
|
||||
break;
|
||||
}
|
||||
}
|
||||
ctx.mem_write_i32(SYSVAR_TO_IN as u32, to_in as i32);
|
||||
Ok(())
|
||||
});
|
||||
|
||||
@@ -5094,6 +5403,46 @@ impl<R: Runtime> ForthVM<R> {
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// RANDOM ( -- u ) return a 32-bit pseudo-random cell (xorshift64).
|
||||
/// RND-SEED ( u -- ) reseed the PRNG; seed=0 is forced to a nonzero constant.
|
||||
fn register_random(&mut self) -> anyhow::Result<()> {
|
||||
let state = Arc::clone(&self.rng_state);
|
||||
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
|
||||
let mut s = state.lock().unwrap();
|
||||
let mut x = *s;
|
||||
if x == 0 {
|
||||
x = 0xDEAD_BEEF_CAFE_BABE;
|
||||
}
|
||||
x ^= x << 13;
|
||||
x ^= x >> 7;
|
||||
x ^= x << 17;
|
||||
*s = x;
|
||||
drop(s);
|
||||
let sp = ctx.get_dsp();
|
||||
let new_sp = sp - CELL_SIZE;
|
||||
ctx.mem_write_i32(new_sp as u32, x as i32);
|
||||
ctx.set_dsp(new_sp);
|
||||
Ok(())
|
||||
});
|
||||
self.register_host_primitive("RANDOM", false, func)?;
|
||||
|
||||
let state = Arc::clone(&self.rng_state);
|
||||
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
|
||||
let sp = ctx.get_dsp();
|
||||
let seed = ctx.mem_read_i32(sp as u32) as u32 as u64;
|
||||
ctx.set_dsp(sp + CELL_SIZE);
|
||||
let mut s = state.lock().unwrap();
|
||||
*s = if seed == 0 {
|
||||
0xDEAD_BEEF_CAFE_BABE
|
||||
} else {
|
||||
seed
|
||||
};
|
||||
Ok(())
|
||||
});
|
||||
self.register_host_primitive("RND-SEED", false, func)?;
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// PARSE ( char "ccc<char>" -- c-addr u ) as inline host function.
|
||||
fn register_parse_host(&mut self) -> anyhow::Result<()> {
|
||||
let func: HostFn = Box::new(move |ctx: &mut dyn HostAccess| {
|
||||
@@ -7626,6 +7975,257 @@ mod tests {
|
||||
assert_eq!(vm.take_output(), "test");
|
||||
}
|
||||
|
||||
// ===================================================================
|
||||
// Float locals: F: prefix in {: ... :}
|
||||
// ===================================================================
|
||||
|
||||
#[test]
fn test_flocal_hypot() {
    // Two float locals feeding sqrt(x*x + y*y); 3 and 4 must give 5.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [
        ": HYPOT {: F: x F: y :} x x F* y y F* F+ FSQRT ;",
        "3E 4E HYPOT F>S",
    ] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![5]);
}
|
||||
|
||||
#[test]
fn test_flocal_to() {
    // `TO a` on a float local: the value stored by `10E TO a` replaces the
    // initial 1E, and reading `a` back yields 10.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let definition = ": SETF {: F: a :} 10E TO a a ;";
    let call = "1E SETF F>S";
    forth.evaluate(definition).unwrap();
    forth.evaluate(call).unwrap();
    assert_eq!(forth.data_stack(), vec![10]);
}
|
||||
|
||||
#[test]
fn test_flocal_mixed_int_and_float_args() {
    // One int local (`n`) and one float local (`f`) in the same block:
    // 3 + 4.0 must come back as 7.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [": MIX {: n F: f :} f n S>F F+ ;", "3 4E MIX F>S"] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![7]);
}
|
||||
|
||||
#[test]
fn test_flocal_uninit() {
    // A float local declared after `|` takes no argument; assign it with
    // `TO`, then read it back.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let definition = ": U {: | F: tmp :} 9E TO tmp tmp ;";
    forth.evaluate(definition).unwrap();
    forth.evaluate("U F>S").unwrap();
    assert_eq!(forth.data_stack(), vec![9]);
}
|
||||
|
||||
#[test]
fn test_local_named_s_not_hijacked_by_s_shortcut() {
    // A local called `s` must resolve as a local (Forth 2012 §13.3.3.2:
    // locals shadow dictionary names in scope). Regression guard for `s`
    // being taken as the compile-mode `S` string shortcut.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [
        "VARIABLE V 42 V !",
        ": T {: | s :} V TO s s @ ;",
        "T",
    ] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![42]);
}
|
||||
|
||||
#[test]
fn test_local_named_s_with_fetch_and_store() {
    // Local `s` holds the address of V; it is written with `TO s` and then
    // read as the target of `!`, so both the set and get paths are used.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    forth.evaluate("VARIABLE V 0 V !").unwrap();
    let definition = ": STORE-VIA-S {: | s :} V TO s 99 s ! ;";
    forth.evaluate(definition).unwrap();
    forth.evaluate("STORE-VIA-S V @").unwrap();
    assert_eq!(forth.data_stack(), vec![99]);
}
|
||||
|
||||
#[test]
fn test_int_uninit_local_via_pipe_syntax() {
    // Integer counterpart of `test_flocal_uninit`: `{: | name :}` with an
    // int local that is assigned via `TO` and then read.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [": U {: | tmp :} 7 TO tmp tmp ;", "U"] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![7]);
}
|
||||
|
||||
#[test]
fn test_local_primitive_lt32() {
    // `(LOCAL)` (Forth 2012 §13.6.1.0086) driven through user-defined
    // immediate wrappers LOCAL / END-LOCALS; mirrors LT32 from
    // localstest.fth.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let program = [
        ": LOCAL BL WORD COUNT (LOCAL) ; IMMEDIATE",
        ": END-LOCALS 0 0 (LOCAL) ; IMMEDIATE",
        ": LT32 LOCAL A LOCAL B LOCAL C END-LOCALS A B C ;",
        "61 62 63 LT32",
    ];
    for src in program {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![63, 62, 61]);
}
|
||||
|
||||
#[test]
fn test_multiline_colon_then_variable() {
    // A colon definition spanning a newline must be closed by its `;`, so
    // the following VARIABLE line runs in interpret state. Historically
    // the failure showed up as `unknown word` on the later line; it was
    // attributed to `\` consuming up to `#TIB` rather than the next newline.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let two_line_def = ": EMPTY-STACK\n DEPTH ?DUP IF DUP 0< IF NEGATE 0 DO 0 LOOP ELSE 0 DO DROP LOOP THEN THEN ;";
    forth.evaluate(two_line_def).unwrap();
    forth.evaluate("VARIABLE #ERRORS 0 #ERRORS !").unwrap();
    forth.evaluate("#ERRORS @").unwrap();
    assert_eq!(forth.data_stack(), vec![0]);
}
|
||||
|
||||
#[test]
fn test_backslash_stops_at_newline() {
    // `\` (Forth 2012 §6.2.2535) discards only the rest of the current
    // line, so the `42` on the second line must still be interpreted.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let input = "\\ comment line\n42";
    forth.evaluate(input).unwrap();
    assert_eq!(forth.data_stack(), vec![42]);
}
|
||||
|
||||
#[test]
fn test_local_primitive_end_sentinel_only() {
    // The `0 0 (LOCAL)` sentinel with no preceding declarations must not
    // disturb the definition being compiled.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [
        ": END-LOCALS 0 0 (LOCAL) ; IMMEDIATE",
        ": T END-LOCALS 42 ;",
        "T",
    ] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![42]);
}
|
||||
|
||||
// ===================================================================
|
||||
// Quotations: [: ... ;]
|
||||
// ===================================================================
|
||||
|
||||
#[test]
fn test_quotation_interpret() {
    // A quotation built at the top level and executed immediately.
    let stack = eval_stack("[: 42 ;] EXECUTE");
    assert_eq!(stack, vec![42]);
}
|
||||
|
||||
#[test]
fn test_quotation_compile_mode() {
    // Pass a quotation to a colon word that simply EXECUTEs it, then print
    // the result.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [": APPLY EXECUTE ;", "[: 1 2 + ;] APPLY ."] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.take_output(), "3 ");
}
|
||||
|
||||
#[test]
fn test_quotation_inside_colon_def() {
    // A quotation compiled inside a colon definition and executed there.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let definition = ": MYDUP [: DUP ;] EXECUTE ;";
    forth.evaluate(definition).unwrap();
    forth.evaluate("5 MYDUP").unwrap();
    assert_eq!(forth.data_stack(), vec![5, 5]);
}
|
||||
|
||||
#[test]
fn test_quotation_nested() {
    // A quotation whose body builds and executes another quotation.
    let stack = eval_stack("[: [: 1 ;] EXECUTE ;] EXECUTE");
    assert_eq!(stack, vec![1]);
}
|
||||
|
||||
#[test]
fn test_quotation_inside_if() {
    // Quotations inside both arms of IF/ELSE: the enclosing control
    // structure must still pair up with its THEN after each `[: ... ;]`.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let definition = ": CHOOSE IF [: 1 ;] ELSE [: 2 ;] THEN EXECUTE ;";
    forth.evaluate(definition).unwrap();
    forth.evaluate("-1 CHOOSE 0 CHOOSE").unwrap();
    assert_eq!(forth.data_stack(), vec![2, 1]);
}
|
||||
|
||||
// ===================================================================
|
||||
// Structures (BEGIN-STRUCTURE / +FIELD / FIELD: / CFIELD: / END-STRUCTURE)
|
||||
// ===================================================================
|
||||
|
||||
#[test]
fn test_struct_basic_point() {
    // A two-field structure: check its size, then store to and fetch from
    // both fields of an allotted instance.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    forth
        .evaluate("BEGIN-STRUCTURE POINT FIELD: P.X FIELD: P.Y END-STRUCTURE")
        .unwrap();

    forth.evaluate("POINT").unwrap();
    let size = forth.pop_data_stack().unwrap();
    assert_eq!(size, 8);

    for src in [
        "CREATE ORIGIN POINT ALLOT",
        "1 ORIGIN P.X ! 2 ORIGIN P.Y !",
        "ORIGIN P.X @ ORIGIN P.Y @",
    ] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![2, 1]);
}
|
||||
|
||||
#[test]
fn test_struct_field_offsets() {
    // Three cell fields: push the structure size, then each field applied
    // to base address 0 to expose its offset.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    for src in [
        "BEGIN-STRUCTURE REC FIELD: A FIELD: B FIELD: C END-STRUCTURE",
        "REC 0 A 0 B 0 C",
    ] {
        forth.evaluate(src).unwrap();
    }
    assert_eq!(forth.data_stack(), vec![8, 4, 0, 12]);
}
|
||||
|
||||
#[test]
fn test_struct_mixed_cfield() {
    // A char field followed by a cell field: push the structure size and
    // both field offsets (each field applied to base address 0).
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    let definition = "BEGIN-STRUCTURE MIX CFIELD: TAG FIELD: VAL END-STRUCTURE";
    forth.evaluate(definition).unwrap();
    forth.evaluate("MIX 0 TAG 0 VAL").unwrap();
    assert_eq!(forth.data_stack(), vec![4, 0, 8]);
}
|
||||
|
||||
// ===================================================================
|
||||
// New words: RANDOM / RND-SEED
|
||||
// ===================================================================
|
||||
|
||||
#[test]
fn test_random_deterministic_after_seed() {
    // Two fresh VMs seeded identically must produce the same three values.
    let draw_three = || {
        let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
        forth.evaluate("42 RND-SEED RANDOM RANDOM RANDOM").unwrap();
        forth.data_stack().clone()
    };

    let first = draw_three();
    let second = draw_three();

    assert_eq!(first, second, "same seed must produce same sequence");
    assert_eq!(first.len(), 3);
}
|
||||
|
||||
#[test]
fn test_random_distinct_values() {
    // Draw 1000 cells after a fixed seed and require at least 900 distinct
    // values among them.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    forth.evaluate("1 RND-SEED").unwrap();

    let seen: std::collections::HashSet<_> = (0..1000)
        .map(|_| {
            forth.evaluate("RANDOM").unwrap();
            forth.pop_data_stack().unwrap()
        })
        .collect();

    assert!(
        seen.len() >= 900,
        "only {} distinct out of 1000",
        seen.len()
    );
}
|
||||
|
||||
#[test]
fn test_rnd_seed_zero_forced_nonzero() {
    // Zero is a fixed point of the xorshift step, so a zero seed must be
    // swapped for something else; two draws must not both be zero.
    let mut forth = ForthVM::<NativeRuntime>::new().unwrap();
    forth.evaluate("0 RND-SEED RANDOM RANDOM").unwrap();
    let draws = forth.data_stack();
    let both_zero = draws[0] == 0 && draws[1] == 0;
    assert!(!both_zero, "seed-0 must not freeze the stream");
}
|
||||
|
||||
// ===================================================================
|
||||
// New words: COUNT
|
||||
// ===================================================================
|
||||
|
||||
@@ -26,8 +26,7 @@ fn probe_gforth(candidate: &str) -> bool {
|
||||
.arg("-e")
|
||||
.arg("bye")
|
||||
.output()
|
||||
.map(|o| o.status.success())
|
||||
.unwrap_or(false)
|
||||
.is_ok_and(|o| o.status.success())
|
||||
}
|
||||
|
||||
fn find_gforth() -> Option<&'static str> {
|
||||
|
||||
+180
-24
@@ -13,41 +13,165 @@ const SUITE_DIR: &str = concat!(
|
||||
"/../../tests/forth2012-test-suite/src"
|
||||
);
|
||||
|
||||
/// Load a file and evaluate it line by line, ignoring errors on individual lines.
|
||||
fn load_file(vm: &mut ForthVM<NativeRuntime>, path: &str) {
|
||||
/// Load a file line-by-line, returning the number of lines that raised an
|
||||
/// `evaluate` error. Each failing line is printed (visible under
|
||||
/// `cargo test -- --nocapture`) so failures can be triaged without a
|
||||
/// debugger.
|
||||
///
|
||||
/// Historically this helper discarded errors silently, which caused tests
|
||||
/// like LT32 in `localstest.fth` (compile errors from unknown words such
|
||||
/// as `(LOCAL)` before it was implemented) to vanish — the T{ }T error
|
||||
/// counter was never incremented because the `:` definition never ran.
|
||||
/// Returning the count surfaces silent skips as real failures.
|
||||
///
|
||||
/// **Note on multi-line definitions.** WAFER's DOES> handler collects
|
||||
/// the does-body to `;` via `next_token()` within a *single* `evaluate`
|
||||
/// call and treats end-of-input as end-of-body. Files with a `DOES>`
|
||||
/// split across lines (e.g. `errorreport.fth`) therefore cannot be
|
||||
/// loaded line-by-line; use [`load_file_whole`] for those.
|
||||
fn load_file(vm: &mut ForthVM<NativeRuntime>, path: &str) -> u32 {
|
||||
let source = std::fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read {path}"));
|
||||
for line in source.lines() {
|
||||
let _ = vm.evaluate(line);
|
||||
let mut fails = 0u32;
|
||||
for (lineno, line) in source.lines().enumerate() {
|
||||
if let Err(e) = vm.evaluate(line) {
|
||||
fails += 1;
|
||||
eprintln!("{path}:{}: {e}\n line: {line}", lineno + 1);
|
||||
}
|
||||
}
|
||||
vm.take_output(); // discard output
|
||||
fails
|
||||
}
|
||||
|
||||
/// Load a file as a single `evaluate` call (not line-by-line). Required
|
||||
/// for files with multi-line definitions that WAFER's per-line handlers
|
||||
/// can't stitch across calls (notably `: X ... DOES> ... ;` spanning
|
||||
/// lines — see [`load_file`] note).
|
||||
///
|
||||
/// Returns `1` on any failure, `0` on success, so the caller can apply
|
||||
/// baselines the same way as [`load_file`].
|
||||
fn load_file_whole(vm: &mut ForthVM<NativeRuntime>, path: &str) -> u32 {
|
||||
let source = std::fs::read_to_string(path).unwrap_or_else(|_| panic!("Failed to read {path}"));
|
||||
let fails = match vm.evaluate(&source) {
|
||||
Ok(()) => 0,
|
||||
Err(e) => {
|
||||
eprintln!("{path}: {e}");
|
||||
1
|
||||
}
|
||||
};
|
||||
vm.take_output();
|
||||
fails
|
||||
}
|
||||
|
||||
/// Number of line-level `evaluate` failures currently *known* for a given
/// suite file, selected by the path's trailing `/<file name>`.
///
/// The runner compares the observed failure count with this value for
/// equality, so both new failures and silently fixed ones are noticed.
/// Non-zero entries are a tech-debt ledger, not an allowlist: each one is a
/// bug to fix, after which its baseline should be lowered to zero. Files
/// without an entry have a baseline of `0`.
fn expected_load_failures(path: &str) -> u32 {
    const BASELINES: [(&str, u32); 4] = [
        // core.fr hits two unsupported constructs:
        //   1. Nested colon definitions (`: NOP : POSTPONE ; ;` at line 751,
        //      defining NOP, NOP1, NOP2) — four lines.
        //   2. `SOURCE`/`>IN` round-trip through `EVALUATE` at line 797
        //      (GS1 definition) — one line.
        ("/core.fr", 5),
        // coreexttest.fth hits two missing Core-Extension features:
        //   1. SAVE-INPUT / RESTORE-INPUT at line 548 — not implemented.
        //   2. `.(` inside `[ ... ]` at line 559 — not handled on the
        //      `[ ... ]` interpret-mode path of `compile_token`, so the
        //      message tokens reach the compiler as undefined words.
        ("/coreexttest.fth", 2),
        // exceptiontest.fth line 95 fails with a garbled parse ("unknown
        // word" over non-ASCII bytes) after the preceding `C6` / `T9` frame
        // exercises CATCH/THROW source stacking. Root cause not yet
        // diagnosed.
        ("/exceptiontest.fth", 1),
        // toolstest.fth relies on the `\?` conditional-skip idiom from
        // utilities.fth:37 (`: \? (\?) @ IF EXIT THEN SOURCE >IN ! DROP ;
        // IMMEDIATE`). With the per-line loader, `SOURCE >IN ! DROP` does
        // not consume the rest of the line correctly, so 37 `\?`-guarded
        // lines in the TRAVERSE-WORDLIST / NAME>COMPILE / NAME>INTERPRET
        // blocks surface as unknown-word errors.
        ("/toolstest.fth", 37),
    ];

    BASELINES
        .iter()
        .find(|&&(suffix, _)| path.ends_with(suffix))
        .map_or(0, |&(_, count)| count)
}
|
||||
|
||||
/// Assert a file loaded with exactly its baseline number of line-level
|
||||
/// failures. Used for prerequisites; keeps the runner tight without
|
||||
/// blocking the whole suite on known gaps.
|
||||
fn assert_load_fails_within_baseline(path: &str, fails: u32) {
|
||||
let expected = expected_load_failures(path);
|
||||
assert_eq!(
|
||||
fails, expected,
|
||||
"{path} had {fails} line-level failures (expected baseline: {expected})"
|
||||
);
|
||||
}
|
||||
|
||||
/// Boot a WAFER VM with full prerequisites loaded.
|
||||
///
|
||||
/// Every prerequisite file must load with exactly its baseline number of
|
||||
/// line-level errors (see `expected_load_failures`). Any deviation points to
|
||||
/// a missing primitive or a parser bug and must be fixed, not silently tolerated.
|
||||
fn boot_with_prerequisites() -> ForthVM<NativeRuntime> {
|
||||
let mut vm = ForthVM::<NativeRuntime>::new().expect("Failed to create ForthVM");
|
||||
|
||||
// Load test framework
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/tester.fr"));
|
||||
let tester_path = format!("{SUITE_DIR}/tester.fr");
|
||||
let f1 = load_file(&mut vm, &tester_path);
|
||||
assert_load_fails_within_baseline(&tester_path, f1);
|
||||
// Load core tests (prerequisite)
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/core.fr"));
|
||||
let core_path = format!("{SUITE_DIR}/core.fr");
|
||||
let f2 = load_file(&mut vm, &core_path);
|
||||
assert_load_fails_within_baseline(&core_path, f2);
|
||||
// Switch to decimal and load utilities
|
||||
let _ = vm.evaluate("DECIMAL");
|
||||
vm.take_output();
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/utilities.fth"));
|
||||
let util_path = format!("{SUITE_DIR}/utilities.fth");
|
||||
let f3 = load_file(&mut vm, &util_path);
|
||||
assert_load_fails_within_baseline(&util_path, f3);
|
||||
// errorreport.fth defines SET-ERROR-COUNT and the per-wordset counter
|
||||
// accessors (CORE-ERRORS, STRING-ERRORS, LOCALS-ERRORS, ...). Every
|
||||
// suite's final `X-ERRORS SET-ERROR-COUNT` line depends on this file,
|
||||
// and silently errored before the runner was tightened.
|
||||
let errorreport_path = format!("{SUITE_DIR}/errorreport.fth");
|
||||
let f_err = load_file_whole(&mut vm, &errorreport_path);
|
||||
assert_load_fails_within_baseline(&errorreport_path, f_err);
|
||||
// Load core extensions
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/coreexttest.fth"));
|
||||
let ext_path = format!("{SUITE_DIR}/coreexttest.fth");
|
||||
let f4 = load_file(&mut vm, &ext_path);
|
||||
assert_load_fails_within_baseline(&ext_path, f4);
|
||||
|
||||
vm
|
||||
}
|
||||
|
||||
/// Run a test suite file and return the #ERRORS count.
|
||||
/// Run a test suite file and return the *total* error count:
|
||||
/// `#ERRORS` from the Forth test framework plus any lines where
|
||||
/// `vm.evaluate` itself failed (e.g. unknown word in a `:` definition
|
||||
/// outside `T{ }T`, which the framework cannot catch).
|
||||
fn run_suite(vm: &mut ForthVM<NativeRuntime>, test_file: &str) -> u32 {
|
||||
// Reset error counter
|
||||
let _ = vm.evaluate("DECIMAL 0 #ERRORS !");
|
||||
vm.take_output();
|
||||
|
||||
// Load the test file
|
||||
load_file(vm, &format!("{SUITE_DIR}/{test_file}"));
|
||||
let file_path = format!("{SUITE_DIR}/{test_file}");
|
||||
let load_fails = load_file(vm, &file_path);
|
||||
assert_load_fails_within_baseline(&file_path, load_fails);
|
||||
|
||||
// Read error count -- try multiple approaches to be robust
|
||||
let _ = vm.evaluate("DECIMAL");
|
||||
@@ -76,8 +200,12 @@ fn run_suite(vm: &mut ForthVM<NativeRuntime>, test_file: &str) -> u32 {
|
||||
#[test]
|
||||
fn compliance_core() {
|
||||
let mut vm = ForthVM::<NativeRuntime>::new().expect("Failed to create ForthVM");
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/tester.fr"));
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/core.fr"));
|
||||
let tester_path = format!("{SUITE_DIR}/tester.fr");
|
||||
let f1 = load_file(&mut vm, &tester_path);
|
||||
assert_load_fails_within_baseline(&tester_path, f1);
|
||||
let core_path = format!("{SUITE_DIR}/core.fr");
|
||||
let f2 = load_file(&mut vm, &core_path);
|
||||
assert_load_fails_within_baseline(&core_path, f2);
|
||||
|
||||
let _ = vm.evaluate("DECIMAL #ERRORS @");
|
||||
let errors = vm.data_stack().first().copied().unwrap_or(-1);
|
||||
@@ -96,17 +224,31 @@ fn compliance_core_ext() {
|
||||
// Core Extensions are loaded as part of prerequisites.
|
||||
// Run from scratch to get a clean error count.
|
||||
let mut vm = ForthVM::<NativeRuntime>::new().expect("Failed to create ForthVM");
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/tester.fr"));
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/core.fr"));
|
||||
let tester_path = format!("{SUITE_DIR}/tester.fr");
|
||||
let f1 = load_file(&mut vm, &tester_path);
|
||||
assert_load_fails_within_baseline(&tester_path, f1);
|
||||
let core_path = format!("{SUITE_DIR}/core.fr");
|
||||
let f2 = load_file(&mut vm, &core_path);
|
||||
assert_load_fails_within_baseline(&core_path, f2);
|
||||
let _ = vm.evaluate("DECIMAL");
|
||||
vm.take_output();
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/utilities.fth"));
|
||||
let util_path = format!("{SUITE_DIR}/utilities.fth");
|
||||
let f3 = load_file(&mut vm, &util_path);
|
||||
assert_load_fails_within_baseline(&util_path, f3);
|
||||
let errorreport_path = format!("{SUITE_DIR}/errorreport.fth");
|
||||
let f_err = load_file_whole(&mut vm, &errorreport_path);
|
||||
assert_load_fails_within_baseline(&errorreport_path, f_err);
|
||||
let _ = vm.evaluate("DECIMAL 0 #ERRORS !");
|
||||
vm.take_output();
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/coreexttest.fth"));
|
||||
let ext_path = format!("{SUITE_DIR}/coreexttest.fth");
|
||||
let load_fails = load_file(&mut vm, &ext_path);
|
||||
assert_load_fails_within_baseline(&ext_path, load_fails);
|
||||
let _ = vm.evaluate("DECIMAL #ERRORS @");
|
||||
let errors = vm.data_stack().first().copied().unwrap_or(-1) as u32;
|
||||
assert_eq!(errors, 0, "Core Extensions: {errors} test failures");
|
||||
let framework_errors = vm.data_stack().first().copied().unwrap_or(-1) as u32;
|
||||
assert_eq!(
|
||||
framework_errors, 0,
|
||||
"Core Extensions: {framework_errors} framework test failures"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -164,17 +306,31 @@ fn compliance_string() {
|
||||
// Run from scratch -- the stringtest includes CoreExt tests that
|
||||
// cascade failures when run on top of an already-loaded CoreExt suite.
|
||||
let mut vm = ForthVM::<NativeRuntime>::new().expect("Failed to create ForthVM");
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/tester.fr"));
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/core.fr"));
|
||||
let tester_path = format!("{SUITE_DIR}/tester.fr");
|
||||
let f1 = load_file(&mut vm, &tester_path);
|
||||
assert_load_fails_within_baseline(&tester_path, f1);
|
||||
let core_path = format!("{SUITE_DIR}/core.fr");
|
||||
let f2 = load_file(&mut vm, &core_path);
|
||||
assert_load_fails_within_baseline(&core_path, f2);
|
||||
let _ = vm.evaluate("DECIMAL");
|
||||
vm.take_output();
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/utilities.fth"));
|
||||
let util_path = format!("{SUITE_DIR}/utilities.fth");
|
||||
let f3 = load_file(&mut vm, &util_path);
|
||||
assert_load_fails_within_baseline(&util_path, f3);
|
||||
let errorreport_path = format!("{SUITE_DIR}/errorreport.fth");
|
||||
let f_err = load_file_whole(&mut vm, &errorreport_path);
|
||||
assert_load_fails_within_baseline(&errorreport_path, f_err);
|
||||
let _ = vm.evaluate("DECIMAL 0 #ERRORS !");
|
||||
vm.take_output();
|
||||
load_file(&mut vm, &format!("{SUITE_DIR}/stringtest.fth"));
|
||||
let str_path = format!("{SUITE_DIR}/stringtest.fth");
|
||||
let load_fails = load_file(&mut vm, &str_path);
|
||||
assert_load_fails_within_baseline(&str_path, load_fails);
|
||||
let _ = vm.evaluate("DECIMAL #ERRORS @");
|
||||
let errors = vm.data_stack().first().copied().unwrap_or(-1) as u32;
|
||||
assert_eq!(errors, 0, "String: {errors} test failures");
|
||||
let framework_errors = vm.data_stack().first().copied().unwrap_or(-1) as u32;
|
||||
assert_eq!(
|
||||
framework_errors, 0,
|
||||
"String: {framework_errors} framework test failures"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
//! End-to-end tests for the `SHA1` / `SHA256` / `SHA512` Forth host words.
|
||||
//!
|
||||
//! These run inside a real WAFER VM (NativeRuntime). The Forth program writes
|
||||
//! These run inside a real WAFER VM (`NativeRuntime`). The Forth program writes
|
||||
//! a counted string into `PAD`, calls the hash word, then the test reads the
|
||||
//! digest out of WAFER linear memory and compares it to the RFC-3174 / FIPS-180
|
||||
//! reference vectors.
|
||||
@@ -26,10 +26,16 @@ fn hash_via_forth(word: &str, input: &[u8]) -> Vec<u8> {
|
||||
|
||||
// Stack now: ( c-addr2 u2 ). Read u2 then c-addr2 from data stack.
|
||||
let stack = vm.data_stack();
|
||||
assert!(stack.len() >= 2, "expected (addr len) on stack, got {stack:?}");
|
||||
assert!(
|
||||
stack.len() >= 2,
|
||||
"expected (addr len) on stack, got {stack:?}"
|
||||
);
|
||||
let u2 = stack[0] as usize;
|
||||
let addr2 = stack[1] as u32;
|
||||
assert_eq!(addr2, HASH_SCRATCH_BASE, "digest should land in HASH_SCRATCH");
|
||||
assert_eq!(
|
||||
addr2, HASH_SCRATCH_BASE,
|
||||
"digest should land in HASH_SCRATCH"
|
||||
);
|
||||
|
||||
// Read the digest out of WAFER linear memory.
|
||||
let mut bytes = Vec::with_capacity(u2);
|
||||
|
||||
+334
-162
@@ -1,6 +1,11 @@
|
||||
WAFER Architecture Reference (updated 2026-04-13)
|
||||
WAFER Architecture Reference (updated 2026-04-16)
|
||||
===================================================
|
||||
|
||||
WAFER = WebAssembly Forth Engine in Rust. Optimizing Forth-2012 compiler that
|
||||
emits WASM at run time. Each colon definition becomes its own WASM module that
|
||||
shares memory, globals, and a function table with every other word.
|
||||
|
||||
|
||||
1. COMPILATION PIPELINE
|
||||
-----------------------
|
||||
|
||||
@@ -11,96 +16,134 @@ WAFER Architecture Reference (updated 2026-04-13)
|
||||
+--------------------------------------------+
|
||||
| Tokenizer: whitespace-delimited words |
|
||||
| For each token: |
|
||||
| 1. Dictionary lookup (find) |
|
||||
| 2. If found + interpret mode: EXECUTE |
|
||||
| 3. If found + compile mode: |
|
||||
| - Immediate? Execute now |
|
||||
| 1. Dictionary lookup (HashMap + wordlist |
|
||||
| search order) |
|
||||
| 2. Found + interpret mode: EXECUTE |
|
||||
| 3. Found + compile mode: |
|
||||
| - IMMEDIATE? Execute now |
|
||||
| - Normal? Append Call(WordId) to IR |
|
||||
| 4. Not found: try parse as number |
|
||||
| - Interpret: push to data stack |
|
||||
| - Compile: append PushI32(n) to IR |
|
||||
| - Compile: append PushI32/64/F64 |
|
||||
| 5. Neither: error "unknown word" |
|
||||
| Special cases handled here, not via IR: |
|
||||
| defining words (CREATE, VARIABLE, :), |
|
||||
| DOES> dispatch, S" / ." string parsing, |
|
||||
| {: ... :} locals, [: ... ;] quotations. |
|
||||
+--------------------------------------------+
|
||||
| On `;` (end of colon definition):
|
||||
v
|
||||
Optimizer (optimizer.rs)
|
||||
Optimizer (optimizer.rs) — IR -> IR
|
||||
+--------------------------------------------+
|
||||
| Phase 1: Simplify |
|
||||
| Peephole -> Constant Fold -> |
|
||||
| Strength Reduce -> Peephole |
|
||||
| Phase 2: Inline then re-simplify |
|
||||
| Inline(max=8) -> Peephole -> |
|
||||
| Constant Fold -> Strength Reduce -> |
|
||||
| Peephole |
|
||||
| Phase 3: Eliminate dead code |
|
||||
| DCE -> Peephole |
|
||||
| Phase 4: Tail calls (must be last) |
|
||||
| Tail Call Detect |
|
||||
| Phase 1 simplify: |
|
||||
| peephole -> fold -> strength -> peephole |
|
||||
| Phase 2 inline (max 8 ops) then re-simpl.: |
|
||||
| inline -> peephole -> fold -> strength |
|
||||
| -> peephole |
|
||||
| Phase 3 dead code: dce -> peephole |
|
||||
| Phase 4 tail calls (must be last) |
|
||||
| Total peephole passes: 5 |
|
||||
+--------------------------------------------+
|
||||
|
|
||||
v
|
||||
Codegen (codegen.rs)
|
||||
Codegen (codegen.rs) — IR -> WASM bytes
|
||||
+--------------------------------------------+
|
||||
| IR -> WASM bytecode via wasm-encoder |
|
||||
| Each word = one WASM module with: |
|
||||
| Imports: emit, memory, dsp, rsp, fsp, |
|
||||
| table |
|
||||
| Types: void () -> (), i32 (i32) -> () |
|
||||
| One defined function (the word body) |
|
||||
| DSP cached in local 0, writeback before |
|
||||
| calls, reload after calls |
|
||||
| Scratch locals start at index 1 |
|
||||
| wasm-encoder builds one module per word. |
|
||||
| Function locals (laid out in order): |
|
||||
| 0 cached DSP (i32) |
|
||||
| 1..s scratch i32 (or promoted |
|
||||
| stack-to-local slots) |
|
||||
| s..f Forth locals from {: ... :} |
|
||||
| (i32 then f64) |
|
||||
| f..l loop locals: 2 per nested |
|
||||
| DO/?DO (index, limit) |
|
||||
| DSP write-back before every Call, |
|
||||
| reload after — keeps host functions and |
|
||||
| call_indirect targets coherent. |
|
||||
| Stack-to-local promotion (codegen flag): |
|
||||
| straight-line + simple control flow |
|
||||
| words skip the linear-memory data stack |
|
||||
| entirely; values stay in WASM locals. |
|
||||
+--------------------------------------------+
|
||||
|
|
||||
v
|
||||
Runtime trait (runtime.rs)
|
||||
Runtime trait (runtime.rs) — execution backend
|
||||
+--------------------------------------------+
|
||||
| ForthVM<R: Runtime> — generic over backend |
|
||||
| Runtime provides: |
|
||||
| - Memory r/w (mem_read_i32, etc.) |
|
||||
| - Globals (get/set_dsp, rsp, fsp) |
|
||||
| - Table (ensure_table_size) |
|
||||
| - instantiate_and_install(wasm_bytes) |
|
||||
| - call_func(fn_index) |
|
||||
| - register_host_func(fn_index, HostFn) |
|
||||
| ForthVM<R: Runtime> generic over backend. |
|
||||
| Runtime owns: |
|
||||
| - shared linear memory (16 pages init) |
|
||||
| - shared funcref table (grows on demand) |
|
||||
| - 3 mutable i32 globals (dsp/rsp/fsp) |
|
||||
| - emit() import bound to output buffer |
|
||||
| Runtime methods: |
|
||||
| mem_read/write_{i32,u8,slice} |
|
||||
| get/set_{dsp,rsp,fsp} |
|
||||
| ensure_table_size(n) |
|
||||
| instantiate_and_install(wasm, fn_index) |
|
||||
| call_func(fn_index) |
|
||||
| register_host_func(fn_index, HostFn) |
|
||||
| |
|
||||
| HostAccess trait — memory/global ops for |
|
||||
| host function callbacks |
|
||||
| HostFn = Box<dyn Fn(&mut dyn HostAccess)> |
|
||||
| HostAccess trait — same memory/global ops |
|
||||
| exposed to host-fn callbacks; lets one |
|
||||
| HostFn closure run on either runtime. |
|
||||
| HostFn = Box<dyn Fn(&mut dyn HostAccess) |
|
||||
| -> Result<()> + Send + Sync> |
|
||||
+--------------------------------------------+
|
||||
| |
|
||||
v v
|
||||
NativeRuntime WebRuntime
|
||||
(runtime_native.rs) (crates/web/runtime_web.rs)
|
||||
(runtime_native.rs, (crates/web/src/
|
||||
feature = "native") runtime_web.rs)
|
||||
+------------------+ +------------------+
|
||||
| wasmtime Engine | | js_sys::WebAsm |
|
||||
| Store, Memory | | Memory, Table |
|
||||
| Table, Globals | | Global objects |
|
||||
| Func closures | | JS Closures |
|
||||
| wasmtime Engine, | | js_sys WebAsm |
|
||||
| Store, Memory, | | Memory, Table, |
|
||||
| Table, Globals, | | Global, JS |
|
||||
| Func closures | | Closures |
|
||||
+------------------+ +------------------+
|
||||
|
||||
|
||||
2. MEMORY LAYOUT (Linear Memory)
|
||||
--------------------------------
|
||||
2. MEMORY LAYOUT (linear memory, single shared instance)
|
||||
--------------------------------------------------------
|
||||
|
||||
Address Region Size Notes
|
||||
-------- ------------------ ------- -------------------------
|
||||
-------- ------------------ ------- --------------------------
|
||||
0x0000 System Variables 64 B STATE, BASE, >IN, HERE,
|
||||
LATEST, SOURCE-ID, #TIB,
|
||||
HLD, LEAVE-FLAG
|
||||
0x0040 Input Buffer 1024 B Source parsing
|
||||
0x0440 PAD 256 B Scratch area
|
||||
0x0540 Pictured Output 128 B <# ... #> (grows down)
|
||||
0x0040 Input Buffer (TIB) 1024 B Source line being parsed
|
||||
0x0440 PAD 256 B Scratch for string ops
|
||||
0x0540 Pictured Output 128 B <# ... #> (HLD grows down)
|
||||
0x05C0 WORD Buffer 64 B Transient counted string
|
||||
0x0600 Data Stack 4096 B 1024 cells, grows DOWN
|
||||
0x1600 (Data Stack Top) DSP starts here
|
||||
0x1540 Return Stack 4096 B Grows DOWN
|
||||
0x2540 Float Stack 2048 B 256 doubles, grows DOWN
|
||||
0x2D40 Dictionary grows UP Linked list of word entries
|
||||
^ DSP starts at top = 0x1600
|
||||
0x1600 Return Stack 4096 B Grows DOWN
|
||||
^ RSP starts at top = 0x2600
|
||||
0x2600 Float Stack 2048 B 256 doubles, grows DOWN
|
||||
^ FSP starts at top = 0x2E00
|
||||
0x2E00 Hash Scratch 128 B SHA1/256/512 output
|
||||
0x2E80 Dictionary grows UP Linked list of entries
|
||||
|
||||
Total initial memory: 16 pages = 1 MiB (max 256 pages = 16 MiB)
|
||||
Cell size: 4 bytes (i32)
|
||||
Float size: 8 bytes (f64)
|
||||
Constants from crates/core/src/memory.rs (authoritative):
|
||||
SYSVAR_BASE 0x0000 size 64
|
||||
INPUT_BUFFER_BASE 0x0040 size 1024
|
||||
PAD_BASE 0x0440 size 256
|
||||
PICT_BUF_BASE 0x0540 size 128
|
||||
WORD_BUF_BASE 0x05C0 size 64
|
||||
DATA_STACK_BASE 0x0600 size 4096 (DATA_STACK_TOP = 0x1600)
|
||||
RETURN_STACK_BASE 0x1600 size 4096 (RETURN_STACK_TOP = 0x2600)
|
||||
FLOAT_STACK_BASE 0x2600 size 2048 (FLOAT_STACK_TOP = 0x2E00)
|
||||
HASH_SCRATCH_BASE 0x2E00 size 128
|
||||
DICTIONARY_BASE 0x2E80 grows up to memory.len()
|
||||
(Some inline `// 0x...` comments in memory.rs are stale — the
|
||||
computed values above are correct; the consts are derived.)
|
||||
|
||||
Total initial memory: 16 pages = 1 MiB (max 256 pages = 16 MiB).
|
||||
Cell size: 4 bytes (i32). Float size: 8 bytes (f64).
|
||||
|
||||
Stack layout note: linear-memory data and float stacks are the
|
||||
fallback used whenever codegen can't keep values in WASM
|
||||
locals. After stack-to-local promotion, many words touch DSP
|
||||
only on entry/exit.
|
||||
|
||||
|
||||
3. SYSTEM VARIABLES (offsets from 0x0000)
|
||||
@@ -113,60 +156,86 @@ WAFER Architecture Reference (updated 2026-04-13)
|
||||
8 >IN Parse offset into input buffer
|
||||
12 HERE Next free dictionary address
|
||||
16 LATEST Most recent dictionary entry addr
|
||||
20 SOURCE-ID 0=user input, -1=string
|
||||
20 SOURCE-ID 0=user input, -1=string, fileid>0
|
||||
24 #TIB Length of current input
|
||||
28 HLD Pictured numeric output pointer
|
||||
32 LEAVE-FLAG Nonzero when LEAVE called in loop
|
||||
|
||||
|
||||
4. DICTIONARY ENTRY FORMAT
|
||||
--------------------------
|
||||
4. DICTIONARY (dictionary.rs)
|
||||
-----------------------------
|
||||
|
||||
+--------+-------+----------+---------+-----------+
|
||||
| Link | Flags | Name | Padding | Code |
|
||||
| 4 bytes| 1 byte| N bytes | 0-3 B | 4 bytes |
|
||||
+--------+-------+----------+---------+-----------+
|
||||
Entry layout in linear memory:
|
||||
|
||||
+--------+-------+----------+---------+-----------+----------+
|
||||
| Link | Flags | Name | Padding | Code | Param |
|
||||
| 4 B | 1 B | N B | 0-3 B | 4 B | optional |
|
||||
+--------+-------+----------+---------+-----------+----------+
|
||||
^ ^
|
||||
entry_addr code field (fn table index)
|
||||
entry_addr code field (fn-table idx)
|
||||
|
||||
Flags byte:
|
||||
Bit 7 (0x80): IMMEDIATE
|
||||
Bit 6 (0x40): HIDDEN (during compilation)
|
||||
Bits 0-4 (0x1F): name length (max 31)
|
||||
Bits 0-4 : name length (max 31)
|
||||
|
||||
Link points to previous entry (0 = end of list).
|
||||
Name stored uppercase, padded to 4-byte alignment.
|
||||
Code field: index into WASM function table.
|
||||
Parameter field (if any) follows immediately after code field.
|
||||
Code field: index into shared WASM function table.
|
||||
Parameter field follows the code field for CREATE'd /
|
||||
DOES> / VARIABLE / CONSTANT bodies.
|
||||
|
||||
Lookup is NOT linear: dictionary.rs maintains a HashMap
|
||||
index from name -> Vec<(wid, addr, fn_index, immediate)>.
|
||||
Each entry is tagged with its wordlist id; resolution
|
||||
walks the current search order.
|
||||
|
||||
Wordlists / Search-Order:
|
||||
wordlist ids are u32; the FORTH wordlist is id 1.
|
||||
`current_wid` selects where new definitions land;
|
||||
`search_order` is the lookup chain (top first).
|
||||
Implements the Forth-2012 Search-Order word set.
|
||||
|
||||
|
||||
5. THREE TYPES OF WORDS
|
||||
-----------------------
|
||||
5. WORD CATEGORIES
|
||||
------------------
|
||||
|
||||
a) IR Primitives (compiled to WASM)
|
||||
register_primitive("DUP", false, vec![IrOp::Dup])
|
||||
a) IR Primitives — register_primitive("DUP", false, vec![IrOp::Dup])
|
||||
- Body stored as Vec<IrOp>
|
||||
- Optimized, then compiled to WASM module
|
||||
- Optimized, then compiled to WASM
|
||||
- Inlineable by optimizer
|
||||
- FAST: no function call overhead when inlined
|
||||
- Batched at boot: ~110 primitive registrations compiled
|
||||
into a single WASM module to amortize instantiation cost
|
||||
|
||||
b) Host Functions (HostFn closures)
|
||||
register_host_primitive(".", false, func)
|
||||
- HostFn = Box<dyn Fn(&mut dyn HostAccess) -> Result<()>>
|
||||
- Access memory/globals via HostAccess trait (runtime-agnostic)
|
||||
b) Host Functions — register_host_primitive(".", false, func)
|
||||
- HostFn = Box<dyn Fn(&mut dyn HostAccess)
|
||||
-> Result<()> + Send + Sync>
|
||||
- Access memory/globals via HostAccess trait
|
||||
- NOT inlineable
|
||||
- Used for: I/O, dictionary manipulation, complex logic
|
||||
- Same closure works on NativeRuntime and WebRuntime
|
||||
- Used for I/O, dictionary manipulation, complex stack ops
|
||||
- Same closure runs on NativeRuntime and WebRuntime
|
||||
|
||||
c) Forth-defined words
|
||||
: SQUARE DUP * ;
|
||||
- Compiled by outer interpreter
|
||||
- Goes through full optimize -> codegen pipeline
|
||||
- Stored in ir_bodies for future inlining
|
||||
c) Forth-defined words — `: SQUARE DUP * ;`
|
||||
- Compiled by the outer interpreter
|
||||
- Goes through the full optimize -> codegen pipeline
|
||||
- Stored in `ir_bodies` for future inlining
|
||||
|
||||
d) Special interpreter tokens (immediate, with custom parsing)
|
||||
- Defining words: CREATE, VARIABLE, CONSTANT, :, ;, DOES>
|
||||
- String literals: S", ."
|
||||
- Control structures: IF/ELSE/THEN, BEGIN/UNTIL/WHILE/REPEAT,
|
||||
DO/?DO/LOOP/+LOOP, [: ... ;] quotations, {: ... :} locals
|
||||
- CONSOLIDATE
|
||||
Their body-collection / dictionary-side-effect logic lives
|
||||
directly in compile_token / interpret_token_immediate.
|
||||
They still emit IR ops (e.g. IrOp::If, IrOp::DoLoop,
|
||||
IrOp::ForthLocalGet) — the difference is that they are NOT
|
||||
registered via register_primitive; the outer interpreter
|
||||
handles them as special syntax.
|
||||
|
||||
|
||||
6. WASM MODULE STRUCTURE (per word)
|
||||
-----------------------------------
|
||||
6. WASM MODULE STRUCTURE (per JIT-compiled word)
|
||||
------------------------------------------------
|
||||
|
||||
Imports (6) — provided by Runtime impl:
|
||||
0. emit (func: i32 -> void) Character output callback
|
||||
@@ -176,25 +245,59 @@ WAFER Architecture Reference (updated 2026-04-13)
|
||||
4. fsp (global: mut i32) Float stack pointer
|
||||
5. table (table: funcref) Shared function table
|
||||
|
||||
Types (2):
|
||||
0. void: () -> ()
|
||||
1. i32: (i32) -> ()
|
||||
Types: () -> () for word bodies; (i32) -> () for emit.
|
||||
|
||||
Functions (1):
|
||||
The compiled word body
|
||||
The compiled word body, typed () -> ().
|
||||
|
||||
Element section:
|
||||
table[base_fn_index] = function 1
|
||||
|
||||
Runtime::instantiate_and_install(wasm_bytes, fn_index):
|
||||
- NativeRuntime: Module::new + Instance::new with 6 wasmtime imports
|
||||
- WebRuntime: WebAssembly.instantiate with JS import objects
|
||||
- NativeRuntime: wasmtime Module::new + Instance::new
|
||||
with the 6 imports above
|
||||
- WebRuntime: WebAssembly.instantiate with JS import
|
||||
objects pulled from the shared WaferRepl state
|
||||
|
||||
|
||||
7. OPTIMIZATION PASSES (detail)
|
||||
7. IR OPS (ir.rs — IrOp enum)
|
||||
-----------------------------
|
||||
|
||||
Stack: Drop, Dup, Swap, Over, Rot, Nip, Tuck,
|
||||
TwoDup, TwoDrop
|
||||
Literals: PushI32, PushI64, PushF64
|
||||
Arithmetic: Add, Sub, Mul, DivMod, Negate, Abs
|
||||
Compare: Eq, NotEq, Lt, Gt, LtUnsigned,
|
||||
ZeroEq, ZeroLt
|
||||
Logic: And, Or, Xor, Invert,
|
||||
Lshift, Rshift, ArithRshift
|
||||
Memory: Fetch, Store, CFetch, CStore, PlusStore
|
||||
Control: Call, TailCall, Exit,
|
||||
If{then, else?},
|
||||
DoLoop{body, is_plus_loop},
|
||||
BeginUntil, BeginAgain,
|
||||
BeginWhileRepeat,
|
||||
BeginDoubleWhileRepeat,
|
||||
LoopRestartIfFalse,
|
||||
Block(label), BranchIfFalse(label),
|
||||
EndBlock(label) -- for CS-ROLL'd patterns
|
||||
Return stack: ToR, FromR, RFetch, LoopJ
|
||||
Forth locals: ForthLocalGet/Set,
|
||||
ForthFLocalGet/Set
|
||||
I/O: Emit, Dot, Cr, Type
|
||||
System: Execute, SpFetch
|
||||
Float stack: FDup, FDrop, FSwap, FOver
|
||||
Float math: FAdd, FSub, FMul, FDiv, FNegate, FAbs,
|
||||
FSqrt, FMin, FMax, FFloor, FRound
|
||||
Float compare:FZeroEq, FZeroLt, FEq, FLt
|
||||
Float memory: FetchFloat, StoreFloat
|
||||
Conversion: StoF, FtoS
|
||||
|
||||
|
||||
8. OPTIMIZATION PASSES (detail)
|
||||
-------------------------------
|
||||
|
||||
PEEPHOLE (runs 5x across full pipeline):
|
||||
PEEPHOLE (5x across pipeline):
|
||||
PushI32(n), Drop -> (removed) Unused literal
|
||||
Dup, Drop -> (removed) Redundant copy
|
||||
Swap, Swap -> (removed) Self-inverse
|
||||
@@ -205,16 +308,17 @@ WAFER Architecture Reference (updated 2026-04-13)
|
||||
PushI32(1), Mul -> (removed) Identity
|
||||
Over, Over -> TwoDup Combine
|
||||
Drop, Drop -> TwoDrop Combine
|
||||
(+ float variants: PushF64/FDrop, FDup/FDrop, FSwap/FSwap, FNegate/FNegate)
|
||||
Float variants:
|
||||
PushF64(_), FDrop / FDup, FDrop /
|
||||
FSwap, FSwap / FNegate, FNegate
|
||||
|
||||
CONSTANT FOLD:
|
||||
Binary: PushI32(a), PushI32(b), <op> -> PushI32(result)
|
||||
Supports: Add, Sub, Mul, And, Or, Xor, Lshift, Rshift, ArithRshift,
|
||||
Eq, NotEq, Lt, Gt, LtUnsigned
|
||||
Unary: PushI32(n), <op> -> PushI32(result)
|
||||
Supports: Negate, Abs, Invert, ZeroEq, ZeroLt
|
||||
Float binary: PushF64(a), PushF64(b), <op> -> PushF64(result)
|
||||
Float unary: PushF64(n), <op> -> PushF64(result)
|
||||
Binary i32: PushI32(a), PushI32(b), <op> -> PushI32(r)
|
||||
Add, Sub, Mul, And, Or, Xor,
|
||||
Lshift, Rshift, ArithRshift,
|
||||
Eq, NotEq, Lt, Gt, LtUnsigned
|
||||
Unary i32: Negate, Abs, Invert, ZeroEq, ZeroLt
|
||||
Float binary/unary equivalents on PushF64.
|
||||
|
||||
STRENGTH REDUCE:
|
||||
PushI32(2^n), Mul -> PushI32(n), Lshift
|
||||
@@ -222,85 +326,153 @@ WAFER Architecture Reference (updated 2026-04-13)
|
||||
PushI32(0), Lt -> ZeroLt
|
||||
|
||||
DCE:
|
||||
PushI32(nonzero), If{then,else} -> then_body only
|
||||
PushI32(0), If{then,else} -> else_body only
|
||||
PushI32(nonzero), If{then,else} -> then_body only
|
||||
PushI32(0), If{then,else} -> else_body only
|
||||
Everything after Exit -> removed
|
||||
|
||||
INLINE (max_size=8, single pass):
|
||||
Call(id) -> inline body if:
|
||||
- Body length <= 8 ops
|
||||
- No self-recursion
|
||||
- No Exit (would return from caller)
|
||||
- No ForthLocalGet/Set (would collide with caller's locals)
|
||||
INLINE (max 8 ops, single pass):
|
||||
Call(id) -> body if all of:
|
||||
- body length <= 8 ops
|
||||
- no self-recursion
|
||||
- no Exit (would return from caller)
|
||||
- no ForthLocalGet/Set (would collide with caller locals)
|
||||
TailCall -> Call when inlined (no longer tail position)
|
||||
|
||||
TAIL CALL (last pass):
|
||||
Last Call(id) -> TailCall(id) if:
|
||||
- Return stack balanced (equal ToR and FromR)
|
||||
Recurses into If branches for conditional tail calls
|
||||
TAIL CALL (last pass, must be last):
|
||||
trailing Call(id) -> TailCall(id) if return stack balanced
|
||||
(equal ToR / FromR pairs).
|
||||
Recurses into If branches for conditional tail calls.
|
||||
|
||||
STACK-TO-LOCAL PROMOTION (codegen pass, not optimizer):
|
||||
Words whose effects on the data stack can be statically
|
||||
tracked are compiled to use WASM locals 1..s instead of
|
||||
DSP loads/stores. Triggered by `is_promotable(body)`.
|
||||
DSP is still written back before any Call so callees and
|
||||
host functions see a consistent stack.
|
||||
|
||||
|
||||
8. CONSOLIDATION
|
||||
----------------
|
||||
9. CONSOLIDATION (consolidate.rs + codegen.rs)
|
||||
----------------------------------------------
|
||||
|
||||
CONSOLIDATE word recompiles all JIT-compiled words into a
|
||||
single WASM module:
|
||||
- All call_indirect -> direct call (for words in module)
|
||||
- External calls (host functions) remain call_indirect
|
||||
- Maximum performance for final program
|
||||
CONSOLIDATE recompiles every JIT-compiled word into ONE WASM
|
||||
module:
|
||||
- All call_indirect to consolidated words become direct
|
||||
`call` (single-module direct calls)
|
||||
- External calls (host functions) stay call_indirect
|
||||
- Removes per-word instantiation overhead and lets the
|
||||
WASM engine inline / specialize across word boundaries
|
||||
|
||||
Two-part implementation:
|
||||
codegen::compile_consolidated_module() - builds multi-function module
|
||||
outer::ForthVM::consolidate() - orchestrates collection + table update
|
||||
Two parts:
|
||||
codegen::compile_consolidated_module()
|
||||
Builds the multi-function module.
|
||||
outer::ForthVM::consolidate()
|
||||
Collects ir_bodies, computes table layout, compiles,
|
||||
instantiates, and patches the shared function table.
|
||||
|
||||
|
||||
9. EXPORT PIPELINE (wafer build)
|
||||
--------------------------------
|
||||
10. EXPORT PIPELINE (`wafer build`)
|
||||
----------------------------------
|
||||
|
||||
1. Evaluate source file with recording_toplevel=true
|
||||
2. Collect all IR words + top-level IR
|
||||
3. Determine entry: --entry flag > MAIN word > top-level execution
|
||||
4. Build consolidated module with data section (memory snapshot)
|
||||
5. Embed metadata in "wafer" custom section (JSON)
|
||||
6. Optional: --js generates JS loader + HTML page
|
||||
7. Optional: --native AOT-compiles and appends to wafer binary
|
||||
Format: [wafer binary][precompiled WASM][metadata][trailer]
|
||||
Trailer: payload_len(8) + metadata_len(8) + "WAFEREXE"(8)
|
||||
export.rs::export_module() steps:
|
||||
1. Evaluate the source file with recording_toplevel = true
|
||||
2. Collect every IR word + recorded top-level IR
|
||||
3. Resolve entry point (priority):
|
||||
--entry <name> > MAIN > synthetic _start from the
|
||||
recorded top-level
|
||||
4. Snapshot WASM linear memory (system vars + dictionary +
|
||||
any user data)
|
||||
5. Walk the IR, find every Call/TailCall to a host word
|
||||
not in the consolidated set: those become required
|
||||
imports of the exported module
|
||||
6. Build metadata (JSON, custom "wafer" section):
|
||||
version, entry_table_index, host_functions,
|
||||
memory_size, dsp/rsp/fsp_init
|
||||
7. compile_exportable_module() emits the final WASM with
|
||||
a passive data section seeded from the memory snapshot
|
||||
8. Optional --js: also emit a JS loader + minimal HTML
|
||||
9. Optional --native: AOT-compile and append to the wafer
|
||||
binary itself, in this layout:
|
||||
[wafer ELF/Mach-O][precompiled WASM][metadata]
|
||||
[trailer: payload_len(8) | metadata_len(8) | "WAFEREXE"]
|
||||
The CLI detects the trailer at startup and runs the
|
||||
embedded payload directly (single-file distribution).
|
||||
|
||||
|
||||
10. CRATE STRUCTURE
|
||||
11. CRATE STRUCTURE
|
||||
-------------------
|
||||
|
||||
crates/
|
||||
core/ wafer-core: compiler, optimizer, codegen, dictionary, Runtime trait
|
||||
Feature flags: default=["native"], "native" enables wasmtime
|
||||
Without features: pure Rust (dictionary, IR, optimizer, codegen, outer)
|
||||
cli/ wafer: CLI REPL (rustyline), wafer build/run commands
|
||||
web/ wafer-web: browser REPL (wasm-bindgen + WebRuntime + HTML/CSS/JS)
|
||||
core/ wafer-core: compiler, optimizer, codegen,
|
||||
dictionary, runtime trait, outer interpreter.
|
||||
Largest file: codegen.rs (~4.3k LOC).
|
||||
Feature flags:
|
||||
default = ["native"]
|
||||
"native" pulls in wasmtime + NativeRuntime +
|
||||
runner.rs (CLI executor) + export.rs
|
||||
"crypto" enables SHA1/256/512 host words
|
||||
No features: pure-Rust core for wafer-web
|
||||
(dictionary, IR, optimizer, codegen,
|
||||
outer interpreter only)
|
||||
cli/ wafer: rustyline REPL + `wafer build` / `wafer run`
|
||||
web/ wafer-web: browser REPL.
|
||||
|
||||
Key web files:
|
||||
crates/web/src/lib.rs WaferRepl wasm-bindgen entry point
|
||||
crates/web/src/runtime_web.rs WebRuntime: js_sys WebAssembly API
|
||||
crates/web/www/app.js Frontend JS (terminal emulation)
|
||||
crates/web/www/index.html HTML shell
|
||||
crates/web/www/style.css Styling
|
||||
crates/web/src/lib.rs WaferRepl wasm-bindgen entry
|
||||
crates/web/src/runtime_web.rs WebRuntime: js_sys WebAssembly
|
||||
crates/web/www/app.js Frontend (terminal emulation)
|
||||
crates/web/www/index.html HTML shell
|
||||
crates/web/www/style.css Styling
|
||||
crates/web/www/pkg/ wasm-pack output (gitignored)
|
||||
|
||||
|
||||
11. BOOT SEQUENCE
|
||||
12. BOOT SEQUENCE
|
||||
-----------------
|
||||
|
||||
ForthVM::<R>::new() ->
|
||||
1. R::new() — create runtime (wasmtime or browser WASM)
|
||||
2. register_primitives() in batch_mode:
|
||||
- ~40 IR primitives (DUP, +, @, etc.)
|
||||
- ~60 host functions (., .S, M*, ACCEPT, etc.)
|
||||
- ~30 special words (IF, DO, :, VARIABLE, etc.)
|
||||
3. compile_batch() - single WASM module for all IR primitives
|
||||
4. Load boot.fth - Forth replaces Rust host functions:
|
||||
Phase 1: Stack/memory (DEPTH, PICK, 2OVER, FILL, MOVE)
|
||||
Phase 2: Double-cell arithmetic (D+, DNEGATE, D<)
|
||||
Phase 3: Mixed arithmetic (SM/REM, FM/MOD, */, */MOD)
|
||||
Phase 4: HERE, ALLOT, comma, ALIGN
|
||||
Phase 5: I/O, pictured numeric output (., U., TYPE, <# # #>)
|
||||
Phase 6: DEFER support
|
||||
Phase 7: String operations (COMPARE, SOURCE, FALIGNED)
|
||||
2. register_primitives() in batch_mode = true:
|
||||
- ~110 IR primitive registrations (DUP, +, @, ...)
|
||||
- ~87 host primitive registrations (., .S, M*, ACCEPT, ...)
|
||||
- special interpreter tokens (IF, DO, :, VARIABLE, S",
|
||||
{: :}, [: ;], CONSOLIDATE, ...) handled directly in
|
||||
interpret_token_immediate / compile_token, not registered as primitives
|
||||
3. Word-set registrations:
|
||||
core, double, exception, facility, file (subset),
|
||||
floating-point, locals, memory, search-order,
|
||||
programming-tools, string, optional crypto
|
||||
4. batch_compile_deferred() — single WASM module for all
|
||||
deferred IR primitives
|
||||
5. Load boot.fth (include_str!), evaluated line by line so
|
||||
`\` comments terminate at end-of-line:
|
||||
Phase 1: stack/memory (DEPTH, PICK, 2OVER, FILL, MOVE,
|
||||
CMOVE, /STRING, -TRAILING)
|
||||
Phase 2: double-cell arithmetic (D+, DNEGATE, D<, D=)
|
||||
Phase 3: mixed arithmetic (SM/REM, FM/MOD, */, */MOD)
|
||||
Phase 4: HERE, ALLOT, comma, ALIGN, ALIGNED
|
||||
Phase 5: I/O + pictured output (., U., TYPE, <# # #>,
|
||||
SIGN, HOLD)
|
||||
Phase 6: DEFER support (DEFER, IS, ACTION-OF)
|
||||
Phase 7: more replacements (COMPARE, SOURCE, FALIGNED,
|
||||
DFALIGN, structures, S" hint, ...)
|
||||
|
||||
|
||||
13. RUNTIME-VS-EXPORT NOTE
|
||||
--------------------------
|
||||
|
||||
Two separate codegen entry points produce multi-function
|
||||
WASM modules from the same IR:
|
||||
|
||||
compile_consolidated_module() used by CONSOLIDATE
|
||||
- Targets the live runtime
|
||||
- Re-uses the shared globals/table/memory imports
|
||||
- External calls remain call_indirect
|
||||
|
||||
compile_exportable_module() used by `wafer build`
|
||||
- Targets a standalone module
|
||||
- Carries its own memory (passive data section seeded
|
||||
from the snapshot) and embeds metadata
|
||||
- Required host functions become imports the runner
|
||||
(or AOT loader) must satisfy
|
||||
|
||||
Both share the same per-IrOp lowering helpers; the
|
||||
difference is in module-level wiring.
|
||||
|
||||
@@ -0,0 +1,47 @@
|
||||
# Editor support for WAFER
|
||||
|
||||
Syntax highlighting assets for editors and pagers.
|
||||
|
||||
## bat (and other Sublime-Text-compatible tools)
|
||||
|
||||
`bat/WAFER.sublime-syntax` is a Sublime Text grammar covering Forth 2012 plus
|
||||
WAFER-specific words (`CONSOLIDATE`, `RANDOM`, `RND-SEED`, `UTIME`).
|
||||
|
||||
### Install
|
||||
|
||||
```
|
||||
just install-syntax
|
||||
```
|
||||
|
||||
or manually:
|
||||
|
||||
```
|
||||
mkdir -p ~/.config/bat/syntaxes
|
||||
cp tools/editor-support/bat/WAFER.sublime-syntax ~/.config/bat/syntaxes/
|
||||
bat cache --build
|
||||
```
|
||||
|
||||
### Verify
|
||||
|
||||
```
|
||||
bat --list-languages | grep -i forth # should list Forth
|
||||
bat --language forth crates/core/boot.fth # should render with colour
|
||||
```
|
||||
|
||||
### Use with `oked`
|
||||
|
||||
`oked` auto-detects `.fth` / `.4th` / `.forth` files and invokes `bat` with
|
||||
`--language forth`. After the install step above, opening any WAFER source in
|
||||
`oked` and toggling highlight (`H` command, or `oked -S forth`) will use this
|
||||
syntax.
|
||||
|
||||
### Updating the keyword list
|
||||
|
||||
Primitives live in `crates/core/src/outer.rs` (`register_primitive` and
|
||||
`register_host_primitive` calls). When a new **user-facing, non-standard** word
|
||||
is added, append it to the `wafer_extras` context in
|
||||
`bat/WAFER.sublime-syntax`. Standard Forth 2012 words are already covered by
|
||||
the main contexts.
|
||||
|
||||
Internal symbols (names that start with `_`) should not be added — they are
|
||||
implementation details that user code never types.
|
||||
@@ -0,0 +1,189 @@
|
||||
%YAML 1.2
|
||||
---
|
||||
# WAFER / Forth 2012 syntax for `bat` (and any Sublime Text compatible highlighter).
|
||||
#
|
||||
# Keyword list is derived from the primitives registered in
|
||||
# crates/core/src/outer.rs plus the Forth 2012 core-ext wordset and the boot.fth
|
||||
# definitions in crates/core/boot.fth. WAFER-specific additions are tagged below.
|
||||
#
|
||||
# Install: see tools/editor-support/README.md.
|
||||
name: Forth
|
||||
file_extensions:
|
||||
- fth
|
||||
- 4th
|
||||
- forth
|
||||
scope: source.forth
|
||||
|
||||
variables:
|
||||
ident_break: '(?=\s|$)'
|
||||
|
||||
contexts:
|
||||
main:
|
||||
- include: comments
|
||||
- include: strings
|
||||
- include: numbers
|
||||
- include: definitions
|
||||
- include: locals
|
||||
- include: structures
|
||||
- include: control
|
||||
- include: stack_ops
|
||||
- include: return_stack
|
||||
- include: arithmetic
|
||||
- include: logic
|
||||
- include: compare
|
||||
- include: memory
|
||||
- include: io
|
||||
- include: float
|
||||
- include: dictionary
|
||||
- include: exception
|
||||
- include: parsing
|
||||
- include: literals
|
||||
- include: hashing
|
||||
- include: wafer_extras
|
||||
|
||||
comments:
|
||||
# Line comment: backslash to end of line, must be followed by whitespace or EOL.
|
||||
- match: '(?i)(?:^|(?<=\s))\\(?=\s|$).*$'
|
||||
scope: comment.line.backslash.forth
|
||||
# Stack-effect / block comment: ( ... ) — the `(` must be its own token (line start or whitespace before it, whitespace or EOL after it).
|
||||
- match: '(?i)(?:^|(?<=\s))\((?=\s|$)'
|
||||
scope: punctuation.definition.comment.forth
|
||||
push:
|
||||
- meta_scope: comment.block.paren.forth
|
||||
- match: '\)'
|
||||
scope: punctuation.definition.comment.forth
|
||||
pop: true
|
||||
# Immediate print: .( ... ) displays the enclosed text when encountered; it is scoped like a comment here.
|
||||
- match: '(?i)(?:^|(?<=\s))\.\((?=\s|$)'
|
||||
scope: punctuation.definition.comment.forth
|
||||
push:
|
||||
- meta_scope: comment.block.dot-paren.forth
|
||||
- match: '\)'
|
||||
scope: punctuation.definition.comment.forth
|
||||
pop: true
|
||||
|
||||
strings:
|
||||
# Standard Forth strings: prefix word, one whitespace character, then the body up to the first " (a \" escape inside S\" is not recognised and ends the string early).
|
||||
- match: '(?i)(?:^|(?<=\s))(S\\"|S"|C"|\."|ABORT")(\s)'
|
||||
captures:
|
||||
1: keyword.other.string-prefix.forth
|
||||
push:
|
||||
- meta_scope: string.quoted.double.forth
|
||||
- match: '"'
|
||||
pop: true
|
||||
|
||||
numbers:
|
||||
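# Numeric literals: `$` hex, `#` decimal, `%` binary, 'c' character, and unprefixed integers/floats; all whitespace-delimited. A leading `-` is accepted only on the `#` and unprefixed forms.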
|
||||
- match: '(?i)(?:^|(?<=\s))\$[0-9A-F]+{{ident_break}}'
|
||||
scope: constant.numeric.hex.forth
|
||||
- match: '(?i)(?:^|(?<=\s))#-?[0-9]+{{ident_break}}'
|
||||
scope: constant.numeric.decimal.forth
|
||||
- match: '(?i)(?:^|(?<=\s))%[01]+{{ident_break}}'
|
||||
scope: constant.numeric.binary.forth
|
||||
- match: "(?i)(?:^|(?<=\\s))'.'{{ident_break}}"
|
||||
scope: constant.character.forth
|
||||
- match: '(?i)(?:^|(?<=\s))-?[0-9]+(?:\.[0-9]*)?(?:[eE]-?[0-9]+)?{{ident_break}}'
|
||||
scope: constant.numeric.forth
|
||||
|
||||
definitions:
|
||||
- match: '(?i)(?:^|(?<=\s))(:|:NONAME)(\s+)(\S+)?'
|
||||
captures:
|
||||
1: keyword.other.definition.forth
|
||||
3: entity.name.function.forth
|
||||
- match: '(?i)(?:^|(?<=\s));{{ident_break}}'
|
||||
scope: keyword.other.definition.forth
|
||||
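# Quotations: [: ... ;] compiles an anonymous word. NOTE(review): 6.2.0455 appears to be :NONAME in Forth 2012; quotations look like a post-2012 extension — confirm the citation.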
|
||||
- match: '(?i)(?:^|(?<=\s))(\[:|;\]){{ident_break}}'
|
||||
scope: keyword.other.definition.forth
|
||||
- match: '(?i)(?:^|(?<=\s))(VARIABLE|2VARIABLE|CONSTANT|2CONSTANT|VALUE|CREATE|DEFER|MARKER|BUFFER:|FCONSTANT|FVARIABLE)(\s+)(\S+)?'
|
||||
captures:
|
||||
1: keyword.other.defining.forth
|
||||
3: entity.name.constant.forth
|
||||
- match: '(?i)(?:^|(?<=\s))(DOES>|IMMEDIATE|RECURSE|POSTPONE|COMPILE,|LITERAL|2LITERAL|FLITERAL|SLITERAL){{ident_break}}'
|
||||
scope: keyword.other.defining.forth
|
||||
|
||||
control:
|
||||
- match: '(?i)(?:^|(?<=\s))(IF|THEN|ELSE|BEGIN|UNTIL|WHILE|REPEAT|AGAIN|DO|\?DO|LOOP|\+LOOP|LEAVE|UNLOOP|EXIT|CASE|OF|ENDOF|ENDCASE|QUIT){{ident_break}}'
|
||||
scope: keyword.control.forth
|
||||
|
||||
stack_ops:
|
||||
- match: '(?i)(?:^|(?<=\s))(DUP|\?DUP|DROP|SWAP|OVER|ROT|-ROT|NIP|TUCK|PICK|ROLL|2DUP|2DROP|2SWAP|2OVER|2ROT|DEPTH|SP@){{ident_break}}'
|
||||
scope: support.function.stack.forth
|
||||
|
||||
return_stack:
|
||||
- match: '(?i)(?:^|(?<=\s))(>R|R>|R@|2>R|2R>|2R@|N>R|NR>|I|J|CS-PICK|CS-ROLL){{ident_break}}'
|
||||
scope: support.function.return-stack.forth
|
||||
|
||||
arithmetic:
|
||||
- match: '(?i)(?:^|(?<=\s))(\+|-|\*|/|MOD|/MOD|\*/|\*/MOD|NEGATE|ABS|MIN|MAX|1\+|1-|2\*|2/|M\*|M\+|M\*/|UM\*|UM/MOD|FM/MOD|SM/REM|S>D|D>S){{ident_break}}'
|
||||
scope: keyword.operator.arithmetic.forth
|
||||
|
||||
logic:
|
||||
- match: '(?i)(?:^|(?<=\s))(AND|OR|XOR|INVERT|LSHIFT|RSHIFT){{ident_break}}'
|
||||
scope: keyword.operator.logical.forth
|
||||
|
||||
compare:
|
||||
- match: '(?i)(?:^|(?<=\s))(=|<>|<|>|<=|>=|U<|U>|0=|0<>|0<|0>){{ident_break}}'
|
||||
scope: keyword.operator.comparison.forth
|
||||
|
||||
memory:
|
||||
- match: '(?i)(?:^|(?<=\s))(@|!|C@|C!|\+!|2@|2!|ALLOT|HERE|ALIGN|ALIGNED|CELL\+|CELLS|CHAR\+|CHARS|UNUSED|MOVE|CMOVE|CMOVE>|FILL|ERASE|BLANK|ALLOCATE|FREE|RESIZE|PAD){{ident_break}}'
|
||||
scope: support.function.memory.forth
|
||||
|
||||
io:
|
||||
- match: '(?i)(?:^|(?<=\s))(EMIT|CR|SPACE|SPACES|TYPE|\.|U\.|\.R|U\.R|D\.|D\.R|\?|KEY|KEY\?|PAGE|AT-XY|ACCEPT|EXPECT|\.S){{ident_break}}'
|
||||
scope: support.function.io.forth
|
||||
|
||||
float:
|
||||
- match: '(?i)(?:^|(?<=\s))(F\+|F-|F\*|F/|FNEGATE|FABS|FMAX|FMIN|FSQRT|FFLOOR|FROUND|FSINCOS|F=|F<|F0=|F0<|F~|FDUP|FDROP|FSWAP|FOVER|FROT|FNIP|FTUCK|FDEPTH|F@|F!|FE\.|FS\.|F\.|F>D|D>F|F>S|S>F|>FLOAT|REPRESENT|PRECISION|SET-PRECISION|FALIGNED|DFALIGNED|SFALIGNED|DF@|DF!|SF@|SF!){{ident_break}}'
|
||||
scope: support.function.float.forth
|
||||
|
||||
dictionary:
|
||||
- match: "(?i)(?:^|(?<=\\s))('|\\[']|,|>BODY|FIND|WORDS|ONLY|ALSO|PREVIOUS|DEFINITIONS|FORTH|GET-ORDER|SET-ORDER|GET-CURRENT|SET-CURRENT|WORDLIST|SEARCH-WORDLIST|FORTH-WORDLIST|ENVIRONMENT\\?|EXECUTE){{ident_break}}"
|
||||
scope: support.function.dictionary.forth
|
||||
|
||||
exception:
|
||||
- match: '(?i)(?:^|(?<=\s))(CATCH|THROW|ABORT){{ident_break}}'
|
||||
scope: keyword.control.exception.forth
|
||||
|
||||
parsing:
|
||||
- match: '(?i)(?:^|(?<=\s))(PARSE|PARSE-NAME|WORD|REFILL|EVALUATE|SOURCE|SOURCE-ID|>IN|BASE|STATE|>NUMBER|SEARCH|SUBSTITUTE|UNESCAPE|REPLACES|S){{ident_break}}'
|
||||
scope: support.function.parsing.forth
|
||||
|
||||
literals:
|
||||
- match: '(?i)(?:^|(?<=\s))(TRUE|FALSE|BL|CHAR|\[CHAR\]|\[COMPILE\]){{ident_break}}'
|
||||
scope: constant.language.forth
|
||||
|
||||
# Forth 2012 §13 Locals. `{: ... :}` is the user-facing form; `{F:` is the
|
||||
# float-locals variant (gforth/SwiftForth-style). `(LOCAL)` is the low-level
|
||||
# primitive from §13.6.1.0086; user code typically builds `LOCAL` /
|
||||
# `END-LOCALS` on top of it. `TO` rebinds a VALUE or local; `LOCALS|` is the
|
||||
# §13 legacy (Forth-94) form.
|
||||
locals:
|
||||
- match: '(?i)(?:^|(?<=\s))(\{:|:\}|\{F:|LOCALS\|){{ident_break}}'
|
||||
scope: keyword.other.locals.forth
|
||||
- match: '(?i)(?:^|(?<=\s))(TO|END-LOCALS){{ident_break}}'
|
||||
scope: keyword.other.locals.forth
|
||||
- match: '(?i)(?:^|(?<=\s))\(LOCAL\){{ident_break}}'
|
||||
scope: support.function.locals.forth
|
||||
|
||||
# Structure words — Facility-ext 10.6.2.0935 (defined in boot.fth).
|
||||
structures:
|
||||
- match: '(?i)(?:^|(?<=\s))(BEGIN-STRUCTURE)(\s+)(\S+)?'
|
||||
captures:
|
||||
1: keyword.other.struct.forth
|
||||
3: entity.name.struct.forth
|
||||
- match: '(?i)(?:^|(?<=\s))(END-STRUCTURE|\+FIELD|FIELD:|CFIELD:|FFIELD:|SFFIELD:|DFFIELD:){{ident_break}}'
|
||||
scope: keyword.other.struct.forth
|
||||
|
||||
# Hash primitives — mirrors the registry in crates/core/src/crypto.rs. When
|
||||
# new algorithms are added to `crypto::ALGOS`, extend this alternation.
|
||||
hashing:
|
||||
- match: '(?i)(?:^|(?<=\s))(SHA1|SHA256|SHA512){{ident_break}}'
|
||||
scope: support.function.hash.forth
|
||||
|
||||
wafer_extras:
|
||||
# WAFER-specific extensions beyond the Forth 2012 standard.
|
||||
# When the language grows new user-facing non-standard words, add them here.
|
||||
- match: '(?i)(?:^|(?<=\s))(CONSOLIDATE|RANDOM|RND-SEED|UTIME|READ-PASSWORD){{ident_break}}'
|
||||
scope: support.function.wafer-extra.forth
|
||||
Reference in New Issue
Block a user