Skip to main content

WebAssembly Backend Design

This document defines the design for a Morphir backend that compiles Morphir IR to WebAssembly, with full support for the Component Model and Canonical ABI.

Overview

The Wasm backend transforms Morphir IR into WebAssembly modules that can:

  1. Run standalone as core Wasm modules
  2. Interoperate with other components via the Component Model
  3. Be embedded in browsers, edge runtimes, or server environments

┌─────────────────┐     ┌─────────────────┐     ┌─────────────────┐
│   Morphir IR    │────►│  Wasm Backend   │────►│   .wasm file    │
│ (Distribution)  │     │                 │     │   (Component)   │
└─────────────────┘     └─────────────────┘     └─────────────────┘
                          ├── Type lowering
                          ├── Value codegen
                          ├── ABI generation
                          └── Memory management

Architecture

Backend Phases

┌──────────────────────────────────────────────────────────────────┐
│                           Wasm Backend                           │
├──────────────────────────────────────────────────────────────────┤
│                                                                  │
│  Phase 1: Analysis                                               │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐               │
│  │ Boundary    │  │ Type        │  │ Closure     │               │
│  │ Detection   │  │ Analysis    │  │ Analysis    │               │
│  └─────────────┘  └─────────────┘  └─────────────┘               │
│                                                                  │
│  Phase 2: Lowering                                               │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐               │
│  │ Type        │  │ ABI         │  │ Memory      │               │
│  │ Lowering    │  │ Generation  │  │ Layout      │               │
│  └─────────────┘  └─────────────┘  └─────────────┘               │
│                                                                  │
│  Phase 3: Code Generation                                        │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐               │
│  │ Function    │  │ Lifting/    │  │ Runtime     │               │
│  │ Codegen     │  │ Lowering    │  │ Support     │               │
│  └─────────────┘  └─────────────┘  └─────────────┘               │
│                                                                  │
│  Phase 4: Emission                                               │
│  ┌─────────────┐  ┌─────────────┐  ┌─────────────┐               │
│  │ Wasm        │  │ Component   │  │ WIT         │               │
│  │ Module      │  │ Wrapper     │  │ Generation  │               │
│  └─────────────┘  └─────────────┘  └─────────────┘               │
│                                                                  │
└──────────────────────────────────────────────────────────────────┘

Core Data Structures

/** Backend configuration.
  *
  * Mirrors the `[backend.wasm]` table of `morphir.toml`. Every field has a
  * default, so `WasmBackendConfig()` yields the documented default behaviour.
  *
  * @param arbitraryIntHandling    what to do with an unsized `Int` at a boundary
  * @param arbitraryFloatHandling  what to do with an unsized `Float` at a boundary
  * @param validateInboundStrings  whether strings entering the component are validated
  * @param validateOutboundStrings whether strings leaving the component are validated
  * @param invalidUtf8Handling     reaction to invalid UTF-8 found during validation
  * @param emitComponentWrapper    wrap the core module in a Component Model component
  * @param emitWitInterface        also generate a WIT interface file
  * @param optimizationLevel       optimisation effort
  * @param initialMemoryPages      `initial_memory_pages`: initial linear-memory size in pages
  * @param maxMemoryPages          `max_memory_pages`: upper bound in pages, `None` for unbounded
  * @param emitNames               `emit_names`: include function names in the output
  * @param emitSourceMap           `emit_source_map`: emit debug information
  */
case class WasmBackendConfig(
  // Boundary handling
  arbitraryIntHandling: ArbitraryIntHandling = ArbitraryIntHandling.Error,
  arbitraryFloatHandling: ArbitraryFloatHandling = ArbitraryFloatHandling.F64,

  // Validation
  validateInboundStrings: Boolean = true,
  validateOutboundStrings: Boolean = false,
  invalidUtf8Handling: InvalidUtf8Handling = InvalidUtf8Handling.Trap,

  // Output
  emitComponentWrapper: Boolean = true,
  emitWitInterface: Boolean = true,
  optimizationLevel: OptimizationLevel = OptimizationLevel.Default,

  // Memory (appended after the original fields so positional construction keeps working)
  initialMemoryPages: Int = 1,
  maxMemoryPages: Option[Int] = Some(256),

  // Debug
  emitNames: Boolean = true,
  emitSourceMap: Boolean = false,
)

/** Policy for an unsized `Int` that appears in a boundary signature.
  *
  * `Error` rejects it, `DefaultI32` / `DefaultI64` pick a fixed width, and
  * `Warn` is treated like `DefaultI64` by `analyzeType`.
  */
enum ArbitraryIntHandling:
  case Error, Warn, DefaultI32, DefaultI64

/** Policy for an unsized `Float` that appears in a boundary signature. */
enum ArbitraryFloatHandling:
  case Error, Warn, F64

/** Reaction to invalid UTF-8 encountered while validating a string. */
enum InvalidUtf8Handling:
  case Trap, Replace, Skip

/** Optimisation effort requested from the backend.
  *
  * Note: `OptimizationLevel.None` shadows `scala.None` wherever the cases are
  * wildcard-imported, so refer to it qualified.
  */
enum OptimizationLevel:
  case None, Default, Aggressive

Phase 1: Analysis

Boundary Detection

Identify functions that cross component boundaries by checking for boundary extensions in the IR.

/** Description of one value definition that crosses a component boundary.
  *
  * @param direction  whether the function is exported or imported
  * @param fqName     fully-qualified Morphir name of the function
  * @param wasmName   name used for the function on the Wasm side
  * @param inputTypes parameter names paired with their Morphir types, in order
  * @param outputType Morphir type of the result
  */
case class BoundaryInfo(
  direction: BoundaryDirection,
  fqName: FQName,
  wasmName: String,
  inputTypes: List[(Name, Type)],
  outputType: Type,
)

/** Direction in which a boundary function crosses the component edge. */
enum BoundaryDirection:
  /** The function is exposed to the outside. */
  case Export

  /** The function is provided from outside; `source` names where it comes from. */
  case Import(source: String)

/** Collects every value definition in `module` that carries a boundary extension.
  *
  * @param module the module whose values are scanned
  * @return one [[BoundaryInfo]] per boundary function, in the iteration order of `module.values`
  */
def detectBoundaries(module: ModuleDefinition): List[BoundaryInfo] =
  module.values.collect {
    case (name, valueDef) if hasBoundaryExtension(valueDef) =>
      BoundaryInfo(
        direction = extractDirection(valueDef),
        fqName = module.fqName / name,
        wasmName = toWasmName(name),
        inputTypes = extractInputTypes(valueDef),
        outputType = extractOutputType(valueDef),
      )
  }.toList // `collect` on a map-like collection yields an Iterable, not the declared List

/** True when the definition's extensions contain the export or the import boundary marker. */
private def hasBoundaryExtension(valueDef: ValueDefinition): Boolean =
  val declared = valueDef.attributes.extensions
  // `exists` stops at the first hit, so the import marker is only parsed when needed.
  List("morphir/wasm:boundary#export", "morphir/wasm:boundary#import")
    .exists(marker => declared.contains(FQName.parse(marker)))

Type Analysis

Analyze types for boundary compatibility and compute memory layouts.

/** Outcome of checking whether a Morphir type may appear at a component boundary. */
enum TypeCompatibility:
  /** The type is allowed and is represented as `abiType`. */
  case Compatible(abiType: AbiType)

  /** The type is rejected; `reason` is a human-readable explanation. */
  case Incompatible(reason: String)

/** Representation of a value at the ABI level. */
enum AbiType:
  // Core Wasm numeric types
  case I32, I64, F32, F64

  /** An i32 pointing to memory. */
  case Pointer

  /** An aggregate described by an explicit memory layout. */
  case Composite(layout: MemoryLayout)

/** Layout of an aggregate value.
  *
  * @param size      total size (the string layout in this file uses 8 for two i32 fields)
  * @param alignment required alignment
  * @param fields    the fields with their offsets
  */
case class MemoryLayout(
  size: Int,
  alignment: Int,
  fields: List[LayoutField],
)

/** One field inside a [[MemoryLayout]].
  *
  * @param name    field name
  * @param offset  offset of the field from the start of the aggregate
  * @param abiType ABI representation of the field
  */
case class LayoutField(
  name: Name,
  offset: Int,
  abiType: AbiType,
)

/** Decides whether `typ` may appear in a boundary signature and, if so, how it is represented.
  *
  * @param typ    the Morphir type to analyse
  * @param config backend configuration; the `arbitraryIntHandling` and
  *               `arbitraryFloatHandling` policies are applied to unsized `Int` / `Float`
  * @return `Compatible` with the chosen ABI type, or `Incompatible` with a reason
  */
def analyzeType(typ: Type, config: WasmBackendConfig): TypeCompatibility =
  import TypeCompatibility.{Compatible, Incompatible}

  // Applies `analyze` to the type argument of a unary constructor, reporting a
  // missing argument as Incompatible instead of failing on `.head`.
  def withTypeArgument(constructor: String, arg: Option[Type])(
      analyze: Type => TypeCompatibility
  ): TypeCompatibility =
    arg match
      case Some(inner) => analyze(inner)
      case None        => Incompatible(s"$constructor is missing its type argument")

  typ match
    case Type.Reference(_, fqn, args) =>
      fqn match
        case FQName.sdk("Basics", "Bool") =>
          Compatible(AbiType.I32)

        case FQName.sdk("Basics", "Int") =>
          config.arbitraryIntHandling match
            case ArbitraryIntHandling.Error =>
              Incompatible("Arbitrary-precision Int not allowed at boundary")
            case ArbitraryIntHandling.DefaultI32 =>
              Compatible(AbiType.I32)
            case ArbitraryIntHandling.DefaultI64 | ArbitraryIntHandling.Warn =>
              Compatible(AbiType.I64)

        case FQName.sdk("Int", "Int32") =>
          Compatible(AbiType.I32)

        case FQName.sdk("Int", "Int64") =>
          Compatible(AbiType.I64)

        case FQName.sdk("Basics", "Float") =>
          // Honour the configured policy; the default (F64) keeps the previous result.
          config.arbitraryFloatHandling match
            case ArbitraryFloatHandling.Error =>
              Incompatible("Unsized Float not allowed at boundary")
            case ArbitraryFloatHandling.F64 | ArbitraryFloatHandling.Warn =>
              Compatible(AbiType.F64)

        case FQName.sdk("String", "String") =>
          Compatible(AbiType.Composite(stringLayout))

        case FQName.sdk("Maybe", "Maybe") =>
          withTypeArgument("Maybe", args.headOption)(analyzeOptionType(_, config))

        case FQName.sdk("List", "List") =>
          withTypeArgument("List", args.headOption)(analyzeListType(_, config))

        case _ =>
          analyzeCustomType(fqn, args, config)

    case Type.Record(_, fields) =>
      analyzeRecordType(fields, config)

    case Type.Tuple(_, elements) =>
      analyzeTupleType(elements, config)

    case Type.Variable(_, _) =>
      Incompatible("Type variables not allowed at boundary")

    case Type.ExtensibleRecord(_, _, _) =>
      Incompatible("Extensible records not allowed at boundary")

    case Type.Function(_, _, _) =>
      Incompatible("Higher-order functions require resource pattern")

    case Type.Unit(_) =>
      Compatible(AbiType.I32) // Unit maps to no value, but we use i32(0)

/** Layout used for strings: two i32 fields, `ptr` at offset 0 and `len` at offset 4. */
private val stringLayout =
  val wordFields = List("ptr" -> 0, "len" -> 4).map { (label, at) =>
    LayoutField(name = Name(label), offset = at, abiType = AbiType.I32)
  }
  MemoryLayout(size = 8, alignment = 4, fields = wordFields)

Phase 2: Lowering

ABI Signature Generation

Generate the Canonical ABI signature for boundary functions.

/** ABI-level signature of a boundary function.
  *
  * @param params       parameters after flattening
  * @param results      results after flattening
  * @param needsRealloc whether any parameter or result reports `needsMemory`
  */
case class AbiSignature(
  params: List[AbiParam],
  results: List[AbiResult],
  needsRealloc: Boolean,
)

/** One ABI-level parameter.
  *
  * @param name     parameter name on the Wasm side
  * @param abiType  ABI representation
  * @param original the Morphir type this parameter was derived from
  */
case class AbiParam(
  name: String,
  abiType: AbiType,
  original: Type,
)

/** One ABI-level result.
  *
  * @param abiType  ABI representation
  * @param original the Morphir type this result was derived from
  */
case class AbiResult(
  abiType: AbiType,
  original: Type,
)

/** Builds the ABI signature for a boundary function.
  *
  * Each parameter type and the result type are checked with `analyzeType`;
  * the first incompatibility is returned as a `Left` naming the offending
  * parameter (or the return type).
  *
  * @param boundary the boundary function to describe
  * @param config   backend configuration forwarded to `analyzeType`
  * @return the flattened signature, or an error message
  */
def generateAbiSignature(boundary: BoundaryInfo, config: WasmBackendConfig): Either[String, AbiSignature] =
  def analyzeParam(name: Name, typ: Type): Either[String, (Name, Type, AbiType)] =
    analyzeType(typ, config) match
      case TypeCompatibility.Compatible(abiType) =>
        Right((name, typ, abiType))
      case TypeCompatibility.Incompatible(reason) =>
        Left(s"Parameter '$name': $reason")

  // A `def`, so the result is only analysed once all parameters are known to be compatible.
  def analyzeResult: Either[String, AbiType] =
    analyzeType(boundary.outputType, config) match
      case TypeCompatibility.Compatible(abiType) =>
        Right(abiType)
      case TypeCompatibility.Incompatible(reason) =>
        Left(s"Return type: $reason")

  for
    analyzedParams <- boundary.inputTypes.traverse { (name, typ) => analyzeParam(name, typ) }
    resultType     <- analyzeResult
  yield
    AbiSignature(
      params = analyzedParams.flatMap { (name, typ, abiType) => flattenToParams(name, typ, abiType) },
      results = flattenToResults(resultType),
      // Decide on the un-flattened types: once a composite such as a string is
      // flattened into plain i32 fields, its need for memory is no longer visible.
      needsRealloc = analyzedParams.exists { (_, _, abiType) => abiType.needsMemory } ||
        resultType.needsMemory,
    )

// Largest number of fields a composite parameter may have and still be flattened.
private val MaxFlatParams = 16

/** Flattens one parameter into ABI parameters (Canonical ABI flattening).
  *
  * A composite with at most `MaxFlatParams` fields becomes one parameter per
  * field, named `<param>_<field>`; anything else is passed as a single value
  * (or pointer for large composites). Every produced parameter records
  * `original`, the Morphir type of the parameter it came from.
  *
  * NOTE(review): the limit is applied per parameter and nested composites are
  * not flattened recursively — confirm against the Canonical ABI, which
  * appears to cap the total flattened count.
  */
private def flattenToParams(name: Name, original: Type, abiType: AbiType): List[AbiParam] =
  abiType match
    case AbiType.Composite(layout) if layout.fields.size <= MaxFlatParams =>
      // Flatten small composites
      layout.fields.map { field =>
        AbiParam(s"${name}_${field.name}", field.abiType, original)
      }
    case _ =>
      // Pass as single value (or pointer for large composites)
      List(AbiParam(name.toString, abiType, original))

Phase 3: Code Generation

Value Expression Compilation

Compile Morphir IR value expressions to Wasm instructions.

/** Wasm instruction representation.
  *
  * Load/store cases carry the static `offset` and the `align` immediate.
  * Structured control-flow cases carry their bodies as nested instruction lists.
  */
enum WasmInstr:
  // Constants
  case I32Const(value: Int)
  case I64Const(value: Long)
  case F32Const(value: Float)
  case F64Const(value: Double)

  // Local variables
  case LocalGet(index: Int)
  case LocalSet(index: Int)
  case LocalTee(index: Int)

  // Globals
  case GlobalGet(name: String)
  case GlobalSet(name: String)

  // Memory
  case I32Load(offset: Int, align: Int)
  case I32Store(offset: Int, align: Int)
  case I64Load(offset: Int, align: Int)
  case I64Store(offset: Int, align: Int)
  // Float accesses: F64Load is needed by generateRecordLowerStub; the rest
  // complete the set so float fields can be both read and written.
  case F32Load(offset: Int, align: Int)
  case F32Store(offset: Int, align: Int)
  case F64Load(offset: Int, align: Int)
  case F64Store(offset: Int, align: Int)

  // Arithmetic
  case I32Add, I32Sub, I32Mul, I32DivS, I32DivU
  case I64Add, I64Sub, I64Mul, I64DivS, I64DivU
  case F64Add, F64Sub, F64Mul, F64Div

  // Comparison
  case I32Eq, I32Ne, I32LtS, I32GtS, I32LeS, I32GeS
  case I64Eq, I64Ne, I64LtS, I64GtS
  case F64Eq, F64Ne, F64Lt, F64Gt, F64Le, F64Ge

  // Control flow
  case Block(label: String, body: List[WasmInstr])
  case Loop(label: String, body: List[WasmInstr])
  case If(thenBranch: List[WasmInstr], elseBranch: List[WasmInstr])
  case Br(label: String)
  case BrIf(label: String)
  case Return

  // Calls
  case Call(name: String)
  case CallIndirect(typeIndex: Int)

  // Misc
  case Drop
  case Unreachable

/** Compiles Morphir IR value expressions to Wasm instructions.
  *
  * The generator is stateful: `locals` maps each bound Morphir name to the
  * index of the Wasm local that holds it.
  *
  * @param config backend configuration
  */
class CodeGenerator(config: WasmBackendConfig):
  private var localIndex = 0
  private val locals = mutable.Map[Name, Int]()

  /** Compiles `value` to a list of Wasm instructions.
    *
    * @throws NoSuchElementException        if a variable has no local assigned
    * @throws UnsupportedOperationException for value or literal forms that are not handled
    */
  def compileValue(value: Value): List[WasmInstr] =
    value match
      case Value.Literal(_, lit) =>
        compileLiteral(lit)

      case Value.Variable(_, name) =>
        // Same exception type as a plain map lookup, but the message names the variable.
        val slot = locals.getOrElse(
          name,
          throw new NoSuchElementException(s"Unbound variable '$name': no Wasm local assigned"),
        )
        List(WasmInstr.LocalGet(slot))

      case Value.Reference(_, fqName) =>
        // Function reference - will be resolved at link time
        List(WasmInstr.Call(toWasmName(fqName)))

      case Value.Apply(_, func, arg) =>
        compileApply(func, arg)

      case Value.Lambda(_, pattern, body) =>
        compileLambda(pattern, body)

      case Value.IfThenElse(_, cond, thenBranch, elseBranch) =>
        // The condition is emitted first; `If` then selects one of the compiled branches.
        compileValue(cond) ++
          List(WasmInstr.If(compileValue(thenBranch), compileValue(elseBranch)))

      case Value.PatternMatch(_, subject, cases) =>
        compilePatternMatch(subject, cases)

      case Value.Tuple(_, elements) =>
        compileTuple(elements)

      case Value.Record(_, fields) =>
        compileRecord(fields)

      case Value.List(_, items) =>
        compileList(items)

      case _ =>
        throw new UnsupportedOperationException(s"Unsupported value: $value")

  /** Compiles a literal to the constant instruction(s) that produce it. */
  private def compileLiteral(lit: Literal): List[WasmInstr] =
    lit match
      case Literal.BoolLiteral(b) =>
        List(WasmInstr.I32Const(if b then 1 else 0))
      case Literal.IntegerLiteral(n) =>
        // The width follows the literal's magnitude.
        // NOTE(review): the declared type of the literal is not consulted — confirm
        // that an Int64-typed small literal is meant to become i32.const.
        if n.isValidInt then List(WasmInstr.I32Const(n.toInt))
        else if n.isValidLong then List(WasmInstr.I64Const(n.toLong))
        else
          // `toLong` would silently keep only the low 64 bits.
          throw new UnsupportedOperationException(
            s"Integer literal $n does not fit in a 64-bit Wasm integer"
          )
      case Literal.FloatLiteral(f) =>
        List(WasmInstr.F64Const(f))
      case Literal.StringLiteral(s) =>
        compileStringLiteral(s)
      case _ =>
        throw new UnsupportedOperationException(s"Unsupported literal: $lit")

Lifting and Lowering Stubs

Generate wrapper functions for boundary crossing.

/** The pair of wrapper functions generated for a boundary crossing.
  *
  * @param lowerStub High-level -> Core Wasm (for exports)
  * @param liftStub  Core Wasm -> High-level (for imports)
  */
case class LiftLowerStubs(
  lowerStub: WasmFunction,
  liftStub: WasmFunction,
)

/** Instructions that lower a string to `(ptr: i32, len: i32)` on the stack.
  *
  * Assumes the string is already in memory as a `(ptr, len)` struct whose
  * address is held in local 0.
  */
def generateStringLowerStub(): List[WasmInstr] =
  // Reads one i32 field of the struct addressed by local 0.
  def loadField(fieldOffset: Int): List[WasmInstr] =
    List(WasmInstr.LocalGet(0), WasmInstr.I32Load(fieldOffset, 4))

  loadField(0) ++ // ptr
    loadField(4)  // len

/** Instructions that lift `(ptr: i32, len: i32)` into the internal string struct.
  *
  * The inputs are read from local 0 (ptr) and local 1 (len). Local 2 is used
  * as scratch for the address returned by `malloc`. The struct address is
  * left on the stack.
  */
def generateStringLiftStub(): List[WasmInstr] =
  import WasmInstr.*

  // malloc(8) — size of (ptr, len); keep the address in local 2 and on the stack
  val allocateStruct = List(I32Const(8), Call("malloc"), LocalTee(2))
  // struct.ptr = input ptr (the address is still on the stack from LocalTee)
  val storePtr = List(LocalGet(0), I32Store(0, 4))
  // struct.len = input len
  val storeLen = List(LocalGet(2), LocalGet(1), I32Store(4, 4))
  // result: struct ptr
  val pushStruct = List(LocalGet(2))

  allocateStruct ++ storePtr ++ storeLen ++ pushStruct

/** Instructions that flatten a record's fields to stack values.
  *
  * For every field of `layout`, in order, the record address is read from
  * local 0 and the field is loaded from its offset.
  *
  * @param layout memory layout of the record
  * @throws UnsupportedOperationException if a field's ABI type is not I32, I64 or F64
  */
def generateRecordLowerStub(layout: MemoryLayout): List[WasmInstr] =
  layout.fields.flatMap { field =>
    val load = field.abiType match
      case AbiType.I32 => WasmInstr.I32Load(field.offset, 4)
      case AbiType.I64 => WasmInstr.I64Load(field.offset, 8)
      case AbiType.F64 => WasmInstr.F64Load(field.offset, 8)
      case other =>
        // Say which field could not be lowered instead of throwing a bare exception.
        throw new UnsupportedOperationException(
          s"Cannot lower record field '${field.name}' of ABI type $other"
        )
    List(WasmInstr.LocalGet(0), load) // record ptr, then the field load
  }

Phase 4: Emission

Module Assembly

Assemble the complete Wasm module from generated functions.

/** A complete Wasm module, one field per kind of module content. */
case class WasmModule(
  types: List[WasmFuncType],
  imports: List[WasmImport],
  functions: List[WasmFunction],
  tables: List[WasmTable],
  memories: List[WasmMemory],
  globals: List[WasmGlobal],
  exports: List[WasmExport],
  start: Option[Int], // optional start entry
  elements: List[WasmElement],
  data: List[WasmData],
)

/** A function defined inside the module.
  *
  * @param name      function name
  * @param typeIndex index of the function's type
  * @param locals    types of the declared locals
  * @param body      instruction sequence
  */
case class WasmFunction(
  name: String,
  typeIndex: Int,
  locals: List[WasmValType],
  body: List[WasmInstr],
)

/** One export entry.
  *
  * @param name  exported name
  * @param kind  what is exported
  * @param index index of the exported item
  */
case class WasmExport(
  name: String,
  kind: ExportKind,
  index: Int,
)

/** The kinds of item a module can export. */
enum ExportKind:
  case Func
  case Table
  case Memory
  case Global

/** Incrementally assembles a [[WasmModule]] from functions and string constants. */
class ModuleBuilder:
  private val types = mutable.ListBuffer[WasmFuncType]()
  private val imports = mutable.ListBuffer[WasmImport]()
  private val functions = mutable.ListBuffer[WasmFunction]()
  private val exports = mutable.ListBuffer[WasmExport]()
  private val data = mutable.ListBuffer[WasmData]()

  // Offset at which the next data segment is placed; equals the total size of
  // all segments added so far, so it never has to be recomputed.
  private var nextDataOffset = 0

  /** Appends `func` and optionally exports it under its own name.
    *
    * `export` is a reserved word in Scala 3, so the parameter is written in
    * backticks; callers naming it must write `` `export` = true ``.
    *
    * NOTE(review): the index is the position among defined functions only.
    * If function imports are ever added, they presumably need to be counted
    * first — confirm against the Wasm index-space rules.
    *
    * @return the index assigned to the function
    */
  def addFunction(func: WasmFunction, `export`: Boolean = false): Int =
    val index = functions.size
    functions += func
    if `export` then
      exports += WasmExport(func.name, ExportKind.Func, index)
    index

  /** Adds the UTF-8 bytes of `s` as a data segment placed directly after the previous one.
    *
    * @return the offset of the new segment
    */
  def addStringConstant(s: String): Int =
    val bytes = s.getBytes(StandardCharsets.UTF_8)
    val offset = nextDataOffset
    data += WasmData(offset, bytes.toList)
    nextDataOffset += bytes.length
    offset

  /** Produces the module: one funcref table, one memory of one page, no globals. */
  def build(): WasmModule =
    WasmModule(
      types = types.toList,
      imports = imports.toList,
      functions = functions.toList,
      tables = List(WasmTable(WasmTableType.FuncRef, 0, None)),
      memories = List(WasmMemory(1, None)), // 1 page = 64 KiB
      // NOTE(review): the runtime `malloc` reads a global named "heap_ptr";
      // none is declared here — confirm where it is supposed to come from.
      globals = List.empty,
      exports = exports.toList,
      start = None,
      elements = List.empty,
      data = data.toList,
    )

  /** Builds the module and serialises it with `WasmBinaryWriter`. */
  def emitBinary(): Array[Byte] =
    WasmBinaryWriter.write(build())

Runtime Support

The Wasm backend requires a small runtime for memory management and common operations.

Memory Allocator

/** Runtime functions that must be linked into every module. */
val runtimeFunctions: List[WasmFunction] =
  import WasmInstr.*

  // malloc: simple bump allocator. Returns the current "heap_ptr" and advances
  // it by the requested size (param 0); local 1 holds the old pointer.
  // NOTE(review): the size is not rounded for alignment and memory is never
  // grown — confirm that this is acceptable for the intended workloads.
  val bumpAllocator = WasmFunction(
    name = "malloc",
    typeIndex = 0, // (i32) -> i32
    locals = List(WasmValType.I32),
    body = List(
      GlobalGet("heap_ptr"),
      LocalTee(1),
      LocalGet(0), // size
      I32Add,
      GlobalSet("heap_ptr"),
      LocalGet(1), // return old heap_ptr
    ),
  )

  // realloc (Component Model requirement). Params: old_ptr, old_size, align, new_size.
  // For now it only allocates `new_size` bytes; old_ptr, old_size and align are
  // ignored, so existing contents are not copied to the new block.
  val canonicalRealloc = WasmFunction(
    name = "cabi_realloc",
    typeIndex = 1, // (i32, i32, i32, i32) -> i32
    locals = List.empty,
    body = List(
      LocalGet(3), // new_size
      Call("malloc"),
    ),
  )

  List(bumpAllocator, canonicalRealloc)

WIT Interface Generation

For Component Model interop, generate WIT interface definitions from Morphir modules.

/** Renders the WIT interface for `module`.
  *
  * The output is a `package` line, then one `interface` block holding every
  * type definition of the module followed by the signatures of all exported
  * boundary functions.
  *
  * @param module     the module whose types are rendered
  * @param boundaries detected boundary functions; only exports are rendered
  */
def generateWit(module: ModuleDefinition, boundaries: List[BoundaryInfo]): String =
  val out = new StringBuilder

  out ++= s"package ${toWitPackage(module.fqName)};\n\n"
  out ++= s"interface ${toWitInterface(module.fqName)} {\n"

  // Type definitions
  module.types.foreach { (name, typeDef) =>
    out ++= s" ${generateWitType(name, typeDef)}\n"
  }

  // Function signatures for exports
  boundaries
    .filter(_.direction == BoundaryDirection.Export)
    .foreach(exported => out ++= s" ${generateWitFunction(exported)}\n")

  out ++= "}\n"
  out.result()

/** Renders one boundary function as a WIT function item.
  *
  * A Unit result is rendered without a `->` clause, which is how WIT writes a
  * function that returns nothing; `analyzeType` accepts Unit at a boundary,
  * so it must not fail here.
  */
private def generateWitFunction(boundary: BoundaryInfo): String =
  val params = boundary.inputTypes.map { (name, typ) =>
    s"${toWitName(name)}: ${toWitType(typ)}"
  }.mkString(", ")

  val resultClause = boundary.outputType match
    case Type.Unit(_) => ""
    case other        => s" -> ${toWitType(other)}"

  s"${toWitName(boundary.fqName.localName)}: func($params)$resultClause;"

/** Renders a Morphir type as a WIT type expression.
  *
  * NOTE(review): `Basics.Int` is always rendered as `s64`, whereas
  * `analyzeType` can map it to i32 depending on configuration — confirm the
  * two are meant to agree.
  *
  * @throws UnsupportedOperationException for types with no WIT rendering, or
  *         a `List` / `Maybe` reference that lacks its type argument
  */
private def toWitType(typ: Type): String =
  // The single type argument of a unary constructor, or a descriptive failure.
  def soleArgument(arg: Option[Type]): Type =
    arg.getOrElse(throw new UnsupportedOperationException(s"Cannot convert to WIT: $typ"))

  typ match
    // Bind the arguments here: a general `Type` has no `args` member to read them from.
    case Type.Reference(_, fqn, args) =>
      fqn match
        case FQName.sdk("Basics", "Bool")   => "bool"
        case FQName.sdk("Basics", "Int")    => "s64"
        case FQName.sdk("Int", "Int32")     => "s32"
        case FQName.sdk("Int", "Int64")     => "s64"
        case FQName.sdk("UInt", "UInt32")   => "u32"
        case FQName.sdk("UInt", "UInt64")   => "u64"
        case FQName.sdk("Basics", "Float")  => "f64" // WIT names its float types f32 / f64
        case FQName.sdk("String", "String") => "string"
        case FQName.sdk("List", "List")     => s"list<${toWitType(soleArgument(args.headOption))}>"
        case FQName.sdk("Maybe", "Maybe")   => s"option<${toWitType(soleArgument(args.headOption))}>"
        case _                              => toWitName(fqn.localName)
    case Type.Record(_, fields) =>
      val fieldStrs = fields.map(f => s"${toWitName(f.name)}: ${toWitType(f.fieldType)}")
      s"record { ${fieldStrs.mkString(", ")} }"
    case Type.Tuple(_, elems) =>
      s"tuple<${elems.map(toWitType).mkString(", ")}>"
    case _ =>
      throw new UnsupportedOperationException(s"Cannot convert to WIT: $typ")

Configuration

Backend configuration in morphir.toml:

[backend.wasm]
# Boundary handling
arbitrary_int = "error" # "error" | "warn" | "i32" | "i64"
arbitrary_float = "f64" # "error" | "warn" | "f64"

# String handling
validate_inbound_strings = true
validate_outbound_strings = false
invalid_utf8 = "trap" # "trap" | "replace" | "skip"

# Output
emit_component = true # Wrap in Component Model
emit_wit = true # Generate WIT interface file
optimization = "default" # "none" | "default" | "aggressive"

# Memory
initial_memory_pages = 1 # Initial memory size in pages (64 KiB per page)
max_memory_pages = 256 # Maximum memory size in pages

# Debug
emit_names = true # Include function names in output
emit_source_map = false # Emit DWARF debug info (NOTE(review): key says "source map", comment says DWARF; confirm which is emitted)

Open Questions

  1. Garbage Collection: Should we target Wasm GC proposal or use manual memory management?

  2. Tail Calls: Morphir's functional style benefits from tail call optimization. Should we target the tail-call proposal?

  3. Exception Handling: Should we use the Wasm exception-handling proposal or encode errors in return values?

  4. SIMD: Should we detect and use SIMD operations for list processing?

  5. Threading: Should we support shared memory and atomics for concurrent Morphir programs?