Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ uv run ruff format gpu_test/
- **Operations**: All take stack as input and produce stack as output (except `forth.stack`)
- **Supported Words**: literals, `DUP DROP SWAP OVER ROT NIP TUCK PICK ROLL`, `+ - * / MOD`, `AND OR XOR NOT LSHIFT RSHIFT`, `= < > <> <= >= 0=`, `@ !`, `CELLS`, `IF ELSE THEN`, `BEGIN UNTIL`, `BEGIN WHILE REPEAT`, `DO LOOP +LOOP I J K`, `LEAVE UNLOOP EXIT`, `TID-X/Y/Z BID-X/Y/Z BDIM-X/Y/Z GDIM-X/Y/Z GLOBAL-ID` (GPU indexing).
- **Kernel Parameters**: Declared in the `\!` header. `\! kernel <name>` is required and must appear first. `\! param <name> i64[<N>]` becomes a `memref<Nxi64>` argument; `\! param <name> i64` becomes an `i64` argument. Using a param name in code emits `forth.param_ref` (arrays push address; scalars push value).
- **Shared Memory**: `\! shared <name> i64[<N>]` declares GPU shared (workgroup) memory. Emits a `memref.alloca` tagged with the `forth.shared_name` attribute at kernel entry; ForthToGPU converts it to a `gpu.func` workgroup attribution (`memref<Nxi64, #gpu.address_space<workgroup>>`). Using the shared name in code pushes its base address onto the stack. Shared names cannot be referenced inside word definitions (the translator reports an error); pass the address from the caller instead.
- **Conversion**: `!forth.stack` → `memref<256xi64>` with explicit stack pointer
- **GPU**: Functions wrapped in `gpu.module`, `main` gets `gpu.kernel` attribute, configured with bare pointers for NVVM conversion
- **User-defined Words**: Modeled as `func.func` with signature `(!forth.stack) -> !forth.stack`, called via `func.call`
Expand Down
16 changes: 15 additions & 1 deletion lib/Conversion/ForthToGPU/ForthToGPU.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -158,7 +158,10 @@ struct ConvertForthToGPUPass
}

// Clone ops from each source block into the corresponding destination
// block, with two transformations:
// - func.return -> gpu.return
// - memref.alloca tagged with `forth.shared_name` -> gpu.func workgroup
//   attribution (the alloca is not cloned; its result is remapped to the
//   attribution's block argument)
auto *ctx = funcOp.getContext();
for (auto [srcBlock, dstBlock] :
llvm::zip(funcOp.getBody(), gpuFunc.getBody())) {
rewriter.setInsertionPointToEnd(&dstBlock);
Expand All @@ -168,6 +171,17 @@ struct ConvertForthToGPUPass
for (Value operand : returnOp.getOperands())
remappedOperands.push_back(mapping.lookup(operand));
rewriter.create<gpu::ReturnOp>(returnOp.getLoc(), remappedOperands);
} else if (auto allocaOp = dyn_cast<memref::AllocaOp>(&op);
allocaOp && allocaOp->hasAttr("forth.shared_name")) {
auto origType = cast<MemRefType>(allocaOp.getType());
auto addressSpace =
gpu::AddressSpaceAttr::get(ctx, gpu::AddressSpace::Workgroup);
auto sharedType =
MemRefType::get(origType.getShape(), origType.getElementType(),
MemRefLayoutAttrInterface{}, addressSpace);
BlockArgument attr =
gpuFunc.addWorkgroupAttribution(sharedType, allocaOp.getLoc());
mapping.map(allocaOp.getResult(), attr);
} else {
rewriter.clone(op, mapping);
}
Expand Down
41 changes: 38 additions & 3 deletions lib/Translation/ForthToMLIR/ForthToMLIR.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -279,13 +279,14 @@ LogicalResult ForthParser::parseHeader() {
std::string nameUpper = toUpperCase(tokens[1]);
for (const auto &param : paramDecls) {
if (param.name == nameUpper) {
return emitErrorAt(lineLoc,
"duplicate parameter name: " + nameUpper);
return emitErrorAt(lineLoc, "duplicate name: " + nameUpper +
" (already declared as param)");
}
}
for (const auto &shared : sharedDecls) {
if (shared.name == nameUpper) {
return emitErrorAt(lineLoc, "duplicate shared name: " + nameUpper);
return emitErrorAt(lineLoc, "duplicate name: " + nameUpper +
" (already declared as shared)");
}
}

Expand Down Expand Up @@ -389,6 +390,30 @@ Value ForthParser::emitOperation(StringRef word, Value inputStack,
}
}

// Check if word is a shared memory name (only valid outside word definitions)
if (!inWordDefinition) {
auto it = sharedAllocs.find(word);
if (it != sharedAllocs.end()) {
Value alloca = it->second;
Value ptrIndex =
builder.create<memref::ExtractAlignedPointerAsIndexOp>(loc, alloca);
Value ptrI64 = builder.create<arith::IndexCastOp>(
loc, builder.getI64Type(), ptrIndex);
return builder
.create<forth::PushValueOp>(loc, stackType, inputStack, ptrI64)
.getOutputStack();
}
} else {
for (const auto &shared : sharedDecls) {
if (word == shared.name) {
(void)emitError("shared memory '" + shared.name +
"' cannot be referenced inside a word definition; "
"pass the address from the caller instead");
return nullptr;
}
}
}

// Check user-defined words first
std::string mangledWord = mangleForthName(word);
if (wordDefs.count(mangledWord)) {
Expand Down Expand Up @@ -1019,6 +1044,16 @@ OwningOpRef<ModuleOp> ForthParser::parseModule() {
Block *entryBlock = funcOp.addEntryBlock();
builder.setInsertionPointToStart(entryBlock);

// Emit shared memory allocations at kernel entry
for (const auto &shared : sharedDecls) {
int64_t size = shared.isArray ? shared.size : 1;
auto memrefType = MemRefType::get({size}, builder.getI64Type());
Value alloca = builder.create<memref::AllocaOp>(loc, memrefType);
alloca.getDefiningOp()->setAttr("forth.shared_name",
builder.getStringAttr(shared.name));
sharedAllocs[shared.name] = alloca;
}

// Parse Forth operations
Value finalStack;
if (failed(parseOperations(finalStack))) {
Expand Down
3 changes: 2 additions & 1 deletion lib/Translation/ForthToMLIR/ForthToMLIR.h
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
#include "mlir/IR/BuiltinOps.h"
#include "mlir/IR/MLIRContext.h"
#include "llvm/Support/SourceMgr.h"
#include "llvm/ADT/StringMap.h"
#include <string>
#include <unordered_set>
#include <vector>
Expand All @@ -25,7 +26,6 @@ struct ParamDecl {
};

/// A declared shared memory region: `shared <name> <type>`.
/// Each declaration is emitted as a `memref.alloca` tagged with
/// `forth.shared_name` at kernel entry; a non-array declaration gets size 1.
struct SharedDecl {
std::string name;
bool isArray = false;
Expand Down Expand Up @@ -92,6 +92,7 @@ class ForthParser {
std::unordered_set<std::string> wordDefs;
std::vector<ParamDecl> paramDecls;
std::vector<SharedDecl> sharedDecls;
llvm::StringMap<Value> sharedAllocs;
std::string kernelName;
const char *headerEndPtr = nullptr;
bool inWordDefinition = false;
Expand Down
18 changes: 18 additions & 0 deletions test/Conversion/ForthToGPU/shared-memory.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
// RUN: %warpforth-opt --convert-forth-to-gpu %s | %FileCheck %s

// CHECK: gpu.module @warpforth_module
// CHECK: gpu.func @main(%arg0: memref<256xi64> {forth.param_name = "DATA"})
// CHECK-SAME: workgroup(%{{.*}}: memref<256xi64, #gpu.address_space<workgroup>>)
// CHECK-SAME: kernel
// CHECK-NOT: memref.alloca() {forth.shared_name
// CHECK: memref.extract_aligned_pointer_as_index %{{.*}} : memref<256xi64, #gpu.address_space<workgroup>>
// CHECK: gpu.return

// Test input: a kernel-tagged func.func whose body holds a memref.alloca
// carrying the forth.shared_name attribute. The expectations above require the
// converted gpu.func to expose that buffer as a workgroup attribution instead.
module {
func.func private @main(%arg0: memref<256xi64> {forth.param_name = "DATA"}) attributes {forth.kernel} {
// Shared buffer declaration; the expectations above require that no tagged
// alloca remains in the converted output.
%alloca = memref.alloca() {forth.shared_name = "SCRATCH"} : memref<256xi64>
// A use of the alloca result; after conversion its operand type is expected
// to carry the workgroup address space.
%ptr = memref.extract_aligned_pointer_as_index %alloca : memref<256xi64> -> index
// Not referenced by any other op in this function.
%c0 = arith.constant 0 : index
return
}
}
21 changes: 21 additions & 0 deletions test/Pipeline/shared-memory.forth
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
\ RUN: %warpforth-translate --forth-to-mlir %s | %warpforth-opt --warpforth-pipeline | %FileCheck %s
\ RUN: %warpforth-translate --forth-to-mlir %s | %warpforth-opt --convert-forth-to-memref --convert-forth-to-gpu | %FileCheck %s --check-prefix=MID

\ Verify that a kernel using shared memory goes through the full pipeline and produces a gpu.binary
\ CHECK: gpu.binary @warpforth_module

\ Verify intermediate MLIR structure: shared alloca becomes workgroup attribution
\ MID: gpu.module @warpforth_module
\ MID: gpu.func @main(%arg0: memref<256xi64> {forth.param_name = "DATA"})
\ MID-SAME: workgroup(%{{.*}}: memref<256xi64, #gpu.address_space<workgroup>>)
\ MID-SAME: kernel
\ MID: memref.extract_aligned_pointer_as_index %{{.*}} : memref<256xi64, #gpu.address_space<workgroup>>
\ MID: llvm.store
\ MID: gpu.return

\! kernel main
\! param DATA i64[256]
\! shared SCRATCH i64[256]
GLOBAL-ID CELLS SCRATCH + !
GLOBAL-ID CELLS SCRATCH + @
GLOBAL-ID CELLS DATA + !
2 changes: 1 addition & 1 deletion test/Translation/Forth/header-duplicate-param-error.forth
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
\ RUN: %not %warpforth-translate --forth-to-mlir %s 2>&1 | %FileCheck %s
\ CHECK: duplicate parameter name: A
\ CHECK: duplicate name: A (already declared as param)
\! kernel main
\! param A i64[4]
\! param A i64[8]
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
\ RUN: %not %warpforth-translate --forth-to-mlir %s 2>&1 | %FileCheck %s
\ CHECK: duplicate name: A (already declared as param)
\! kernel main
\! param A i64[4]
\! shared A i64[8]
12 changes: 12 additions & 0 deletions test/Translation/Forth/shared-declarations.forth
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
\ RUN: %warpforth-translate --forth-to-mlir %s | %FileCheck %s

\ Verify shared memory declarations produce tagged alloca and pointer push sequence
\ CHECK: func.func private @main(%arg0: memref<256xi64> {forth.param_name = "DATA"})
\ CHECK: memref.alloca() {forth.shared_name = "SCRATCH"} : memref<256xi64>
\ CHECK: memref.extract_aligned_pointer_as_index
\ CHECK: arith.index_cast
\ CHECK: forth.push_value
\! kernel main
\! param DATA i64[256]
\! shared SCRATCH i64[256]
SCRATCH
6 changes: 6 additions & 0 deletions test/Translation/Forth/shared-ref-in-word-error.forth
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
\ RUN: %not %warpforth-translate --forth-to-mlir %s 2>&1 | %FileCheck %s
\ CHECK: shared memory 'SCRATCH' cannot be referenced inside a word definition
\! kernel main
\! shared SCRATCH i64[256]
: BAD-WORD SCRATCH @ ;
BAD-WORD