Fixed FMod, other subtle changes. The result: successfully runs all shaders

This commit is contained in:
2022-05-27 15:30:31 +02:00
parent f842ba6480
commit 9473519a4e
4 changed files with 129 additions and 25 deletions

7
README.md Normal file
View File

@@ -0,0 +1,7 @@
Installing on RPi (3 or 4):
```
curl https://apt.llvm.org/llvm-snapshot.gpg.key|sudo apt-key add -
echo deb https://apt.llvm.org/bullseye llvm-toolchain-bullseye-14 main | sudo tee /etc/apt/sources.list.d/llvm14.list
sudo apt update
sudo apt install llvm-14-tools llvm-14-dev clang build-essential cmake
```

View File

@@ -15,6 +15,7 @@ pkgs.clangStdenv.mkDerivation {
];
nativeBuildInputs = with pkgs; [
imagemagick
cmake
ninja
pkg-config

View File

@@ -30,6 +30,8 @@
#include <iostream>
#include <utility>
#include "spv_meta.hpp"
#include "llvm/Support/FormatVariadic.h"
#include <cstdio>
#include <string>
#pragma GCC diagnostic ignored "-Wunknown-pragmas"
@@ -382,6 +384,36 @@ struct CompilerImpl {
// decorations
std::map<uint32_t, std::string> value_names;
void generate_support_debug_fns() const {
std::vector<llvm::Type*> arg_types;
auto floatTy = llvm::Type::getFloatTy(*ctx);
auto voidTy = llvm::Type::getVoidTy(*ctx);
for (int i = 1; i <= 4; i++) {
// it'd be better to generate the calls to printf here, but I'll just include them in the render stub.
arg_types.push_back(floatTy);
auto vecTy = llvm::VectorType::get(floatTy, i, false);
char *afn_name, *vfn_name;
asprintf(&afn_name, "shs_debug_a%df", i);
asprintf(&vfn_name, "shs_debug_v%df", i);
// auto afn_name = llvm::formatv("shs_debug_a%df", i) ;
// auto vfn_name = llvm::formatv("shs_debug_v%df", i) ;
auto a_fn = llvm::Function::Create(llvm::FunctionType::get(voidTy, arg_types, false), llvm::Function::ExternalLinkage, afn_name, *module);
auto v_fn = llvm::Function::Create(llvm::FunctionType::get(voidTy, vecTy, false), llvm::Function::InternalLinkage, vfn_name, *module);
std::free(afn_name);
std::free(vfn_name);
auto bb = llvm::BasicBlock::Create(*ctx, "entry", v_fn);
builder->SetInsertPoint(bb);
std::vector<llvm::Value*> callArgs;
for (uint64_t j = 0; j < i; j++) {
callArgs.push_back(builder->CreateExtractElement(v_fn->getArg(0), j));
}
builder->CreateCall(a_fn, callArgs);
builder->CreateRetVoid();
}
}
public:
CompilerImpl() {
ctx = std::make_unique<llvm::LLVMContext>();
@@ -696,7 +728,7 @@ public:
auto vsum = builder->CreateFAddReduce(llvm::ConstantFP::get(sty, 0), vsq);
res = builder->CreateIntrinsic(llvm::Intrinsic::sqrt, sty, vsum);
} else {
res = builder->CreateUnaryIntrinsic(llvm::Intrinsic::sqrt, v);
res = builder->CreateUnaryIntrinsic(llvm::Intrinsic::fabs, v);
}
put_value(rid, res);
break;
@@ -704,25 +736,38 @@ public:
case GLSLstd450Normalize: { //69
auto v = OP_VALUE(4);
llvm::Value* res;
if (v->getType()->isVectorTy()) {
auto vty = llvm::dyn_cast<llvm::VectorType>(v->getType());
if (auto vty = llvm::dyn_cast<llvm::VectorType>(v->getType())) {
auto sty = vty->getScalarType();
auto vsq = builder->CreateFMul(v, v);
auto vsum = builder->CreateFAddReduce(llvm::ConstantFP::get(sty, 0), vsq);
res = builder->CreateIntrinsic(llvm::Intrinsic::sqrt, sty, vsum);
res = builder->CreateVectorSplat(vty->getElementCount(), res);
} else {
res = builder->CreateUnaryIntrinsic(llvm::Intrinsic::sqrt, v);
res = builder->CreateUnaryIntrinsic(llvm::Intrinsic::fabs, v);
}
res = builder->CreateFDiv(v, res);
put_value(rid, res);
break;
}
// case GLSLstd450Atanh: {
// auto val = cur_function->values[OP_WORD(4)];
// auto ty = val->getType()->getScalarType();
// put_value(rid, builder->CreateIntrinsic(llvm::Intrinsic::))
// }
case GLSLstd450Atanh: {
    // NOTE: this case does not compute atanh. It emits a call to one of the
    // shs_debug_* helpers with the operand and then forwards the operand
    // unchanged as the instruction's result.
    auto val = OP_VALUE(4);
    // Scalars go to the one-float helper; vectors go to the wrapper whose
    // name encodes the vector's element count.
    std::string fn_name = "shs_debug_a1f";
    if (auto vty = llvm::dyn_cast<llvm::VectorType>(val->getType())) {
        fn_name = "shs_debug_v" + std::to_string(vty->getElementCount().getFixedValue()) + "f";
    }
    auto fn = module->getFunction(fn_name);
    if (fn) {
        BOOST_LOG_TRIVIAL(debug) << "Calling function " << fn_name << ": " << fn;
        builder->CreateCall(fn, val);
    } else {
        // getFunction returns null when no function of that name exists in
        // the module; skip the debug call instead of emitting a call through
        // a null callee.
        BOOST_LOG_TRIVIAL(warning) << "Debug function " << fn_name << " not found; skipping debug call";
    }
    put_value(rid, val);
    break;
}
default: {
put_value(rid, builder->CreateFreeze(llvm::UndefValue::get(types[rty]->get_llvm_type())));
BOOST_LOG_TRIVIAL(warning) << "Unhandled GLSL extinst " << ext_inst;
@@ -921,7 +966,7 @@ public:
auto el = idx < v1_len
? builder->CreateExtractElement(v1, idx)
: builder->CreateExtractElement(v2, idx-v1_len);
builder->CreateInsertElement(result, el, didx);
result = builder->CreateInsertElement(result, el, didx);
}
put_value(rid, result);
@@ -1000,7 +1045,13 @@ public:
break;
}
case Op::OpFMod: {
auto value = builder->CreateFRem(OP_VALUE(2), OP_VALUE(3));
llvm::Value* a = OP_VALUE(2);
llvm::Value* b = OP_VALUE(3);
llvm::Value* floor = builder->CreateUnaryIntrinsic(llvm::Intrinsic::floor, builder->CreateFDiv(a,b));
auto value = builder->CreateFSub(a, builder->CreateFMul(b, floor));
//llvm::Value* raw_rem = builder->CreateFRem(OP_VALUE(2), OP_VALUE(3));
put_value(rid, value);
break;
}
@@ -1012,8 +1063,7 @@ public:
break;
}
case Op::OpDot: {
auto vtv = builder->CreateFMul(cur_function->values[OP_WORD(2)],
cur_function->values[OP_WORD(3)]);
auto vtv = builder->CreateFMul(OP_VALUE(2),OP_VALUE(3));
auto value = builder->CreateFAddReduce(llvm::ConstantFP::get(vtv->getType()->getScalarType(), 0.0),
vtv);
put_value(rid, value);
@@ -1106,6 +1156,7 @@ public:
generate_support_render_pixel();
}
private:
void generate_support_get_context_size() const {
auto ty_fn = llvm::FunctionType::get(llvm::Type::getInt32Ty(*ctx), false);
auto fn = llvm::Function::Create(
@@ -1322,6 +1373,8 @@ Compiler::Compiler() {
llvm::Optional<llvm::orc::ThreadSafeModule> Compiler::compile(std::vector<uint32_t> &spv_module) {
std::error_code err_code;
auto impl = std::make_unique<CompilerImpl>();
impl->generate_support_debug_fns();
auto ret = impl->process_module(spv_module);
impl->generate_support();

View File

@@ -10,6 +10,14 @@
#include <sys/types.h>
#include <stdio.h>
#include <err.h>
#include <math.h>
#ifndef FRAME_DIM
#define FRAME_DIM 32
#endif
#ifndef FRAME_COUNT
#define FRAME_COUNT 100
#endif
// render API:
struct Uniforms {
@@ -23,7 +31,7 @@ struct Uniforms {
float iSampleRate;
float iChannelResolution[3][4];
} uniforms = {
.iResolution = {32, 32},
.iResolution = {FRAME_DIM, FRAME_DIM},
.iTime = 0,
.iTimeDelta = 0,
.iFrame = 0,
@@ -40,26 +48,48 @@ void setup_frame(struct frame_context_t* ctx, struct Uniforms *uniforms);
void render_pixel(struct frame_context_t* ctx, float x, float y, float pixel[3]);
unsigned int get_context_size(void);
#define STRINGIFY2(x) #x
#define STRINGIFY(x) STRINGIFY2(x)
// program utilities
const char* PPM_HDR = "P6\n32 32\n255\n";
const char* PPM_HDR = "P6\n" STRINGIFY(FRAME_DIM) " " STRINGIFY(FRAME_DIM) "\n255\n";
unsigned char frame_buf[32][32][3];
unsigned char frame_buf[FRAME_DIM][FRAME_DIM][3];
unsigned char linear_to_srgb(float lin) {
float srgb;
if (lin < 0) {
return 0;
} else if (lin > 1) {
return 255;
} else if (lin < 0.0031308) {
return (unsigned char)(lin * 12.92);
} else {
float srgb = (powf(lin, 1.0f/2.4f) * 1.055f - 0.055f) * 256;
if (srgb > 255.f) {
return 255;
} else {
return (unsigned char)srgb;
}
}
}
int main(int argc, char** argv) {
unsigned header_len = strlen(PPM_HDR);
float fdelta = 1. / 30.;
struct frame_context_t *ctx = malloc(get_context_size());
for (int frame = 0; frame < 100; frame++) {
uniforms.iFrame = frame;
// printf("%s", PPM_HDR);
for (int frame = 0; frame < FRAME_COUNT; frame++) {
uniforms.iFrame = (float)frame;
uniforms.iTimeDelta = fdelta;
uniforms.iTime = fdelta * frame;
uniforms.iTime = fdelta * (float)frame;
setup_frame(ctx, &uniforms);
for (int y = 0; y < 32; y++) {
for (int x = 0; x < 32; x++) {
for (int y = 0; y < FRAME_DIM; y++) {
for (int x = 0; x < FRAME_DIM; x++) {
float pixel[3];
unsigned char *pxl_dst = frame_buf[y][x];
render_pixel(ctx, (x + 0.5), (y + 0.5), pixel);
// printf("-- pixel %f,%f\n", (x+0.5f), (y+0.5f));
render_pixel(ctx, (x + 0.5f), (y + 0.5f), pixel);
for (int i = 0; i < 3; i++) {
pixel[i] *= 256;
if (pixel[i] < 0) { pixel[i] = 0; }
@@ -75,11 +105,24 @@ int main(int argc, char** argv) {
err(1, "Failed to open %s", fname);
}
write(fd, PPM_HDR, header_len);
write(fd, frame_buf, 32*32*3);
write(fd, frame_buf, FRAME_DIM*FRAME_DIM*3);
close(fd);
printf("%s\n", fname);
fprintf(stderr, "%s\n", fname);
free(fname);
}
return 0;
}
}
// Prints one float to stdout as "Debug: [v1]".
void shs_debug_a1f(float v1) {
    // The cast spells out the float -> double promotion that printf's
    // variadic call performs anyway.
    printf("Debug: [%f]\n", (double)v1);
}
// Prints two floats to stdout as "Debug: [v1, v2]".
void shs_debug_a2f(float v1, float v2) {
    // The casts spell out the float -> double promotion that printf's
    // variadic call performs anyway.
    printf("Debug: [%f, %f]\n",
           (double)v1,
           (double)v2);
}
// Prints three floats to stdout as "Debug: [v1, v2, v3]".
void shs_debug_a3f(float v1, float v2, float v3) {
    // The casts spell out the float -> double promotion that printf's
    // variadic call performs anyway.
    printf("Debug: [%f, %f, %f]\n",
           (double)v1,
           (double)v2,
           (double)v3);
}
// Prints four floats to stdout as "Debug: [v1, v2, v3, v4]".
void shs_debug_a4f(float v1, float v2, float v3, float v4) {
    // The casts spell out the float -> double promotion that printf's
    // variadic call performs anyway.
    printf("Debug: [%f, %f, %f, %f]\n",
           (double)v1,
           (double)v2,
           (double)v3,
           (double)v4);
}