*/
#include "bit.h"
-#include "panfrost/pandecode/decode.h"
+#include "panfrost/lib/decode.h"
+#include "drm-uapi/panfrost_drm.h"
+#include "panfrost/lib/pan_encoder.h"
/* Standalone compiler tests submitting jobs directly to the hardware. Uses the
* `bit` prefix for `BIfrost Tests` and because bit sounds wicked cool. */
pandecode_inject_mmap(bo->gpu, bo->cpu, bo->size, NULL);
return bo;
}
+
+/* Opens the panfrost render node and prepares a device for running tests.
+ *
+ * memctx: allocation context handed to rzalloc() for the device structure and
+ * to panfrost_open_device().
+ *
+ * Returns the opened device. If no render node can be opened, a diagnostic is
+ * written to stderr and the process aborts, so this never returns on failure.
+ * As side effects, pandecode is initialized and the GPU ID is printed to
+ * stdout in hexadecimal. */
+
+struct panfrost_device *
+bit_initialize(void *memctx)
+{
+        int fd = drmOpenWithType("panfrost", NULL, DRM_NODE_RENDER);
+
+        /* A missing or inaccessible device is an ordinary runtime condition.
+         * Report it explicitly and stop, rather than relying on
+         * unreachable(), which is intended for paths that can never execute
+         * and gives no guarantee once assertions are compiled out. */
+        if (fd < 0) {
+                fprintf(stderr, "No panfrost device found. Try chmod?\n");
+                abort();
+        }
+
+        struct panfrost_device *dev = rzalloc(memctx, struct panfrost_device);
+        panfrost_open_device(memctx, fd, dev);
+
+        pandecode_initialize(true);
+        printf("%X\n", dev->gpu_id);
+
+        return dev;
+}
+
+/* Wraps a payload in a job descriptor header, submits it to the kernel and
+ * blocks until the job's out-sync object signals.
+ *
+ * dev:          device to submit on (dev->fd is the DRM file descriptor).
+ * T:            job type written into the descriptor header.
+ * payload:      job payload, copied immediately after the header.
+ * payload_size: size of the payload in bytes; must fit in the job BO together
+ *               with the header.
+ * bos/bo_count: BOs whose GEM handles are passed along with the submission.
+ * debug:        at BIT_DEBUG_ALL and above, the job chain is decoded with
+ *               pandecode after the wait.
+ *
+ * Returns true if the job was submitted and waited on successfully, false if
+ * the payload does not fit, an allocation fails, or any kernel call reports
+ * an error. */
+
+static bool
+bit_submit(struct panfrost_device *dev,
+                enum mali_job_type T,
+                void *payload, size_t payload_size,
+                struct panfrost_bo **bos, size_t bo_count, enum bit_debug debug)
+{
+        const size_t job_bo_size = 4096;
+
+        struct mali_job_descriptor_header header = {
+                .job_descriptor_size = MALI_JOB_64,
+                .job_type = T,
+                .job_index = 1
+        };
+
+        /* Refuse payloads that would overflow the job BO before touching it */
+        if (payload_size > job_bo_size - sizeof(header))
+                return false;
+
+        struct panfrost_bo *job = bit_bo_create(dev, job_bo_size);
+        memcpy(job->cpu, &header, sizeof(header));
+        memcpy(job->cpu + sizeof(header), payload, payload_size);
+
+        uint32_t *bo_handles = calloc(bo_count, sizeof(*bo_handles));
+
+        /* calloc may legitimately return NULL for a zero-sized request */
+        if (bo_count && !bo_handles)
+                return false;
+
+        for (size_t i = 0; i < bo_count; ++i)
+                bo_handles[i] = bos[i]->gem_handle;
+
+        struct drm_panfrost_submit submit = {
+                .jc = job->gpu,
+                .bo_handles = (uintptr_t) bo_handles,
+                .bo_handle_count = bo_count,
+        };
+
+        uint32_t syncobj = 0;
+        bool success = false;
+
+        /* Failures are reported through the return value instead of assert(),
+         * so they are still noticed when assertions are compiled out */
+        if (drmSyncobjCreate(dev->fd, DRM_SYNCOBJ_CREATE_SIGNALED, &syncobj))
+                goto out_free;
+
+        submit.out_sync = syncobj;
+
+        if (drmIoctl(dev->fd, DRM_IOCTL_PANFROST_SUBMIT, &submit))
+                goto out_syncobj;
+
+        if (drmSyncobjWait(dev->fd, &syncobj, 1, INT64_MAX, 0, NULL))
+                goto out_syncobj;
+
+        if (debug >= BIT_DEBUG_ALL)
+                pandecode_jc(submit.jc, true, dev->gpu_id, false);
+
+        success = true;
+
+out_syncobj:
+        /* Each submission creates its own sync object; release it so repeated
+         * submissions do not accumulate handles */
+        drmSyncobjDestroy(dev->fd, syncobj);
+out_free:
+        free(bo_handles);
+        return success;
+}
+
+/* Smoke test run before any shader code is executed on the device: seed the
+ * start of a scratch buffer with a nonzero marker, submit a WRITE_VALUE job
+ * asking for a zero to be written at that address, and confirm the marker was
+ * overwritten.
+ *
+ * Returns true only if the submission succeeded and the first byte of the
+ * scratch buffer reads back as zero. */
+
+bool
+bit_sanity_check(struct panfrost_device *dev)
+{
+        struct panfrost_bo *scratch = bit_bo_create(dev, 65536);
+
+        /* Nonzero marker the job is expected to clear */
+        uint32_t *marker = (uint32_t *) scratch->cpu;
+        marker[0] = 0xAA;
+
+        struct mali_payload_write_value payload = {
+                .address = scratch->gpu,
+                .value_descriptor = MALI_WRITE_VALUE_ZERO
+        };
+
+        struct panfrost_bo *bos[] = { scratch };
+
+        if (!bit_submit(dev, MALI_JOB_TYPE_WRITE_VALUE,
+                        &payload, sizeof(payload),
+                        bos, ARRAY_SIZE(bos), false))
+                return false;
+
+        const uint8_t *bytes = (uint8_t *) scratch->cpu;
+        return bytes[0] == 0x0;
+}
+
+/* Prints `count` 32-bit words to stderr, each as hexadecimal followed by the
+ * same bits interpreted as a float. The float view is produced with memcpy so
+ * no uint32_t storage is read through a float pointer. */
+
+static void
+bit_dump_words(const uint32_t *words, size_t count)
+{
+        for (size_t i = 0; i < count; ++i) {
+                float as_float;
+                memcpy(&as_float, &words[i], sizeof(as_float));
+                fprintf(stderr, "%08X /* %f */ ", words[i], as_float);
+        }
+}
+
+/* Constructs a vertex job around a compiled shader, runs it, and optionally
+ * checks the varying output.
+ *
+ * dev:                  device to run on.
+ * prog:                 compiled program; prog.compiled.data/size is uploaded
+ *                       as the shader binary.
+ * iubo/sz_ubo:          uniform data and its size in bytes, copied to a fixed
+ *                       offset in the UBO buffer (skipped when sz_ubo is 0).
+ * iattr/sz_attr:        attribute data and its size in bytes, copied to a
+ *                       fixed offset in the attribute buffer (skipped when
+ *                       sz_attr is 0).
+ * expected/sz_expected: reference output and its size in bytes, compared
+ *                       against the varying buffer contents after the job.
+ * debug:                controls diagnostics; mismatches are dumped at
+ *                       BIT_DEBUG_FAIL and above, and with no expectation the
+ *                       first four output words are dumped at BIT_DEBUG_ALL.
+ *
+ * Returns true if the submission succeeded and, when sz_expected is nonzero,
+ * the output matched. Returns false without submitting if any of the sizes
+ * exceeds the space available after the data offset in its buffer. */
+
+bool
+bit_vertex(struct panfrost_device *dev, panfrost_program prog,
+                uint32_t *iubo, size_t sz_ubo,
+                uint32_t *iattr, size_t sz_attr,
+                uint32_t *expected, size_t sz_expected, enum bit_debug debug)
+{
+        /* Every staging buffer is BO_SIZE bytes, with descriptors at the
+         * front and the actual data starting at DATA_OFFSET */
+        enum { BO_SIZE = 4096, DATA_OFFSET = 1024 };
+        const size_t data_capacity = BO_SIZE - DATA_OFFSET;
+
+        /* Reject sizes that would copy or compare past the end of a buffer */
+        if (sz_ubo > data_capacity || sz_attr > data_capacity ||
+            sz_expected > data_capacity) {
+                fprintf(stderr, "bit_vertex: input larger than staging buffer\n");
+                return false;
+        }
+
+        struct panfrost_bo *scratchpad = bit_bo_create(dev, BO_SIZE);
+        struct panfrost_bo *shader = bit_bo_create(dev, prog.compiled.size);
+        struct panfrost_bo *shader_desc = bit_bo_create(dev, BO_SIZE);
+        struct panfrost_bo *ubo = bit_bo_create(dev, BO_SIZE);
+        struct panfrost_bo *var = bit_bo_create(dev, BO_SIZE);
+        struct panfrost_bo *attr = bit_bo_create(dev, BO_SIZE);
+
+        pan_pack(var->cpu, ATTRIBUTE, cfg) {
+                cfg.format = (MALI_RGBA32UI << 12);
+                cfg.unknown = true;
+        }
+
+        pan_pack(attr->cpu, ATTRIBUTE, cfg) {
+                cfg.format = (MALI_RGBA32UI << 12);
+        }
+
+        pan_pack(var->cpu + 256, ATTRIBUTE_BUFFER, cfg) {
+                cfg.pointer = (var->gpu + DATA_OFFSET);
+                cfg.size = 1024;
+        }
+
+        pan_pack(attr->cpu + 256, ATTRIBUTE_BUFFER, cfg) {
+                cfg.pointer = (attr->gpu + DATA_OFFSET);
+                cfg.size = 1024;
+        }
+
+        if (sz_ubo)
+                memcpy(ubo->cpu + DATA_OFFSET, iubo, sz_ubo);
+
+        if (sz_attr)
+                memcpy(attr->cpu + DATA_OFFSET, iattr, sz_attr);
+
+        struct panfrost_bo *shmem = bit_bo_create(dev, BO_SIZE);
+        struct mali_shared_memory shmemp = {
+                .scratchpad = scratchpad->gpu,
+                .shared_workgroup_count = 0x1f,
+        };
+
+        memcpy(shmem->cpu, &shmemp, sizeof(shmemp));
+
+        /* NOTE(review): the meaning of the properties constant is not visible
+         * from this file; it is carried over unchanged */
+        pan_pack(shader_desc->cpu, STATE, cfg) {
+                cfg.shader.shader = shader->gpu;
+                cfg.shader.attribute_count = cfg.shader.varying_count = 1;
+                cfg.properties = 0x80020001;
+                cfg.preload.uniform_count = (sz_ubo / 16);
+        }
+
+        memcpy(shader->cpu, prog.compiled.data, prog.compiled.size);
+
+        struct bifrost_payload_vertex payload = {
+                .prefix = {
+                },
+                .postfix = {
+                        .gl_enables = 0x2,
+                        .shared_memory = shmem->gpu,
+                        .shader = shader_desc->gpu,
+                        .uniforms = ubo->gpu + DATA_OFFSET,
+                        .uniform_buffers = ubo->gpu,
+                        .attribute_meta = attr->gpu,
+                        .attributes = attr->gpu + 256,
+                        .varying_meta = var->gpu,
+                        .varyings = var->gpu + 256,
+                },
+        };
+
+        panfrost_pack_work_groups_compute(&payload.prefix,
+                        1, 1, 1,
+                        1, 1, 1,
+                        true);
+
+        payload.prefix.workgroups_x_shift_3 = 5;
+
+        struct panfrost_bo *bos[] = {
+                scratchpad, shmem, shader, shader_desc, ubo, var, attr
+        };
+
+        bool succ = bit_submit(dev, MALI_JOB_TYPE_VERTEX, &payload,
+                        sizeof(payload), bos, ARRAY_SIZE(bos), debug);
+
+        /* Check the output varyings */
+
+        const uint32_t *output = (const uint32_t *) (var->cpu + DATA_OFFSET);
+
+        if (sz_expected) {
+                bool mismatch = memcmp(output, expected, sz_expected) != 0;
+                succ &= !mismatch;
+
+                if (mismatch && (debug >= BIT_DEBUG_FAIL)) {
+                        size_t words = sz_expected / sizeof(uint32_t);
+
+                        fprintf(stderr, "expected [");
+                        bit_dump_words(expected, words);
+                        fprintf(stderr, "], got [");
+                        bit_dump_words(output, words);
+                        fprintf(stderr, "]\n");
+                }
+        } else if (debug == BIT_DEBUG_ALL) {
+                fprintf(stderr, "got [");
+                bit_dump_words(output, 4);
+                fprintf(stderr, "]\n");
+        }
+
+        return succ;
+}