From 309a186987cea7f62dfd41fef66fac6d79fca96c Mon Sep 17 00:00:00 2001 From: Francisco Jerez Date: Tue, 20 Mar 2012 23:41:09 +0100 Subject: [PATCH] gallium/tests/trivial: Import compute unit tests. Add a test program that tries to exercise some of the language features commonly used by compute programs at the Gallium API level: - Correctness of the values returned by the grid parameters. - Proper functioning of resource LOADs and STOREs. - Subroutine calls. - Argument passing to the compute parameter through the INPUT memory space. - Mapping of buffer objects to the GLOBAL memory space. - Proper functioning of the PRIVATE and LOCAL memory spaces. - Texture sampling and constant buffers. - Support for multiple kernels in the same program. - Indirect resource indexing. - Formatted resource loads and stores (i.e. with channel conversion and scaling) using several different formats. - Proper functioning of work-group barriers. - Atomicity and semantics of the atomic opcodes. As of now all of them seem to pass on my nvA8. --- src/gallium/tests/trivial/Makefile | 3 +- src/gallium/tests/trivial/compute.c | 1592 +++++++++++++++++++++++++++ 2 files changed, 1594 insertions(+), 1 deletion(-) create mode 100644 src/gallium/tests/trivial/compute.c diff --git a/src/gallium/tests/trivial/Makefile b/src/gallium/tests/trivial/Makefile index bfe186b1607..8c032016538 100644 --- a/src/gallium/tests/trivial/Makefile +++ b/src/gallium/tests/trivial/Makefile @@ -18,7 +18,8 @@ LINKS += \ SOURCES = \ tri.c \ - quad-tex.c + quad-tex.c \ + compute.c OBJECTS = $(SOURCES:.c=.o) diff --git a/src/gallium/tests/trivial/compute.c b/src/gallium/tests/trivial/compute.c new file mode 100644 index 00000000000..1812090d3a0 --- /dev/null +++ b/src/gallium/tests/trivial/compute.c @@ -0,0 +1,1592 @@ +/* + * Copyright (C) 2011 Francisco Jerez. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE + * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION + * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION + * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + */ + +#include +#include +#include +#include +#include "pipe/p_state.h" +#include "pipe/p_context.h" +#include "pipe/p_screen.h" +#include "pipe/p_defines.h" +#include "pipe/p_shader_tokens.h" +#include "util/u_memory.h" +#include "util/u_inlines.h" +#include "util/u_sampler.h" +#include "util/u_format.h" +#include "tgsi/tgsi_text.h" +#include "pipe-loader/pipe_loader.h" + +#define MAX_RESOURCES 4 + +struct context { + struct pipe_loader_device *dev; + struct pipe_screen *screen; + struct pipe_context *pipe; + void *hwcs; + void *hwsmp[MAX_RESOURCES]; + struct pipe_resource *tex[MAX_RESOURCES]; + bool tex_rw[MAX_RESOURCES]; + struct pipe_sampler_view *view[MAX_RESOURCES]; + struct pipe_surface *surf[MAX_RESOURCES]; +}; + +#define DUMP_COMPUTE_PARAM(p, c) do { \ + uint64_t __v[4]; \ + int __i, __n; \ + \ + __n = ctx->screen->get_compute_param(ctx->screen, c, __v); \ + printf("%s: {", #c); \ + \ + for (__i = 0; __i < __n / sizeof(*__v); ++__i) \ + printf(" %"PRIu64, __v[__i]); \ + \ + printf(" }\n"); \ + } while (0) + +static void init_ctx(struct context *ctx) +{ + int ret; + + ret = pipe_loader_probe(&ctx->dev, 1); + assert(ret); + + ctx->screen = pipe_loader_create_screen(ctx->dev, PIPE_SEARCH_DIR); + assert(ctx->screen); + + ctx->pipe = ctx->screen->context_create(ctx->screen, NULL); + assert(ctx->pipe); + + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_GRID_DIMENSION); + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_GRID_SIZE); + DUMP_COMPUTE_PARAM(p, PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE); +} + +static void destroy_ctx(struct context *ctx) +{ + ctx->pipe->destroy(ctx->pipe); + ctx->screen->destroy(ctx->screen); + pipe_loader_release(&ctx->dev, 1); + FREE(ctx); +} + +static char * +preprocess_prog(struct context *ctx, const char *src, const char *defs) +{ + const char header[] = + "#define RGLOBAL RES[32767]\n" + "#define RLOCAL RES[32766]\n" + "#define RPRIVATE RES[32765]\n" + "#define RINPUT RES[32764]\n"; + char cmd[512]; + char tmp[] = "/tmp/test-compute.tgsi-XXXXXX"; + char *buf; + int fd, ret; + struct stat st; + FILE *p; + + /* Open a temporary file */ + fd = mkstemp(tmp); + assert(fd >= 0); + snprintf(cmd, sizeof(cmd), "cpp -P -nostdinc -undef %s > %s", + defs ? defs : "", tmp); + + /* Preprocess */ + p = popen(cmd, "w"); + fwrite(header, strlen(header), 1, p); + fwrite(src, strlen(src), 1, p); + ret = pclose(p); + assert(!ret); + + /* Read back */ + ret = fstat(fd, &st); + assert(!ret); + + buf = malloc(st.st_size + 1); + ret = read(fd, buf, st.st_size); + assert(ret == st.st_size); + buf[ret] = 0; + + /* Clean up */ + close(fd); + unlink(tmp); + + return buf; +} + +static void init_prog(struct context *ctx, unsigned local_sz, + unsigned private_sz, unsigned input_sz, + const char *src, const char *defs) +{ + struct pipe_context *pipe = ctx->pipe; + struct tgsi_token prog[1024]; + struct pipe_compute_state cs = { + .prog = prog, + .req_local_mem = local_sz, + .req_private_mem = private_sz, + .req_input_mem = input_sz + }; + char *psrc = preprocess_prog(ctx, src, defs); + int ret; + + ret = tgsi_text_translate(psrc, prog, Elements(prog)); + assert(ret); + free(psrc); + + ctx->hwcs = pipe->create_compute_state(pipe, &cs); + assert(ctx->hwcs); + + pipe->bind_compute_state(pipe, ctx->hwcs); +} + +static void destroy_prog(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->delete_compute_state(pipe, ctx->hwcs); + ctx->hwcs = NULL; +} + +static void init_tex(struct context *ctx, int slot, + enum pipe_texture_target target, bool rw, + enum pipe_format format, int w, int h, + void (*init)(void *, int, int, int)) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource **tex = &ctx->tex[slot]; + struct pipe_resource ttex = { + .target = target, + .format = format, + .width0 = w, + .height0 = h, + .depth0 = 1, + .array_size = 1, + .bind = (PIPE_BIND_SAMPLER_VIEW | + PIPE_BIND_COMPUTE_RESOURCE | + PIPE_BIND_GLOBAL) + }; + int dx = util_format_get_blocksize(format); + int dy = util_format_get_stride(format, w); + int nx = (target == PIPE_BUFFER ? (w / dx) : + util_format_get_nblocksx(format, w)); + int ny = (target == PIPE_BUFFER ? 1 : + util_format_get_nblocksy(format, h)); + struct pipe_transfer *xfer; + char *map; + int x, y; + + *tex = ctx->screen->resource_create(ctx->screen, &ttex); + assert(*tex); + + xfer = pipe->get_transfer(pipe, *tex, 0, PIPE_TRANSFER_WRITE, + &(struct pipe_box) { .width = w, + .height = h, + .depth = 1 }); + assert(xfer); + + map = pipe->transfer_map(pipe, xfer); + assert(map); + + for (y = 0; y < ny; ++y) { + for (x = 0; x < nx; ++x) { + init(map + y * dy + x * dx, slot, x, y); + } + } + + pipe->transfer_unmap(pipe, xfer); + pipe->transfer_destroy(pipe, xfer); + + ctx->tex_rw[slot] = rw; +} + +static bool default_check(void *x, void *y, int sz) { + return !memcmp(x, y, sz); +} + +static void check_tex(struct context *ctx, int slot, + void (*expect)(void *, int, int, int), + bool (*check)(void *, void *, int)) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource *tex = ctx->tex[slot]; + int dx = util_format_get_blocksize(tex->format); + int dy = util_format_get_stride(tex->format, tex->width0); + int nx = (tex->target == PIPE_BUFFER ? (tex->width0 / dx) : + util_format_get_nblocksx(tex->format, tex->width0)); + int ny = (tex->target == PIPE_BUFFER ? 1 : + util_format_get_nblocksy(tex->format, tex->height0)); + struct pipe_transfer *xfer; + char *map; + int x, y, i; + int err = 0; + + if (!check) + check = default_check; + + xfer = pipe->get_transfer(pipe, tex, 0, PIPE_TRANSFER_READ, + &(struct pipe_box) { .width = tex->width0, + .height = tex->height0, + .depth = 1 }); + assert(xfer); + + map = pipe->transfer_map(pipe, xfer); + assert(map); + + for (y = 0; y < ny; ++y) { + for (x = 0; x < nx; ++x) { + uint32_t exp[4]; + uint32_t *res = (uint32_t *)(map + y * dy + x * dx); + + expect(exp, slot, x, y); + if (check(res, exp, dx) || (++err) > 20) + continue; + + if (dx < 4) { + uint32_t u = 0, v = 0; + + for (i = 0; i < dx; i++) { + u |= ((uint8_t *)exp)[i] << (8 * i); + v |= ((uint8_t *)res)[i] << (8 * i); + } + printf("(%d, %d): got 0x%x, expected 0x%x\n", + x, y, v, u); + } else { + for (i = 0; i < dx / 4; i++) { + printf("(%d, %d)[%d]: got 0x%x/%f," + " expected 0x%x/%f\n", x, y, i, + res[i], ((float *)res)[i], + exp[i], ((float *)exp)[i]); + } + } + } + } + + pipe->transfer_unmap(pipe, xfer); + pipe->transfer_destroy(pipe, xfer); + + if (err) + printf("(%d, %d): \x1b[31mFAIL\x1b[0m (%d)\n", x, y, err); + else + printf("(%d, %d): \x1b[32mOK\x1b[0m\n", x, y); +} + +static void destroy_tex(struct context *ctx) +{ + int i; + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->tex[i]) + pipe_resource_reference(&ctx->tex[i], NULL); + } +} + +static void init_sampler_views(struct context *ctx, const int *slots) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_view tview; + int i; + + for (i = 0; *slots >= 0; ++i, ++slots) { + u_sampler_view_default_template(&tview, ctx->tex[*slots], + ctx->tex[*slots]->format); + + ctx->view[i] = pipe->create_sampler_view(pipe, ctx->tex[*slots], + &tview); + assert(ctx->view[i]); + } + + pipe->set_compute_sampler_views(pipe, 0, i, ctx->view); +} + +static void destroy_sampler_views(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->set_compute_sampler_views(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->view[i]) { + pipe->sampler_view_destroy(pipe, ctx->view[i]); + ctx->view[i] = NULL; + } + } +} + +static void init_compute_resources(struct context *ctx, const int *slots) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + for (i = 0; *slots >= 0; ++i, ++slots) { + struct pipe_surface tsurf = { + .format = ctx->tex[*slots]->format, + .usage = ctx->tex[*slots]->bind, + .writable = ctx->tex_rw[*slots] + }; + + if (ctx->tex[*slots]->target == PIPE_BUFFER) + tsurf.u.buf.last_element = ctx->tex[*slots]->width0 - 1; + + ctx->surf[i] = pipe->create_surface(pipe, ctx->tex[*slots], + &tsurf); + assert(ctx->surf[i]); + } + + pipe->set_compute_resources(pipe, 0, i, ctx->surf); +} + +static void destroy_compute_resources(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->set_compute_resources(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->surf[i]) { + pipe->surface_destroy(pipe, ctx->surf[i]); + ctx->surf[i] = NULL; + } + } +} + +static void init_sampler_states(struct context *ctx, int n) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_sampler_state smp = { + .normalized_coords = 1, + }; + int i; + + for (i = 0; i < n; ++i) { + ctx->hwsmp[i] = pipe->create_sampler_state(pipe, &smp); + assert(ctx->hwsmp[i]); + } + + pipe->bind_compute_sampler_states(pipe, 0, i, ctx->hwsmp); +} + +static void destroy_sampler_states(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + int i; + + pipe->bind_compute_sampler_states(pipe, 0, MAX_RESOURCES, NULL); + + for (i = 0; i < MAX_RESOURCES; ++i) { + if (ctx->hwsmp[i]) { + pipe->delete_sampler_state(pipe, ctx->hwsmp[i]); + ctx->hwsmp[i] = NULL; + } + } +} + +static void init_globals(struct context *ctx, const int *slots, + uint32_t **handles) +{ + struct pipe_context *pipe = ctx->pipe; + struct pipe_resource *res[MAX_RESOURCES]; + int i; + + for (i = 0; *slots >= 0; ++i, ++slots) + res[i] = ctx->tex[*slots]; + + pipe->set_global_binding(pipe, 0, i, res, handles); +} + +static void destroy_globals(struct context *ctx) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->set_global_binding(pipe, 0, MAX_RESOURCES, NULL, NULL); +} + +static void launch_grid(struct context *ctx, const uint *block_layout, + const uint *grid_layout, uint32_t pc, + const void *input) +{ + struct pipe_context *pipe = ctx->pipe; + + pipe->launch_grid(pipe, block_layout, grid_layout, pc, input); +} + +static void test_system_values(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], GRID_SIZE[0]\n" + "DCL SV[3], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 64, 0, 0, 0 }\n" + "IMM UINT32 { 16, 0, 0, 0 }\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "\n" + "BGNSUB" + " UMUL TEMP[0], SV[0], SV[1]\n" + " UADD TEMP[0], TEMP[0], SV[3]\n" + " UMUL TEMP[1], SV[1], SV[2]\n" + " UMUL TEMP[0].w, TEMP[0], TEMP[1].zzzz\n" + " UMUL TEMP[0].zw, TEMP[0], TEMP[1].yyyy\n" + " UMUL TEMP[0].yzw, TEMP[0], TEMP[1].xxxx\n" + " UADD TEMP[0].xy, TEMP[0].xyxy, TEMP[0].zwzw\n" + " UADD TEMP[0].x, TEMP[0].xxxx, TEMP[0].yyyy\n" + " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" + " STORE RES[0].xyzw, TEMP[0], SV[0]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[1]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[2]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " STORE RES[0].xyzw, TEMP[0], SV[3]\n" + " RET\n" + "ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + int id = x / 16, sv = (x % 16) / 4, c = x % 4; + int tid[] = { id % 20, (id % 240) / 20, id / 240, 0 }; + int bsz[] = { 4, 3, 5, 1}; + int gsz[] = { 5, 4, 1, 1}; + + switch (sv) { + case 0: + *(uint32_t *)p = tid[c] / bsz[c]; + break; + case 1: + *(uint32_t *)p = bsz[c]; + break; + case 2: + *(uint32_t *)p = gsz[c]; + break; + case 3: + *(uint32_t *)p = tid[c] % bsz[c]; + break; + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 76800, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){4, 3, 5}, (uint []){5, 4, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_resource_access(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL RES[1], 2D, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 15, 0, 0, 0 }\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UADD TEMP[0].x, SV[0].xxxx, SV[0].yyyy\n" + " AND TEMP[0].x, TEMP[0], IMM[0]\n" + " UMUL TEMP[0].x, TEMP[0], IMM[1]\n" + " LOAD TEMP[0].xyzw, RES[0], TEMP[0]\n" + " UMUL TEMP[1], SV[0], IMM[1]\n" + " STORE RES[1].xyzw, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init0(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)x; + } + void init1(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)((x + 4*y) & 0x3f); + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init0); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 60, 12, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){15, 12, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_function_calls(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], GRID_SIZE[0]\n" + "DCL SV[3], THREAD_ID[0]\n" + "DCL TEMP[0]\n" + "DCL TEMP[1]\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 0, 11, 22, 33 }\n" + "IMM FLT32 { 11, 33, 55, 99 }\n" + "IMM UINT32 { 4, 1, 0, 0 }\n" + "IMM UINT32 { 12, 0, 0, 0 }\n" + "\n" + "00: BGNSUB\n" + "01: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" + "02: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" + "03: USLT TEMP[0].x, TEMP[0], IMM[0]\n" + "04: RET\n" + "05: ENDSUB\n" + "06: BGNSUB\n" + "07: UMUL TEMP[0].x, TEMP[0], TEMP[0]\n" + "08: UADD TEMP[1].x, TEMP[1], IMM[2].yyyy\n" + "09: USLT TEMP[0].x, TEMP[0], IMM[0].yyyy\n" + "10: IF TEMP[0].xxxx\n" + "11: CAL :0\n" + "12: ENDIF\n" + "13: RET\n" + "14: ENDSUB\n" + "15: BGNSUB\n" + "16: UMUL TEMP[2], SV[0], SV[1]\n" + "17: UADD TEMP[2], TEMP[2], SV[3]\n" + "18: UMUL TEMP[2], TEMP[2], IMM[2]\n" + "00: MOV TEMP[1].x, IMM[2].wwww\n" + "19: LOAD TEMP[0].x, RES[0].xxxx, TEMP[2]\n" + "20: CAL :6\n" + "21: STORE RES[0].x, TEMP[2], TEMP[1].xxxx\n" + "22: RET\n" + "23: ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 15 * y + x; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = (15 * y + x) < 4 ? 2 : 1 ; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 15, 12, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){3, 3, 3}, (uint []){5, 4, 1}, 15, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_input_global(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL SV[0], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 8, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " LOAD TEMP[1].xy, RINPUT, TEMP[0]\n" + " LOAD TEMP[0].x, RGLOBAL, TEMP[1].yyyy\n" + " UADD TEMP[1].x, TEMP[0], -TEMP[1]\n" + " STORE RGLOBAL.x, TEMP[1].yyyy, TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef - (x == 0 ? 0x10001 + 2 * s : 0); + } + uint32_t input[8] = { 0x10001, 0x10002, 0x10003, 0x10004, + 0x10005, 0x10006, 0x10007, 0x10008 }; + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 32, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 2, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_tex(ctx, 3, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, 32, 0, init); + init_globals(ctx, (int []){ 0, 1, 2, 3, -1 }, + (uint32_t *[]){ &input[1], &input[3], + &input[5], &input[7] }); + launch_grid(ctx, (uint []){4, 1, 1}, (uint []){1, 1, 1}, 0, input); + check_tex(ctx, 0, expect, NULL); + check_tex(ctx, 1, expect, NULL); + check_tex(ctx, 2, expect, NULL); + check_tex(ctx, 3, expect, NULL); + destroy_globals(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_private(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], SV[1]\n" + " UADD TEMP[0].x, TEMP[0], SV[2]\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " BGNLOOP\n" + " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " UDIV TEMP[2].x, TEMP[1], IMM[1]\n" + " UADD TEMP[2].x, TEMP[2], TEMP[0]\n" + " STORE RPRIVATE.x, TEMP[1], TEMP[2]\n" + " UADD TEMP[1].x, TEMP[1], IMM[1]\n" + " ENDLOOP\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " UMUL TEMP[0].x, TEMP[0], IMM[0]\n" + " BGNLOOP\n" + " USEQ TEMP[2].x, TEMP[1], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " LOAD TEMP[2].x, RPRIVATE, TEMP[1]\n" + " STORE RES[0].x, TEMP[0], TEMP[2]\n" + " UADD TEMP[0].x, TEMP[0], IMM[1]\n" + " UADD TEMP[1].x, TEMP[1], IMM[1]\n" + " ENDLOOP\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = (x / 32) + x % 32; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 128, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 32768, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){16, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_local(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "IMM UINT32 { 2, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[2], IMM[2]\n" + " STORE RLOCAL.x, TEMP[0], IMM[0].wwww\n" + " MFENCE RLOCAL\n" + " USLT TEMP[1].x, SV[2], IMM[3]\n" + " IF TEMP[1]\n" + " UADD TEMP[1].x, TEMP[0], IMM[4]\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[0]\n" + " MFENCE RLOCAL\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[1]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " ELSE\n" + " UADD TEMP[1].x, TEMP[0], -IMM[4]\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0].wwww\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[0]\n" + " MFENCE RLOCAL\n" + " BGNLOOP\n" + " LOAD TEMP[2].x, RLOCAL, TEMP[1]\n" + " USEQ TEMP[2].x, TEMP[2], IMM[0]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " STORE RLOCAL.x, TEMP[0], IMM[1]\n" + " MFENCE RLOCAL\n" + " ENDIF\n" + " UMUL TEMP[1].x, SV[0], SV[1]\n" + " UMUL TEMP[1].x, TEMP[1], IMM[2]\n" + " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" + " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" + " STORE RES[0].x, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x & 0x20 ? 2 : 1; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 256, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_sample(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL SVIEW[0], 2D, FLOAT\n" + "DCL RES[0], 2D, RAW, WR\n" + "DCL SAMP[0]\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "IMM FLT32 { 128, 32, 0, 0 }\n" + "\n" + " BGNSUB\n" + " I2F TEMP[1], SV[0]\n" + " DIV TEMP[1], TEMP[1], IMM[1]\n" + " SAMPLE TEMP[1], TEMP[1], SVIEW[0], SAMP[0]\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " STORE RES[0].xyzw, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(float *)p = s ? 1 : x * y; + } + void expect(void *p, int s, int x, int y) { + switch (x % 4) { + case 0: + *(float *)p = x / 4 * y; + break; + case 1: + case 2: + *(float *)p = 0; + break; + case 3: + *(float *)p = 1; + break; + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 128, 32, init); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, init); + init_compute_resources(ctx, (int []) { 1, -1 }); + init_sampler_views(ctx, (int []) { 0, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_sampler_states(ctx); + destroy_sampler_views(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_many_kern(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL TEMP[0], LOCAL\n" + "IMM UINT32 { 0, 1, 2, 3 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].xxxx, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].xxxx\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].yyyy, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].yyyy\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].zzzz, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].zzzz\n" + " RET\n" + " ENDSUB\n" + " BGNSUB\n" + " UMUL TEMP[0].x, IMM[0].wwww, IMM[1].xxxx\n" + " STORE RES[0].x, TEMP[0], IMM[0].wwww\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 16, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 0, NULL); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 5, NULL); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 10, NULL); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){1, 1, 1}, 15, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_constant(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW\n" + "DCL RES[1], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], IMM[0]\n" + " LOAD TEMP[1].x, RES[0], TEMP[0]\n" + " STORE RES[1].x, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(float *)p = s ? 0xdeadbeef : 8.0 - (float)x; + } + void expect(void *p, int s, int x, int y) { + *(float *)p = 8.0 - (float)x; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); + check_tex(ctx, 1, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_resource_indirect(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL RES[1..3], BUFFER, RAW\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[0], IMM[0]\n" + " LOAD TEMP[1].x, RES[1], TEMP[0]\n" + " LOAD TEMP[1].x, RES[TEMP[1].x+2], TEMP[0]\n" + " STORE RES[0].x, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = s == 0 ? 0xdeadbeef : + s == 1 ? x % 2 : + s == 2 ? 2 * x : + 2 * x + 1; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 2 * x + (x % 2 ? 1 : 0); + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 1, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 2, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_tex(ctx, 3, PIPE_BUFFER, false, PIPE_FORMAT_R32_FLOAT, + 256, 0, init); + init_compute_resources(ctx, (int []) { 0, 1, 2, 3, -1 }); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){64, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +enum pipe_format surface_fmts[] = { + PIPE_FORMAT_B8G8R8A8_UNORM, + PIPE_FORMAT_B8G8R8X8_UNORM, + PIPE_FORMAT_A8R8G8B8_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_X8R8G8B8_UNORM, + PIPE_FORMAT_L8_UNORM, + PIPE_FORMAT_A8_UNORM, + PIPE_FORMAT_I8_UNORM, + PIPE_FORMAT_L8A8_UNORM, + PIPE_FORMAT_R32_FLOAT, + PIPE_FORMAT_R32G32_FLOAT, + PIPE_FORMAT_R32G32B32A32_FLOAT, + PIPE_FORMAT_R32_UNORM, + PIPE_FORMAT_R32G32_UNORM, + PIPE_FORMAT_R32G32B32A32_UNORM, + PIPE_FORMAT_R32_SNORM, + PIPE_FORMAT_R32G32_SNORM, + PIPE_FORMAT_R32G32B32A32_SNORM, + PIPE_FORMAT_R8_UINT, + PIPE_FORMAT_R8G8_UINT, + PIPE_FORMAT_R8G8B8A8_UINT, + PIPE_FORMAT_R8_SINT, + PIPE_FORMAT_R8G8_SINT, + PIPE_FORMAT_R8G8B8A8_SINT, + PIPE_FORMAT_R32_UINT, + PIPE_FORMAT_R32G32_UINT, + PIPE_FORMAT_R32G32B32A32_UINT, + PIPE_FORMAT_R32_SINT, + PIPE_FORMAT_R32G32_SINT, + PIPE_FORMAT_R32G32B32A32_SINT +}; + +static void test_surface_ld(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D\n" + "DCL RES[1], 2D, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " LOAD TEMP[1], RES[0], SV[0]\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " STORE RES[1].xyzw, TEMP[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + int i = 0; + void init0f(void *p, int s, int x, int y) { + float v[] = { 1.0, -.75, .50, -.25 }; + util_format_write_4f(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void init0i(void *p, int s, int x, int y) { + int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; + util_format_write_4i(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void init1(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expectf(void *p, int s, int x, int y) { + float v[4], w[4]; + init0f(v, s, x / 4, y); + util_format_read_4f(surface_fmts[i], w, 0, + v, 0, 0, 0, 1, 1); + *(float *)p = w[x % 4]; + } + void expecti(void *p, int s, int x, int y) { + int32_t v[4], w[4]; + init0i(v, s, x / 4, y); + util_format_read_4i(surface_fmts[i], w, 0, + v, 0, 0, 0, 1, 1); + *(uint32_t *)p = w[x % 4]; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + + for (i = 0; i < Elements(surface_fmts); i++) { + bool is_int = util_format_is_pure_integer(surface_fmts[i]); + + printf(" - %s\n", util_format_name(surface_fmts[i])); + + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, surface_fmts[i], + 128, 32, (is_int ? init0i : init0f)); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, + NULL); + check_tex(ctx, 1, (is_int ? expecti : expectf), NULL); + destroy_sampler_states(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + } + + destroy_prog(ctx); +} + +static void test_surface_st(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], 2D, RAW\n" + "DCL RES[1], 2D, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "IMM UINT32 { 16, 1, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0], SV[0], IMM[0]\n" + " LOAD TEMP[1], RES[0], TEMP[0]\n" + " STORE RES[1], SV[0], TEMP[1]\n" + " RET\n" + " ENDSUB\n"; + int i = 0; + void init0f(void *p, int s, int x, int y) { + float v[] = { 1.0, -.75, 0.5, -.25 }; + *(float *)p = v[x % 4]; + } + void init0i(void *p, int s, int x, int y) { + int v[] = { 0xffffffff, 0xffff, 0xff, 0xf }; + *(int32_t *)p = v[x % 4]; + } + void init1(void *p, int s, int x, int y) { + memset(p, 1, util_format_get_blocksize(surface_fmts[i])); + } + void expectf(void *p, int s, int x, int y) { + float vf[4]; + int j; + + for (j = 0; j < 4; j++) + init0f(&vf[j], s, 4 * x + j, y); + util_format_write_4f(surface_fmts[i], vf, 0, + p, 0, 0, 0, 1, 1); + } + void expects(void *p, int s, int x, int y) { + int32_t v[4]; + int j; + + for (j = 0; j < 4; j++) + init0i(&v[j], s, 4 * x + j, y); + util_format_write_4i(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + void expectu(void *p, int s, int x, int y) { + uint32_t v[4]; + int j; + + for (j = 0; j < 4; j++) + init0i(&v[j], s, 4 * x + j, y); + util_format_write_4ui(surface_fmts[i], v, 0, + p, 0, 0, 0, 1, 1); + } + bool check(void *x, void *y, int sz) { + int j; + + if (util_format_is_float(surface_fmts[i])) { + return fabs(*(float *)x - *(float *)y) < 3.92156863e-3; + + } else if ((sz % 4) == 0) { + for (j = 0; j < sz / 4; j++) + if (abs(((uint32_t *)x)[j] - + ((uint32_t *)y)[j]) > 1) + return false; + return true; + } else { + return !memcmp(x, y, sz); + } + } + + printf("- %s\n", __func__); + + init_prog(ctx, 0, 0, 0, src, NULL); + + for (i = 0; i < Elements(surface_fmts); i++) { + bool is_signed = (util_format_description(surface_fmts[i]) + ->channel[0].type == UTIL_FORMAT_TYPE_SIGNED); + bool is_int = util_format_is_pure_integer(surface_fmts[i]); + + printf(" - %s\n", util_format_name(surface_fmts[i])); + + init_tex(ctx, 0, PIPE_TEXTURE_2D, true, PIPE_FORMAT_R32_FLOAT, + 512, 32, (is_int ? init0i : init0f)); + init_tex(ctx, 1, PIPE_TEXTURE_2D, true, surface_fmts[i], + 128, 32, init1); + init_compute_resources(ctx, (int []) { 0, 1, -1 }); + init_sampler_states(ctx, 2); + launch_grid(ctx, (uint []){1, 1, 1}, (uint []){128, 32, 1}, 0, + NULL); + check_tex(ctx, 1, (is_int && is_signed ? expects : + is_int && !is_signed ? expectu : + expectf), check); + destroy_sampler_states(ctx); + destroy_compute_resources(ctx); + destroy_tex(ctx); + } + + destroy_prog(ctx); +} + +static void test_barrier(struct context *ctx) +{ + const char *src = "COMP\n" + "DCL RES[0], BUFFER, RAW, WR\n" + "DCL SV[0], BLOCK_ID[0]\n" + "DCL SV[1], BLOCK_SIZE[0]\n" + "DCL SV[2], THREAD_ID[0]\n" + "DCL TEMP[0], LOCAL\n" + "DCL TEMP[1], LOCAL\n" + "DCL TEMP[2], LOCAL\n" + "DCL TEMP[3], LOCAL\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL TEMP[0].x, SV[2], IMM[1]\n" + " MOV TEMP[1].x, IMM[0].wwww\n" + " BGNLOOP\n" + " BARRIER\n" + " STORE RLOCAL.x, TEMP[0], TEMP[1]\n" + " BARRIER\n" + " MOV TEMP[2].x, IMM[0].wwww\n" + " BGNLOOP\n" + " UMUL TEMP[3].x, TEMP[2], IMM[1]\n" + " LOAD TEMP[3].x, RLOCAL, TEMP[3]\n" + " USNE TEMP[3].x, TEMP[3], TEMP[1]\n" + " IF TEMP[3]\n" + " END\n" + " ENDIF\n" + " UADD TEMP[2].x, TEMP[2], IMM[0]\n" + " USEQ TEMP[3].x, TEMP[2], SV[1]\n" + " IF TEMP[3]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " UADD TEMP[1].x, TEMP[1], IMM[0]\n" + " USEQ TEMP[2].x, TEMP[1], IMM[2]\n" + " IF TEMP[2]\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " UMUL TEMP[1].x, SV[0], SV[1]\n" + " UMUL TEMP[1].x, TEMP[1], IMM[1]\n" + " UADD TEMP[1].x, TEMP[1], TEMP[0]\n" + " LOAD TEMP[0].x, RLOCAL, TEMP[0]\n" + " STORE RES[0].x, TEMP[1], TEMP[0]\n" + " RET\n" + " ENDSUB\n"; + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = 31; + } + + printf("- %s\n", __func__); + + init_prog(ctx, 256, 0, 0, src, NULL); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_atom_ops(struct context *ctx, bool global) +{ + const char *src = "COMP\n" + "#ifdef TARGET_GLOBAL\n" + "#define target RES[0]\n" + "#else\n" + "#define target RLOCAL\n" + "#endif\n" + "" + "DCL RES[0], BUFFER, RAW, WR\n" + "#define threadid SV[0]\n" + "DCL threadid, THREAD_ID[0]\n" + "" + "#define offset TEMP[0]\n" + "DCL offset, LOCAL\n" + "#define tmp TEMP[1]\n" + "DCL tmp, LOCAL\n" + "" + "#define k0 IMM[0]\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "#define k1 IMM[1]\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "#define k2 IMM[2]\n" + "IMM UINT32 { 2, 0, 0, 0 }\n" + "#define k3 IMM[3]\n" + "IMM UINT32 { 3, 0, 0, 0 }\n" + "#define k4 IMM[4]\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "#define k5 IMM[5]\n" + "IMM UINT32 { 5, 0, 0, 0 }\n" + "#define k6 IMM[6]\n" + "IMM UINT32 { 6, 0, 0, 0 }\n" + "#define k7 IMM[7]\n" + "IMM UINT32 { 7, 0, 0, 0 }\n" + "#define k8 IMM[8]\n" + "IMM UINT32 { 8, 0, 0, 0 }\n" + "#define k9 IMM[9]\n" + "IMM UINT32 { 9, 0, 0, 0 }\n" + "#define korig IMM[10].xxxx\n" + "#define karg IMM[10].yyyy\n" + "IMM UINT32 { 3735928559, 286331153, 0, 0 }\n" + "\n" + " BGNSUB\n" + " UMUL offset.x, threadid, k4\n" + " STORE target.x, offset, korig\n" + " USEQ tmp.x, threadid, k0\n" + " IF tmp\n" + " ATOMUADD tmp.x, target, offset, karg\n" + " ATOMUADD tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k1\n" + " IF tmp\n" + " ATOMXCHG tmp.x, target, offset, karg\n" + " ATOMXCHG tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k2\n" + " IF tmp\n" + " ATOMCAS tmp.x, target, offset, korig, karg\n" + " ATOMCAS tmp.x, target, offset, tmp, k0\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k3\n" + " IF tmp\n" + " ATOMAND tmp.x, target, offset, karg\n" + " ATOMAND tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k4\n" + " IF tmp\n" + " ATOMOR tmp.x, target, offset, karg\n" + " ATOMOR tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k5\n" + " IF tmp\n" + " ATOMXOR tmp.x, target, offset, karg\n" + " ATOMXOR tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k6\n" + " IF tmp\n" + " ATOMUMIN tmp.x, target, offset, karg\n" + " ATOMUMIN tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k7\n" + " IF tmp\n" + " ATOMUMAX tmp.x, target, offset, karg\n" + " ATOMUMAX tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k8\n" + " IF tmp\n" + " ATOMIMIN tmp.x, target, offset, karg\n" + " ATOMIMIN tmp.x, target, offset, tmp\n" + " ENDIF\n" + " USEQ tmp.x, threadid, k9\n" + " IF tmp\n" + " ATOMIMAX tmp.x, target, offset, karg\n" + " ATOMIMAX tmp.x, target, offset, tmp\n" + " ENDIF\n" + "#ifdef TARGET_LOCAL\n" + " LOAD tmp.x, RLOCAL, offset\n" + " STORE RES[0].x, offset, tmp\n" + "#endif\n" + " RET\n" + " ENDSUB\n"; + + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xbad; + } + void expect(void *p, int s, int x, int y) { + switch (x) { + case 0: + *(uint32_t *)p = 0xce6c8eef; + break; + case 1: + *(uint32_t *)p = 0xdeadbeef; + break; + case 2: + *(uint32_t *)p = 0x11111111; + break; + case 3: + *(uint32_t *)p = 0x10011001; + break; + case 4: + *(uint32_t *)p = 0xdfbdbfff; + break; + case 5: + *(uint32_t *)p = 0x11111111; + break; + case 6: + *(uint32_t *)p = 0x11111111; + break; + case 7: + *(uint32_t *)p = 0xdeadbeef; + break; + case 8: + *(uint32_t *)p = 0xdeadbeef; + break; + case 9: + *(uint32_t *)p = 0x11111111; + break; + } + } + + printf("- %s (%s)\n", __func__, global ? "global" : "local"); + + init_prog(ctx, 40, 0, 0, src, + (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 40, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){10, 1, 1}, (uint []){1, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +static void test_atom_race(struct context *ctx, bool global) +{ + const char *src = "COMP\n" + "#ifdef TARGET_GLOBAL\n" + "#define target RES[0]\n" + "#else\n" + "#define target RLOCAL\n" + "#endif\n" + "" + "DCL RES[0], BUFFER, RAW, WR\n" + "" + "#define blockid SV[0]\n" + "DCL blockid, BLOCK_ID[0]\n" + "#define blocksz SV[1]\n" + "DCL blocksz, BLOCK_SIZE[0]\n" + "#define threadid SV[2]\n" + "DCL threadid, THREAD_ID[0]\n" + "" + "#define offset TEMP[0]\n" + "DCL offset, LOCAL\n" + "#define arg TEMP[1]\n" + "DCL arg, LOCAL\n" + "#define count TEMP[2]\n" + "DCL count, LOCAL\n" + "#define vlocal TEMP[3]\n" + "DCL vlocal, LOCAL\n" + "#define vshared TEMP[4]\n" + "DCL vshared, LOCAL\n" + "#define last TEMP[5]\n" + "DCL last, LOCAL\n" + "#define tmp0 TEMP[6]\n" + "DCL tmp0, LOCAL\n" + "#define tmp1 TEMP[7]\n" + "DCL tmp1, LOCAL\n" + "" + "#define k0 IMM[0]\n" + "IMM UINT32 { 0, 0, 0, 0 }\n" + "#define k1 IMM[1]\n" + "IMM UINT32 { 1, 0, 0, 0 }\n" + "#define k4 IMM[2]\n" + "IMM UINT32 { 4, 0, 0, 0 }\n" + "#define k32 IMM[3]\n" + "IMM UINT32 { 32, 0, 0, 0 }\n" + "#define k128 IMM[4]\n" + "IMM UINT32 { 128, 0, 0, 0 }\n" + "#define kdeadcafe IMM[5]\n" + "IMM UINT32 { 3735931646, 0, 0, 0 }\n" + "#define kallowed_set IMM[6]\n" + "IMM UINT32 { 559035650, 0, 0, 0 }\n" + "#define k11111111 IMM[7]\n" + "IMM UINT32 { 286331153, 0, 0, 0 }\n" + "\n" + " BGNSUB\n" + " MOV offset.x, threadid\n" + "#ifdef TARGET_GLOBAL\n" + " UMUL tmp0.x, blockid, blocksz\n" + " UADD offset.x, offset, tmp0\n" + "#endif\n" + " UMUL offset.x, offset, k4\n" + " USLT tmp0.x, threadid, k32\n" + " STORE target.x, offset, k0\n" + " BARRIER\n" + " IF tmp0\n" + " MOV vlocal.x, k0\n" + " MOV arg.x, kdeadcafe\n" + " BGNLOOP\n" + " INEG arg.x, arg\n" + " ATOMUADD vshared.x, target, offset, arg\n" + " SFENCE target\n" + " USNE tmp0.x, vshared, vlocal\n" + " IF tmp0\n" + " BRK\n" + " ENDIF\n" + " UADD vlocal.x, vlocal, arg\n" + " ENDLOOP\n" + " UADD vlocal.x, vshared, arg\n" + " LOAD vshared.x, target, offset\n" + " USEQ tmp0.x, vshared, vlocal\n" + " STORE target.x, offset, tmp0\n" + " ELSE\n" + " UADD offset.x, offset, -k128\n" + " MOV count.x, k0\n" + " MOV last.x, k0\n" + " BGNLOOP\n" + " LOAD vshared.x, target, offset\n" + " USEQ tmp0.x, vshared, kallowed_set.xxxx\n" + " USEQ tmp1.x, vshared, kallowed_set.yyyy\n" + " OR tmp0.x, tmp0, tmp1\n" + " IF tmp0\n" + " USEQ tmp0.x, vshared, last\n" + " IF tmp0\n" + " CONT\n" + " ENDIF\n" + " MOV last.x, vshared\n" + " ELSE\n" + " END\n" + " ENDIF\n" + " UADD count.x, count, k1\n" + " USEQ tmp0.x, count, k128\n" + " IF tmp0\n" + " BRK\n" + " ENDIF\n" + " ENDLOOP\n" + " ATOMXCHG tmp0.x, target, offset, k11111111\n" + " UADD offset.x, offset, k128\n" + " ATOMXCHG tmp0.x, target, offset, k11111111\n" + " SFENCE target\n" + " ENDIF\n" + "#ifdef TARGET_LOCAL\n" + " LOAD tmp0.x, RLOCAL, offset\n" + " UMUL tmp1.x, blockid, blocksz\n" + " UMUL tmp1.x, tmp1, k4\n" + " UADD offset.x, offset, tmp1\n" + " STORE RES[0].x, offset, tmp0\n" + "#endif\n" + " RET\n" + " ENDSUB\n"; + + void init(void *p, int s, int x, int y) { + *(uint32_t *)p = 0xdeadbeef; + } + void expect(void *p, int s, int x, int y) { + *(uint32_t *)p = x & 0x20 ? 0x11111111 : 0xffffffff; + } + + printf("- %s (%s)\n", __func__, global ? "global" : "local"); + + init_prog(ctx, 256, 0, 0, src, + (global ? "-DTARGET_GLOBAL" : "-DTARGET_LOCAL")); + init_tex(ctx, 0, PIPE_BUFFER, true, PIPE_FORMAT_R32_FLOAT, + 4096, 0, init); + init_compute_resources(ctx, (int []) { 0, -1 }); + launch_grid(ctx, (uint []){64, 1, 1}, (uint []){16, 1, 1}, 0, NULL); + check_tex(ctx, 0, expect, NULL); + destroy_compute_resources(ctx); + destroy_tex(ctx); + destroy_prog(ctx); +} + +int main(int argc, char *argv[]) +{ + struct context *ctx = CALLOC_STRUCT(context); + + init_ctx(ctx); + test_system_values(ctx); + test_resource_access(ctx); + test_function_calls(ctx); + test_input_global(ctx); + test_private(ctx); + test_local(ctx); + test_sample(ctx); + test_many_kern(ctx); + test_constant(ctx); + test_resource_indirect(ctx); + test_surface_ld(ctx); + test_surface_st(ctx); + test_barrier(ctx); + test_atom_ops(ctx, true); + test_atom_race(ctx, true); + test_atom_ops(ctx, false); + test_atom_race(ctx, false); + destroy_ctx(ctx); + + return 0; +} -- 2.30.2