src/gallium/drivers/softpipe/sp_compute.c

   1 /*
   2  * Copyright 2016 Red Hat.
   3  *
   4  * Permission is hereby granted, free of charge, to any person obtaining a
   5  * copy of this software and associated documentation files (the "Software"),
   6  * to deal in the Software without restriction, including without limitation
   7  * on the rights to use, copy, modify, merge, publish, distribute, sub
   8  * license, and/or sell copies of the Software, and to permit persons to whom
   9  * the Software is furnished to do so, subject to the following conditions:
  10  *
  11  * The above copyright notice and this permission notice (including the next
  12  * paragraph) shall be included in all copies or substantial portions of the
  13  * Software.
  14  *
  15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
  16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
  18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
  19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
  20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
  21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
  22  */
  23 #include "util/u_inlines.h"
  24 #include "util/u_math.h"
  25 #include "util/u_memory.h"
  26 #include "util/u_pstipple.h"
  27 #include "pipe/p_shader_tokens.h"
  28 #include "draw/draw_context.h"
  29 #include "draw/draw_vertex.h"
  30 #include "sp_context.h"
  31 #include "sp_screen.h"
  32 #include "sp_state.h"
  33 #include "sp_texture.h"
  34 #include "sp_tex_sample.h"
  35 #include "sp_tex_tile_cache.h"
  36 #include "tgsi/tgsi_parse.h"
  37
  38 static void
  39 cs_prepare(const struct sp_compute_shader *cs,
  40            struct tgsi_exec_machine *machine,
  41            int w, int h, int d,
  42            int g_w, int g_h, int g_d,
  43            int b_w, int b_h, int b_d,
  44            struct tgsi_sampler *sampler,
  45            struct tgsi_image *image,
  46            struct tgsi_buffer *buffer )
  47 {
  48    int j;
  49    /*
  50     * Bind tokens/shader to the interpreter's machine state.
  51     */
  52    tgsi_exec_machine_bind_shader(machine,
  53                                  cs->tokens,
  54                                  sampler, image, buffer);
  55
  56    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID] != -1) {
  57       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_THREAD_ID];
  58       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  59          machine->SystemValue[i].xyzw[0].i[j] = w;
  60          machine->SystemValue[i].xyzw[1].i[j] = h;
  61          machine->SystemValue[i].xyzw[2].i[j] = d;
  62       }
  63    }
  64
  65    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE] != -1) {
  66       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_GRID_SIZE];
  67       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  68          machine->SystemValue[i].xyzw[0].i[j] = g_w;
  69          machine->SystemValue[i].xyzw[1].i[j] = g_h;
  70          machine->SystemValue[i].xyzw[2].i[j] = g_d;
  71       }
  72    }
  73
  74    if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE] != -1) {
  75       unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_SIZE];
  76       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  77          machine->SystemValue[i].xyzw[0].i[j] = b_w;
  78          machine->SystemValue[i].xyzw[1].i[j] = b_h;
  79          machine->SystemValue[i].xyzw[2].i[j] = b_d;
  80       }
  81    }
  82 }
  83
  84 static bool
  85 cs_run(const struct sp_compute_shader *cs,
  86        int g_w, int g_h, int g_d,
  87        struct tgsi_exec_machine *machine, bool restart)
  88 {
  89    if (!restart) {
  90       if (machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID] != -1) {
  91          unsigned i = machine->SysSemanticToIndex[TGSI_SEMANTIC_BLOCK_ID];
  92          int j;
  93          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  94             machine->SystemValue[i].xyzw[0].i[j] = g_w;
  95             machine->SystemValue[i].xyzw[1].i[j] = g_h;
  96             machine->SystemValue[i].xyzw[2].i[j] = g_d;
  97          }
  98       }
  99       machine->NonHelperMask = (1 << 1) - 1;
 100    }
 101
 102    tgsi_exec_machine_run(machine, restart ? machine->pc : 0);
 103
 104    if (machine->pc != -1)
 105       return true;
 106    return false;
 107 }
 108
 109 static void
 110 run_workgroup(const struct sp_compute_shader *cs,
 111               int g_w, int g_h, int g_d, int num_threads,
 112               struct tgsi_exec_machine **machines)
 113 {
 114    int i;
 115    bool grp_hit_barrier, restart_threads = false;
 116
 117    do {
 118       grp_hit_barrier = false;
 119       for (i = 0; i < num_threads; i++) {
 120          grp_hit_barrier |= cs_run(cs, g_w, g_h, g_d, machines[i], restart_threads);
 121       }
 122       restart_threads = false;
 123       if (grp_hit_barrier) {
 124          grp_hit_barrier = false;
 125          restart_threads = true;
 126       }
 127    } while (restart_threads);
 128 }
 129
 130 static void
 131 cs_delete(const struct sp_compute_shader *cs,
 132           struct tgsi_exec_machine *machine)
 133 {
 134    if (machine->Tokens == cs->tokens) {
 135       tgsi_exec_machine_bind_shader(machine, NULL, NULL, NULL, NULL);
 136    }
 137 }
 138
 139 static void
 140 fill_grid_size(struct pipe_context *context,
 141                const struct pipe_grid_info *info,
 142                uint32_t grid_size[3])
 143 {
 144    struct pipe_transfer *transfer;
 145    uint32_t *params;
 146    if (!info->indirect) {
 147       grid_size[0] = info->grid[0];
 148       grid_size[1] = info->grid[1];
 149       grid_size[2] = info->grid[2];
 150       return;
 151    }
 152    params = pipe_buffer_map_range(context, info->indirect,
 153                                   info->indirect_offset,
 154                                   3 * sizeof(uint32_t),
 155                                   PIPE_TRANSFER_READ,
 156                                   &transfer);
 157
 158    if (!transfer)
 159       return;
 160
 161    grid_size[0] = params[0];
 162    grid_size[1] = params[1];
 163    grid_size[2] = params[2];
 164    pipe_buffer_unmap(context, transfer);
 165 }
 166
 167 void
 168 softpipe_launch_grid(struct pipe_context *context,
 169                      const struct pipe_grid_info *info)
 170 {
 171    struct softpipe_context *softpipe = softpipe_context(context);
 172    struct sp_compute_shader *cs = softpipe->cs;
 173    int num_threads_in_group;
 174    struct tgsi_exec_machine **machines;
 175    int bwidth, bheight, bdepth;
 176    int w, h, d, i;
 177    int g_w, g_h, g_d;
 178    uint32_t grid_size[3] = {0};
 179    void *local_mem = NULL;
 180
 181    softpipe_update_compute_samplers(softpipe);
 182    bwidth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_WIDTH];
 183    bheight = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_HEIGHT];
 184    bdepth = cs->info.properties[TGSI_PROPERTY_CS_FIXED_BLOCK_DEPTH];
 185    num_threads_in_group = bwidth * bheight * bdepth;
 186
 187    fill_grid_size(context, info, grid_size);
 188
 189    if (cs->shader.req_local_mem) {
 190       local_mem = CALLOC(1, cs->shader.req_local_mem);
 191    }
 192
 193    machines = CALLOC(sizeof(struct tgsi_exec_machine *), num_threads_in_group);
 194    if (!machines) {
 195       FREE(local_mem);
 196       return;
 197    }
 198
 199    /* initialise machines + GRID_SIZE + THREAD_ID  + BLOCK_SIZE */
 200    for (d = 0; d < bdepth; d++) {
 201       for (h = 0; h < bheight; h++) {
 202          for (w = 0; w < bwidth; w++) {
 203             int idx = w + (h * bwidth) + (d * bheight * bwidth);
 204             machines[idx] = tgsi_exec_machine_create(PIPE_SHADER_COMPUTE);
 205
 206             machines[idx]->LocalMem = local_mem;
 207             machines[idx]->LocalMemSize = cs->shader.req_local_mem;
 208             cs_prepare(cs, machines[idx],
 209                        w, h, d,
 210                        grid_size[0], grid_size[1], grid_size[2],
 211                        bwidth, bheight, bdepth,
 212                        (struct tgsi_sampler *)softpipe->tgsi.sampler[PIPE_SHADER_COMPUTE],
 213                        (struct tgsi_image *)softpipe->tgsi.image[PIPE_SHADER_COMPUTE],
 214                        (struct tgsi_buffer *)softpipe->tgsi.buffer[PIPE_SHADER_COMPUTE]);
 215             tgsi_exec_set_constant_buffers(machines[idx], PIPE_MAX_CONSTANT_BUFFERS,
 216                                            softpipe->mapped_constants[PIPE_SHADER_COMPUTE],
 217                                            softpipe->const_buffer_size[PIPE_SHADER_COMPUTE]);
 218          }
 219       }
 220    }
 221
 222    for (g_d = 0; g_d < grid_size[2]; g_d++) {
 223       for (g_h = 0; g_h < grid_size[1]; g_h++) {
 224          for (g_w = 0; g_w < grid_size[0]; g_w++) {
 225             run_workgroup(cs, g_w, g_h, g_d, num_threads_in_group, machines);
 226          }
 227       }
 228    }
 229
 230    for (i = 0; i < num_threads_in_group; i++) {
 231       cs_delete(cs, machines[i]);
 232       tgsi_exec_machine_destroy(machines[i]);
 233    }
 234
 235    FREE(local_mem);
 236    FREE(machines);
 237 }