projects
/
mesa.git
/ blobdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Merge remote-tracking branch 'mesa-public/master' into vulkan
[mesa.git]
/
src
/
mesa
/
drivers
/
dri
/
i965
/
brw_cs.cpp
diff --git
a/src/mesa/drivers/dri/i965/brw_cs.cpp
b/src/mesa/drivers/dri/i965/brw_cs.cpp
index fa8b5c8415d743a49b142810c72f81d61b905147..6ce5779137ef1bfe6f38a688ca8418f71169c07e 100644
(file)
--- a/
src/mesa/drivers/dri/i965/brw_cs.cpp
+++ b/
src/mesa/drivers/dri/i965/brw_cs.cpp
@@
-55,7
+55,7
@@
brw_cs_prog_data_compare(const void *in_a, const void *in_b)
}
}
-
static
const unsigned *
+const unsigned *
brw_cs_emit(struct brw_context *brw,
void *mem_ctx,
const struct brw_cs_prog_key *key,
brw_cs_emit(struct brw_context *brw,
void *mem_ctx,
const struct brw_cs_prog_key *key,
@@
-82,7
+82,7
@@
brw_cs_emit(struct brw_context *brw,
prog_data->local_size[0] = cp->LocalSize[0];
prog_data->local_size[1] = cp->LocalSize[1];
prog_data->local_size[2] = cp->LocalSize[2];
prog_data->local_size[0] = cp->LocalSize[0];
prog_data->local_size[1] = cp->LocalSize[1];
prog_data->local_size[2] = cp->LocalSize[2];
-
int
local_workgroup_size =
+
unsigned
local_workgroup_size =
cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
cfg_t *cfg = NULL;
cp->LocalSize[0] * cp->LocalSize[1] * cp->LocalSize[2];
cfg_t *cfg = NULL;
@@
-94,7
+94,8
@@
brw_cs_emit(struct brw_context *brw,
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
/* Now the main event: Visit the shader IR and generate our CS IR for it.
*/
- fs_visitor v8(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ fs_visitor v8(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
&cp->Base, 8, st_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
&cp->Base, 8, st_index);
if (!v8.run_cs()) {
fail_msg = v8.fail_msg;
@@
-103,7
+104,8
@@
brw_cs_emit(struct brw_context *brw,
prog_data->simd_size = 8;
}
prog_data->simd_size = 8;
}
- fs_visitor v16(brw, mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
+ fs_visitor v16(brw->intelScreen->compiler, brw,
+ mem_ctx, MESA_SHADER_COMPUTE, key, &prog_data->base, prog,
&cp->Base, 16, st_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
&cp->Base, 16, st_index);
if (likely(!(INTEL_DEBUG & DEBUG_NO16)) &&
!fail_msg && !v8.simd16_unsupported &&
@@
-180,7
+182,8
@@
brw_codegen_cs_prog(struct brw_context *brw,
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
* prog_data associated with the compiled program, and which will be freed
* by the state cache.
*/
- int param_count = cs->num_uniform_components;
+ int param_count = cs->num_uniform_components +
+ cs->NumImages * BRW_IMAGE_PARAM_SIZE;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
/* The backend also sometimes adds params for texture size. */
param_count += 2 * ctx->Const.Program[MESA_SHADER_COMPUTE].MaxTextureImageUnits;
@@
-188,7
+191,10
@@
brw_codegen_cs_prog(struct brw_context *brw,
rzalloc_array(NULL, const gl_constant_value *, param_count);
prog_data.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
rzalloc_array(NULL, const gl_constant_value *, param_count);
prog_data.base.pull_param =
rzalloc_array(NULL, const gl_constant_value *, param_count);
+ prog_data.base.image_param =
+ rzalloc_array(NULL, struct brw_image_param, cs->NumImages);
prog_data.base.nr_params = param_count;
prog_data.base.nr_params = param_count;
+ prog_data.base.nr_image_params = cs->NumImages;
program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
&cp->program, prog, &program_size);
program = brw_cs_emit(brw, mem_ctx, key, &prog_data,
&cp->program, prog, &program_size);
@@
-289,6
+295,17
@@
brw_cs_precompile(struct gl_context *ctx,
}
}
+static unsigned
+get_cs_thread_count(const struct brw_cs_prog_data *cs_prog_data)
+{
+ const unsigned simd_size = cs_prog_data->simd_size;
+ unsigned group_size = cs_prog_data->local_size[0] *
+ cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
+
+ return (group_size + simd_size - 1) / simd_size;
+}
+
+
static void
brw_upload_cs_state(struct brw_context *brw)
{
static void
brw_upload_cs_state(struct brw_context *brw)
{
@@
-314,6
+331,8
@@
brw_upload_cs_state(struct brw_context *brw)
prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ unsigned threads = get_cs_thread_count(cs_prog_data);
+
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
uint32_t dwords = brw->gen < 8 ? 8 : 9;
BEGIN_BATCH(dwords);
OUT_BATCH(MEDIA_VFE_STATE << 16 | (dwords - 2));
@@
-363,6
+382,13
@@
brw_upload_cs_state(struct brw_context *brw)
desc[dw++] = 0;
desc[dw++] = 0;
desc[dw++] = stage_state->bind_bo_offset;
desc[dw++] = 0;
desc[dw++] = 0;
desc[dw++] = stage_state->bind_bo_offset;
+ desc[dw++] = 0;
+ const uint32_t media_threads =
+ brw->gen >= 8 ?
+ SET_FIELD(threads, GEN8_MEDIA_GPGPU_THREAD_COUNT) :
+ SET_FIELD(threads, MEDIA_GPGPU_THREAD_COUNT);
+ assert(threads <= brw->max_cs_threads);
+ desc[dw++] = media_threads;
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));
BEGIN_BATCH(4);
OUT_BATCH(MEDIA_INTERFACE_DESCRIPTOR_LOAD << 16 | (4 - 2));