this->pull_constant_loc = v->pull_constant_loc;
this->uniforms = v->uniforms;
this->subgroup_id = v->subgroup_id;
+ for (unsigned i = 0; i < ARRAY_SIZE(this->group_size); i++)
+ this->group_size[i] = v->group_size[i];
}
void
this->first_non_payload_grf = payload.num_regs + prog_data->curb_read_length;
}
+/*
+ * Build up an array of indices into the urb_setup array that
+ * references the active entries of the urb_setup array.
+ * Used to accelerate walking the active entries of the urb_setup array
+ * on each upload.
+ */
+void
+brw_compute_urb_setup_index(struct brw_wm_prog_data *wm_prog_data)
+{
+ /* Make sure uint8_t is sufficient to hold any VARYING_SLOT_* value,
+ * since urb_setup_attribs stores slot numbers as uint8_t.
+ */
+ STATIC_ASSERT(VARYING_SLOT_MAX <= 0xff);
+ uint8_t index = 0;
+ for (uint8_t attr = 0; attr < VARYING_SLOT_MAX; attr++) {
+ /* A negative urb_setup entry marks an inactive slot; record only
+ * the attributes that actually have a URB setup location.
+ */
+ if (wm_prog_data->urb_setup[attr] >= 0) {
+ wm_prog_data->urb_setup_attribs[index++] = attr;
+ }
+ }
+ /* Number of valid entries in urb_setup_attribs[]. */
+ wm_prog_data->urb_setup_attribs_count = index;
+}
+
static void
calculate_urb_setup(const struct gen_device_info *devinfo,
const struct brw_wm_prog_key *key,
struct brw_vue_map prev_stage_vue_map;
brw_compute_vue_map(devinfo, &prev_stage_vue_map,
key->input_slots_valid,
- nir->info.separate_shader);
+ nir->info.separate_shader, 1);
int first_slot =
brw_compute_first_urb_slot_required(nir->info.inputs_read,
}
prog_data->num_varying_inputs = urb_next;
+ prog_data->inputs = nir->info.inputs_read;
+
+ brw_compute_urb_setup_index(prog_data);
}
void
const int iteration = 99;
int pass_num = 0;
- if (OPT(opt_cmod_propagation))
- OPT(dead_code_eliminate);
+ if (OPT(opt_cmod_propagation)) {
+ /* dead_code_eliminate "undoes" the fixing done by
+ * fixup_3src_null_dest, so we have to do it again if
+ * dead_code_eliminate makes any progress.
+ */
+ if (OPT(dead_code_eliminate))
+ fixup_3src_null_dest();
+ }
+
/* We only allow spilling for the last schedule mode and only if the
* allow_spilling parameter and dispatch width work out ok.
wm_prog_data->urb_setup[VARYING_SLOT_LAYER] = 0;
wm_prog_data->num_varying_inputs = 1;
+
+ brw_compute_urb_setup_index(wm_prog_data);
}
bool
return reg;
}
+/*
+ * Return the total push constant size in bytes for a compute dispatch
+ * of `threads` threads: the per-thread block scaled by the thread
+ * count, plus the single shared cross-thread block.  Both block sizes
+ * must already be register (REG_SIZE) aligned.
+ */
+unsigned
+brw_cs_push_const_total_size(const struct brw_cs_prog_data *cs_prog_data,
+ unsigned threads)
+{
+ assert(cs_prog_data->push.per_thread.size % REG_SIZE == 0);
+ assert(cs_prog_data->push.cross_thread.size % REG_SIZE == 0);
+ return cs_prog_data->push.per_thread.size * threads +
+ cs_prog_data->push.cross_thread.size;
+}
+
static void
fill_push_const_block_info(struct brw_push_const_block *block, unsigned dwords)
{
fill_push_const_block_info(&cs_prog_data->push.cross_thread, cross_thread_dwords);
fill_push_const_block_info(&cs_prog_data->push.per_thread, per_thread_dwords);
- unsigned total_dwords =
- (cs_prog_data->push.per_thread.size * cs_prog_data->threads +
- cs_prog_data->push.cross_thread.size) / 4;
- fill_push_const_block_info(&cs_prog_data->push.total, total_dwords);
-
assert(cs_prog_data->push.cross_thread.dwords % 8 == 0 ||
cs_prog_data->push.per_thread.size == 0);
assert(cs_prog_data->push.cross_thread.dwords +
prog_data->nr_params);
}
-static void
-cs_set_simd_size(struct brw_cs_prog_data *cs_prog_data, unsigned size)
-{
- cs_prog_data->simd_size = size;
- unsigned group_size = cs_prog_data->local_size[0] *
- cs_prog_data->local_size[1] * cs_prog_data->local_size[2];
- cs_prog_data->threads = (group_size + size - 1) / size;
-}
-
static nir_shader *
compile_cs_to_nir(const struct brw_compiler *compiler,
void *mem_ctx,
char **error_str)
{
prog_data->base.total_shared = src_shader->info.cs.shared_size;
- prog_data->local_size[0] = src_shader->info.cs.local_size[0];
- prog_data->local_size[1] = src_shader->info.cs.local_size[1];
- prog_data->local_size[2] = src_shader->info.cs.local_size[2];
prog_data->slm_size = src_shader->num_shared;
- unsigned local_workgroup_size =
- src_shader->info.cs.local_size[0] * src_shader->info.cs.local_size[1] *
- src_shader->info.cs.local_size[2];
+
+ unsigned local_workgroup_size;
+ if (prog_data->uses_variable_group_size) {
+ prog_data->max_variable_local_size =
+ src_shader->info.cs.max_variable_local_size;
+ local_workgroup_size = src_shader->info.cs.max_variable_local_size;
+ } else {
+ prog_data->local_size[0] = src_shader->info.cs.local_size[0];
+ prog_data->local_size[1] = src_shader->info.cs.local_size[1];
+ prog_data->local_size[2] = src_shader->info.cs.local_size[2];
+ local_workgroup_size = src_shader->info.cs.local_size[0] *
+ src_shader->info.cs.local_size[1] * src_shader->info.cs.local_size[2];
+ }
/* Limit max_threads to 64 for the GPGPU_WALKER command */
const uint32_t max_threads = MIN2(64, compiler->devinfo->max_cs_threads);
assert(v8->max_dispatch_width >= 32);
v = v8;
- cs_set_simd_size(prog_data, 8);
+ prog_data->simd_size = 8;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}
assert(v16->max_dispatch_width >= 32);
v = v16;
- cs_set_simd_size(prog_data, 16);
+ prog_data->simd_size = 16;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}
}
} else {
v = v32;
- cs_set_simd_size(prog_data, 32);
+ prog_data->simd_size = 32;
cs_fill_push_const_info(compiler->devinfo, prog_data);
}
}