#include <assert.h>
-#include "common/gen_device_info.h"
+#include "dev/gen_device_info.h"
#include "common/gen_sample_positions.h"
#include "genxml/gen_macros.h"
#include "main/state.h"
#include "brw_context.h"
-#if GEN_GEN == 6
-#include "brw_defines.h"
-#endif
#include "brw_draw.h"
#include "brw_multisample_state.h"
#include "brw_state.h"
UNUSED static void *
emit_dwords(struct brw_context *brw, unsigned n)
{
- intel_batchbuffer_begin(brw, n, RENDER_RING);
+ intel_batchbuffer_begin(brw, n);
uint32_t *map = brw->batch.map_next;
brw->batch.map_next += n;
intel_batchbuffer_advance(brw);
struct brw_address {
struct brw_bo *bo;
- uint32_t read_domains;
- uint32_t write_domain;
+ unsigned reloc_flags;
uint32_t offset;
};
-static uint64_t
-emit_reloc(struct brw_context *brw,
- void *location, struct brw_address address, uint32_t delta)
-{
- uint32_t offset = (char *) location - (char *) brw->batch.map;
-
- return brw_emit_reloc(&brw->batch, offset, address.bo,
- address.offset + delta,
- address.read_domains,
- address.write_domain);
-}
-
#define __gen_address_type struct brw_address
#define __gen_user_data struct brw_context
__gen_combine_address(struct brw_context *brw, void *location,
struct brw_address address, uint32_t delta)
{
+ struct intel_batchbuffer *batch = &brw->batch;
+ uint32_t offset;
+
if (address.bo == NULL) {
return address.offset + delta;
} else {
- return emit_reloc(brw, location, address, delta);
+ if (GEN_GEN < 6 && brw_ptr_in_state_buffer(batch, location)) {
+ offset = (char *) location - (char *) brw->batch.state.map;
+ return brw_state_reloc(batch, offset, address.bo,
+ address.offset + delta,
+ address.reloc_flags);
+ }
+
+ assert(!brw_ptr_in_state_buffer(batch, location));
+
+ offset = (char *) location - (char *) brw->batch.batch.map;
+ return brw_batch_reloc(batch, offset, address.bo,
+ address.offset + delta,
+ address.reloc_flags);
}
}
-static inline struct brw_address
-render_bo(struct brw_bo *bo, uint32_t offset)
+UNUSED static struct brw_address
+rw_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
.bo = bo,
.offset = offset,
- .read_domains = I915_GEM_DOMAIN_RENDER,
- .write_domain = I915_GEM_DOMAIN_RENDER,
+ .reloc_flags = RELOC_WRITE,
};
}
-static inline struct brw_address
-render_ro_bo(struct brw_bo *bo, uint32_t offset)
+static struct brw_address
+ro_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
.bo = bo,
.offset = offset,
- .read_domains = I915_GEM_DOMAIN_RENDER,
- .write_domain = 0,
};
}
-static inline struct brw_address
-instruction_bo(struct brw_bo *bo, uint32_t offset)
+static struct brw_address
+rw_32_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
.bo = bo,
.offset = offset,
- .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
- .write_domain = I915_GEM_DOMAIN_INSTRUCTION,
+ .reloc_flags = RELOC_WRITE | RELOC_32BIT,
};
}
-static inline struct brw_address
-instruction_ro_bo(struct brw_bo *bo, uint32_t offset)
+static struct brw_address
+ro_32_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
.bo = bo,
.offset = offset,
- .read_domains = I915_GEM_DOMAIN_INSTRUCTION,
- .write_domain = 0,
+ .reloc_flags = RELOC_32BIT,
};
}
-static inline struct brw_address
-vertex_bo(struct brw_bo *bo, uint32_t offset)
+UNUSED static struct brw_address
+ggtt_bo(struct brw_bo *bo, uint32_t offset)
{
return (struct brw_address) {
.bo = bo,
.offset = offset,
- .read_domains = I915_GEM_DOMAIN_VERTEX,
- .write_domain = 0,
+ .reloc_flags = RELOC_WRITE | RELOC_NEEDS_GGTT,
};
}
#if GEN_GEN == 4
-static inline struct brw_address
+static struct brw_address
KSP(struct brw_context *brw, uint32_t offset)
{
- return instruction_bo(brw->cache.bo, offset);
-}
-
-static inline struct brw_address
-KSP_ro(struct brw_context *brw, uint32_t offset)
-{
- return instruction_ro_bo(brw->cache.bo, offset);
+ return ro_bo(brw->cache.bo, offset);
}
#else
-static inline uint32_t
-KSP(struct brw_context *brw, uint32_t offset)
+static uint32_t
+KSP(UNUSED struct brw_context *brw, uint32_t offset)
{
return offset;
}
-
-#define KSP_ro KSP
-
#endif
#include "genxml/genX_pack.h"
})
#define brw_state_emit(brw, cmd, align, offset, name) \
- for (struct cmd name = { 0, }, \
+ for (struct cmd name = {}, \
*_dst = brw_state_batch(brw, _brw_cmd_length(cmd) * 4, \
align, offset); \
__builtin_expect(_dst != NULL, 1); \
* to a FBO (i.e. any named frame buffer object), we *don't*
* need to invert - we already match the layout.
*/
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (ctx->DrawBuffer->FlipY) {
for (unsigned i = 0; i < 32; i++)
poly.PatternRow[i] = ctx->PolygonStipple[31 - i]; /* invert */
} else {
* to a user-created FBO then our native pixel coordinate system
* works just fine, and there's no window system to worry about.
*/
- if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) {
+ if (ctx->DrawBuffer->FlipY) {
poly.PolygonStippleYOffset =
(32 - (_mesa_geometric_height(ctx->DrawBuffer) & 31)) & 31;
}
unsigned buffer_nr,
struct brw_bo *bo,
unsigned start_offset,
- unsigned end_offset,
+ MAYBE_UNUSED unsigned end_offset,
unsigned stride,
- unsigned step_rate)
+ MAYBE_UNUSED unsigned step_rate)
{
struct GENX(VERTEX_BUFFER_STATE) buf_state = {
.VertexBufferIndex = buffer_nr,
.BufferPitch = stride,
- .BufferStartingAddress = vertex_bo(bo, start_offset),
+
+ /* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses. If you happen
+ * to have two vertex buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions. To work
+ * around this problem, we restrict vertex buffers to the low 32 bits of
+ * the address space.
+ */
+ .BufferStartingAddress = ro_32_bo(bo, start_offset),
#if GEN_GEN >= 8
.BufferSize = end_offset - start_offset,
#endif
.BufferAccessType = step_rate ? INSTANCEDATA : VERTEXDATA,
.InstanceDataStepRate = step_rate,
#if GEN_GEN >= 5
- .EndAddress = vertex_bo(bo, end_offset - 1),
+ .EndAddress = ro_bo(bo, end_offset - 1),
#endif
#endif
-#if GEN_GEN == 10
+#if GEN_GEN == 11
+ .VertexBufferMOCS = ICL_MOCS_WB,
+#elif GEN_GEN == 10
.VertexBufferMOCS = CNL_MOCS_WB,
#elif GEN_GEN == 9
.VertexBufferMOCS = SKL_MOCS_WB,
}
UNUSED static int
-uploads_needed(uint32_t format)
+uploads_needed(uint32_t format,
+ bool is_dual_slot)
{
if (!is_passthru_format(format))
return 1;
+ if (is_dual_slot)
+ return 2;
+
switch (format) {
case ISL_FORMAT_R64_PASSTHRU:
case ISL_FORMAT_R64G64_PASSTHRU:
if (!is_passthru_format(format))
return format;
+ /* ISL_FORMAT_R64_PASSTHRU and ISL_FORMAT_R64G64_PASSTHRU with an upload ==
+ * 1 means that we have been forced to do 2 uploads for a size <= 2. This
+ * happens with gen < 8 and dvec3 or dvec4 vertex shader input
+ * variables. In those cases, we return ISL_FORMAT_R32_FLOAT as a way of
+ * flagging that we want to fill with zeroes this second forced upload.
+ */
switch (format) {
case ISL_FORMAT_R64_PASSTHRU:
- return ISL_FORMAT_R32G32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32_FLOAT
+ : ISL_FORMAT_R32_FLOAT;
case ISL_FORMAT_R64G64_PASSTHRU:
- return ISL_FORMAT_R32G32B32A32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
+ : ISL_FORMAT_R32_FLOAT;
case ISL_FORMAT_R64G64B64_PASSTHRU:
- return !upload ? ISL_FORMAT_R32G32B32A32_FLOAT
- : ISL_FORMAT_R32G32_FLOAT;
+ return upload == 0 ? ISL_FORMAT_R32G32B32A32_FLOAT
+ : ISL_FORMAT_R32G32_FLOAT;
case ISL_FORMAT_R64G64B64A64_PASSTHRU:
return ISL_FORMAT_R32G32B32A32_FLOAT;
default:
upload_format_size(uint32_t upload_format)
{
switch (upload_format) {
+ case ISL_FORMAT_R32_FLOAT:
+
+ /* downsized_format has returned this one in order to flag that we are
+ * performing a second upload which we want to have filled with
+ * zeroes. This happens with gen < 8, a size <= 2, and dvec3 or dvec4
+ * vertex shader input variables.
+ */
+
+ return 0;
case ISL_FORMAT_R32G32_FLOAT:
return 2;
case ISL_FORMAT_R32G32B32A32_FLOAT:
}
}
+static UNUSED uint16_t
+pinned_bo_high_bits(struct brw_bo *bo)
+{
+ return (bo->kflags & EXEC_OBJECT_PINNED) ? bo->gtt_offset >> 32ull : 0;
+}
+
+/* The VF cache designers apparently cut corners, and made the cache key's
+ * <VertexBufferIndex, Memory Address> tuple only consider the bottom 32 bits
+ * of the address. If you happen to have two vertex buffers which get placed
+ * exactly 4 GiB apart and use them in back-to-back draw calls, you can get
+ * collisions. (These collisions can happen within a single batch.)
+ *
+ * In the soft-pin world, we'd like to assign addresses up front, and never
+ * move buffers. So, we need to do a VF cache invalidate if the buffer for
+ * a particular VB slot has different [48:32] address bits than the last one.
+ *
+ * In the relocation world, we have no idea what the addresses will be, so
+ * we can't apply this workaround. Instead, we tell the kernel to move it
+ * to the low 4GB regardless.
+ */
+static void
+vf_invalidate_for_vb_48bit_transitions(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ bool need_invalidate = false;
+ unsigned i;
+
+ for (i = 0; i < brw->vb.nr_buffers; i++) {
+ uint16_t high_bits = pinned_bo_high_bits(brw->vb.buffers[i].bo);
+
+ if (high_bits != brw->vb.last_bo_high_bits[i]) {
+ need_invalidate = true;
+ brw->vb.last_bo_high_bits[i] = high_bits;
+ }
+ }
+
+ /* Don't bother with draw parameter buffers - those are generated by
+ * the driver so we can select a consistent memory zone.
+ */
+
+ if (need_invalidate) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ }
+#endif
+}
+
+static void
+vf_invalidate_for_ib_48bit_transition(struct brw_context *brw)
+{
+#if GEN_GEN >= 8
+ uint16_t high_bits = pinned_bo_high_bits(brw->ib.bo);
+
+ if (high_bits != brw->ib.last_bo_high_bits) {
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_VF_CACHE_INVALIDATE);
+ brw->ib.last_bo_high_bits = high_bits;
+ }
+#endif
+}
+
static void
genX(emit_vertices)(struct brw_context *brw)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
uint32_t *dw;
brw_prepare_vertices(brw);
} else {
brw_batch_emit(brw, GENX(3DSTATE_VF_SGVS), vfs);
}
+#endif
- /* Normally we don't need an element for the SGVS attribute because the
- * 3DSTATE_VF_SGVS instruction lets you store the generated attribute in an
- * element that is past the list in 3DSTATE_VERTEX_ELEMENTS. However if
- * we're using draw parameters then we need an element for the those
- * values. Additionally if there is an edge flag element then the SGVS
- * can't be inserted past that so we need a dummy element to ensure that
- * the edge flag is the last one.
- */
- const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance ||
- ((vs_prog_data->uses_instanceid ||
- vs_prog_data->uses_vertexid)
- && uses_edge_flag));
-#else
- const bool needs_sgvs_element = (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance ||
+ const bool uses_draw_params =
+ vs_prog_data->uses_firstvertex ||
+ vs_prog_data->uses_baseinstance;
+
+ const bool uses_derived_draw_params =
+ vs_prog_data->uses_drawid ||
+ vs_prog_data->uses_is_indexed_draw;
+
+ const bool needs_sgvs_element = (uses_draw_params ||
vs_prog_data->uses_instanceid ||
vs_prog_data->uses_vertexid);
-#endif
+
unsigned nr_elements =
- brw->vb.nr_enabled + needs_sgvs_element + vs_prog_data->uses_drawid;
+ brw->vb.nr_enabled + needs_sgvs_element + uses_derived_draw_params;
#if GEN_GEN < 8
/* If any of the formats of vb.enabled needs more that one upload, we need
*/
for (unsigned i = 0; i < brw->vb.nr_enabled; i++) {
struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
- if (uploads_needed(format) > 1)
+ if (uploads_needed(format, input->is_dual_slot) > 1)
nr_elements++;
}
#endif
}
/* Now emit 3DSTATE_VERTEX_BUFFERS and 3DSTATE_VERTEX_ELEMENTS packets. */
- const bool uses_draw_params =
- vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance;
const unsigned nr_buffers = brw->vb.nr_buffers +
- uses_draw_params + vs_prog_data->uses_drawid;
+ uses_draw_params + uses_derived_draw_params;
+
+ vf_invalidate_for_vb_48bit_transitions(brw);
if (nr_buffers) {
assert(nr_buffers <= (GEN_GEN >= 6 ? 33 : 17));
* vertex element may poke over the end of the buffer by 2 bytes.
*/
const unsigned padding =
- (GEN_GEN <= 7 && !brw->is_baytrail && !brw->is_haswell) * 2;
+ (GEN_GEN <= 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail) * 2;
const unsigned end = buffer->offset + buffer->size + padding;
dw = genX(emit_vertex_buffer_state)(brw, dw, i, buffer->bo,
buffer->offset,
0 /* step rate */);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
dw = genX(emit_vertex_buffer_state)(brw, dw, brw->vb.nr_buffers + 1,
- brw->draw.draw_id_bo,
- brw->draw.draw_id_offset,
- brw->draw.draw_id_bo->size,
+ brw->draw.derived_draw_params_bo,
+ brw->draw.derived_draw_params_offset,
+ brw->draw.derived_draw_params_bo->size,
0 /* stride */,
0 /* step rate */);
}
unsigned i;
for (i = 0; i < brw->vb.nr_enabled; i++) {
const struct brw_vertex_element *input = brw->vb.enabled[i];
- uint32_t format = brw_get_vertex_surface_type(brw, input->glarray);
+ const struct gl_array_attributes *glattrib = input->glattrib;
+ uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
uint32_t comp0 = VFCOMP_STORE_SRC;
uint32_t comp1 = VFCOMP_STORE_SRC;
uint32_t comp2 = VFCOMP_STORE_SRC;
uint32_t comp3 = VFCOMP_STORE_SRC;
- const unsigned num_uploads = GEN_GEN < 8 ? uploads_needed(format) : 1;
+ const unsigned num_uploads = GEN_GEN < 8 ?
+ uploads_needed(format, input->is_dual_slot) : 1;
#if GEN_GEN >= 8
/* From the BDW PRM, Volume 2d, page 588 (VERTEX_ELEMENT_STATE):
* entry. */
const unsigned offset = input->offset + c * 16;
+ const struct gl_array_attributes *glattrib = input->glattrib;
const int size = (GEN_GEN < 8 && is_passthru_format(format)) ?
- upload_format_size(upload_format) : input->glarray->Size;
+ upload_format_size(upload_format) : glattrib->Size;
switch (size) {
case 0: comp0 = VFCOMP_STORE_0;
case 1: comp1 = VFCOMP_STORE_0;
case 2: comp2 = VFCOMP_STORE_0;
case 3:
- if (GEN_GEN >= 8 && input->glarray->Doubles) {
+ if (GEN_GEN >= 8 && glattrib->Doubles) {
comp3 = VFCOMP_STORE_0;
- } else if (input->glarray->Integer) {
+ } else if (glattrib->Integer) {
comp3 = VFCOMP_STORE_1_INT;
} else {
comp3 = VFCOMP_STORE_1_FP;
* to be specified as VFCOMP_STORE_0 in order to output a 256-bit
* vertex element."
*/
- if (input->glarray->Doubles && !input->is_dual_slot) {
+ if (glattrib->Doubles && !input->is_dual_slot) {
/* Store vertex elements which correspond to double and dvec2 vertex
* shader inputs as 128-bit vertex elements, instead of 256-bits.
*/
};
#if GEN_GEN >= 8
- if (vs_prog_data->uses_basevertex ||
- vs_prog_data->uses_baseinstance) {
+ if (uses_draw_params) {
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
elem_state.Component0Control = VFCOMP_STORE_SRC;
#else
elem_state.VertexBufferIndex = brw->vb.nr_buffers;
elem_state.SourceElementFormat = ISL_FORMAT_R32G32_UINT;
- if (vs_prog_data->uses_basevertex)
+ if (uses_draw_params) {
elem_state.Component0Control = VFCOMP_STORE_SRC;
-
- if (vs_prog_data->uses_baseinstance)
elem_state.Component1Control = VFCOMP_STORE_SRC;
+ }
if (vs_prog_data->uses_vertexid)
elem_state.Component2Control = VFCOMP_STORE_VID;
dw += GENX(VERTEX_ELEMENT_STATE_length);
}
- if (vs_prog_data->uses_drawid) {
+ if (uses_derived_draw_params) {
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
.VertexBufferIndex = brw->vb.nr_buffers + 1,
- .SourceElementFormat = ISL_FORMAT_R32_UINT,
+ .SourceElementFormat = ISL_FORMAT_R32G32_UINT,
.Component0Control = VFCOMP_STORE_SRC,
- .Component1Control = VFCOMP_STORE_0,
+ .Component1Control = VFCOMP_STORE_SRC,
.Component2Control = VFCOMP_STORE_0,
.Component3Control = VFCOMP_STORE_0,
#if GEN_GEN < 5
#if GEN_GEN >= 6
if (gen6_edgeflag_input) {
- const uint32_t format =
- brw_get_vertex_surface_type(brw, gen6_edgeflag_input->glarray);
+ const struct gl_array_attributes *glattrib = gen6_edgeflag_input->glattrib;
+ const uint32_t format = brw_get_vertex_surface_type(brw, glattrib);
struct GENX(VERTEX_ELEMENT_STATE) elem_state = {
.Valid = true,
.mesa = _NEW_POLYGON,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
+ BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VERTICES |
BRW_NEW_VS_PROG_DATA,
},
if (index_buffer == NULL)
return;
+ vf_invalidate_for_ib_48bit_transition(brw);
+
brw_batch_emit(brw, GENX(3DSTATE_INDEX_BUFFER), ib) {
#if GEN_GEN < 8 && !GEN_IS_HASWELL
ib.CutIndexEnable = brw->prim_restart.enable_cut_index;
#endif
ib.IndexFormat = brw_get_index_type(index_buffer->index_size);
- ib.BufferStartingAddress = vertex_bo(brw->ib.bo, 0);
+
+ /* The VF cache designers apparently cut corners, and made the cache
+ * only consider the bottom 32 bits of memory addresses. If you happen
+ * to have two index buffers which get placed exactly 4 GiB apart and
+ * use them in back-to-back draw calls, you can get collisions. To work
+ * around this problem, we restrict index buffers to the low 32 bits of
+ * the address space.
+ */
+ ib.BufferStartingAddress = ro_32_bo(brw->ib.bo, 0);
#if GEN_GEN >= 8
ib.IndexBufferMOCS = GEN_GEN >= 9 ? SKL_MOCS_WB : BDW_MOCS_WB;
ib.BufferSize = brw->ib.size;
#else
- ib.BufferEndingAddress = vertex_bo(brw->ib.bo, brw->ib.size - 1);
+ ib.BufferEndingAddress = ro_bo(brw->ib.bo, brw->ib.size - 1);
#endif
}
}
/* _NEW_POINT */
const struct gl_point_attrib *point = &ctx->Point;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
+
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
*point_sprite_enables = 0;
- /* BRW_NEW_FRAGMENT_PROGRAM
- *
- * If the fragment shader reads VARYING_SLOT_LAYER, then we need to pass in
- * the full vertex header. Otherwise, we can program the SF to start
- * reading at an offset of 1 (2 varying slots) to skip unnecessary data:
- * - VARYING_SLOT_PSIZ and BRW_VARYING_SLOT_NDC on gen4-5
- * - VARYING_SLOT_{PSIZ,LAYER} and VARYING_SLOT_POS on gen6+
- */
-
- bool fs_needs_vue_header = brw->fragment_program->info.inputs_read &
- (VARYING_BIT_LAYER | VARYING_BIT_VIEWPORT);
+ int first_slot =
+ brw_compute_first_urb_slot_required(fp->info.inputs_read,
+ &brw->vue_map_geom_out);
- *urb_entry_read_offset = fs_needs_vue_header ? 0 : 1;
+ /* Each URB offset packs two varying slots */
+ assert(first_slot % 2 == 0);
+ *urb_entry_read_offset = first_slot / 2;
/* From the Ivybridge PRM, Vol 2 Part 1, 3DSTATE_SBE,
* description of dw10 Point Sprite Texture Coordinate Enable:
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
-static void
-genX(upload_depth_stencil_state)(struct brw_context *brw)
+#if GEN_GEN >= 8
+typedef struct GENX(3DSTATE_WM_DEPTH_STENCIL) DEPTH_STENCIL_GENXML;
+#elif GEN_GEN >= 6
+typedef struct GENX(DEPTH_STENCIL_STATE) DEPTH_STENCIL_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) DEPTH_STENCIL_GENXML;
+#endif
+
+static inline void
+set_depth_stencil_bits(struct brw_context *brw, DEPTH_STENCIL_GENXML *ds)
{
struct gl_context *ctx = &brw->ctx;
struct gl_stencil_attrib *stencil = &ctx->Stencil;
const int b = stencil->_BackFace;
+ if (depth->Test && depth_irb) {
+ ds->DepthTestEnable = true;
+ ds->DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
+ ds->DepthTestFunction = intel_translate_compare_func(depth->Func);
+ }
+
+ if (brw->stencil_enabled) {
+ ds->StencilTestEnable = true;
+ ds->StencilWriteMask = stencil->WriteMask[0] & 0xff;
+ ds->StencilTestMask = stencil->ValueMask[0] & 0xff;
+
+ ds->StencilTestFunction =
+ intel_translate_compare_func(stencil->Function[0]);
+ ds->StencilFailOp =
+ intel_translate_stencil_op(stencil->FailFunc[0]);
+ ds->StencilPassDepthPassOp =
+ intel_translate_stencil_op(stencil->ZPassFunc[0]);
+ ds->StencilPassDepthFailOp =
+ intel_translate_stencil_op(stencil->ZFailFunc[0]);
+
+ ds->StencilBufferWriteEnable = brw->stencil_write_enabled;
+
+ if (brw->stencil_two_sided) {
+ ds->DoubleSidedStencilEnable = true;
+ ds->BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
+ ds->BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
+
+ ds->BackfaceStencilTestFunction =
+ intel_translate_compare_func(stencil->Function[b]);
+ ds->BackfaceStencilFailOp =
+ intel_translate_stencil_op(stencil->FailFunc[b]);
+ ds->BackfaceStencilPassDepthPassOp =
+ intel_translate_stencil_op(stencil->ZPassFunc[b]);
+ ds->BackfaceStencilPassDepthFailOp =
+ intel_translate_stencil_op(stencil->ZFailFunc[b]);
+ }
+
+#if GEN_GEN <= 5 || GEN_GEN >= 9
+ ds->StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
+ ds->BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
+#endif
+ }
+}
+
+#if GEN_GEN >= 6
+static void
+genX(upload_depth_stencil_state)(struct brw_context *brw)
+{
#if GEN_GEN >= 8
brw_batch_emit(brw, GENX(3DSTATE_WM_DEPTH_STENCIL), wmds) {
+ set_depth_stencil_bits(brw, &wmds);
+ }
#else
uint32_t ds_offset;
- brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, wmds) {
-#endif
- if (depth->Test && depth_irb) {
- wmds.DepthTestEnable = true;
- wmds.DepthBufferWriteEnable = brw_depth_writes_enabled(brw);
- wmds.DepthTestFunction = intel_translate_compare_func(depth->Func);
- }
-
- if (brw->stencil_enabled) {
- wmds.StencilTestEnable = true;
- wmds.StencilWriteMask = stencil->WriteMask[0] & 0xff;
- wmds.StencilTestMask = stencil->ValueMask[0] & 0xff;
-
- wmds.StencilTestFunction =
- intel_translate_compare_func(stencil->Function[0]);
- wmds.StencilFailOp =
- intel_translate_stencil_op(stencil->FailFunc[0]);
- wmds.StencilPassDepthPassOp =
- intel_translate_stencil_op(stencil->ZPassFunc[0]);
- wmds.StencilPassDepthFailOp =
- intel_translate_stencil_op(stencil->ZFailFunc[0]);
-
- wmds.StencilBufferWriteEnable = brw->stencil_write_enabled;
-
- if (brw->stencil_two_sided) {
- wmds.DoubleSidedStencilEnable = true;
- wmds.BackfaceStencilWriteMask = stencil->WriteMask[b] & 0xff;
- wmds.BackfaceStencilTestMask = stencil->ValueMask[b] & 0xff;
-
- wmds.BackfaceStencilTestFunction =
- intel_translate_compare_func(stencil->Function[b]);
- wmds.BackfaceStencilFailOp =
- intel_translate_stencil_op(stencil->FailFunc[b]);
- wmds.BackfaceStencilPassDepthPassOp =
- intel_translate_stencil_op(stencil->ZPassFunc[b]);
- wmds.BackfaceStencilPassDepthFailOp =
- intel_translate_stencil_op(stencil->ZFailFunc[b]);
- }
-
-#if GEN_GEN >= 9
- wmds.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
- wmds.BackfaceStencilReferenceValue = _mesa_get_stencil_ref(ctx, b);
-#endif
- }
+ brw_state_emit(brw, GENX(DEPTH_STENCIL_STATE), 64, &ds_offset, ds) {
+ set_depth_stencil_bits(brw, &ds);
}
+ /* Now upload a pointer to the indirect state */
#if GEN_GEN == 6
brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
ptr.DEPTH_STENCIL_STATEChange = true;
}
-#elif GEN_GEN == 7
+#else
brw_batch_emit(brw, GENX(3DSTATE_DEPTH_STENCIL_STATE_POINTERS), ptr) {
ptr.PointertoDEPTH_STENCIL_STATE = ds_offset;
}
#endif
+#endif
}
static const struct brw_tracked_state genX(depth_stencil_state) = {
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
+#if GEN_GEN <= 5
+
+static void
+genX(upload_clip_state)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+ brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) {
+ clip.KernelStartPointer = KSP(brw, brw->clip.prog_offset);
+ clip.GRFRegisterCount =
+ DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1;
+ clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+ clip.SingleProgramFlow = true;
+ clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length;
+ clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length;
+
+ /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
+ clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2;
+ clip.DispatchGRFStartRegisterForURBData = 1;
+ clip.VertexURBEntryReadOffset = 0;
+
+ /* BRW_NEW_URB_FENCE */
+ clip.NumberofURBEntries = brw->urb.nr_clip_entries;
+ clip.URBEntryAllocationSize = brw->urb.vsize - 1;
+
+ if (brw->urb.nr_clip_entries >= 10) {
+ /* Half of the URB entries go to each thread, and it has to be an
+ * even number.
+ */
+ assert(brw->urb.nr_clip_entries % 2 == 0);
+
+ /* Although up to 16 concurrent Clip threads are allowed on Ironlake,
+ * only 2 threads can output VUEs at a time.
+ */
+ clip.MaximumNumberofThreads = (GEN_GEN == 5 ? 16 : 2) - 1;
+ } else {
+ assert(brw->urb.nr_clip_entries >= 5);
+ clip.MaximumNumberofThreads = 1 - 1;
+ }
+
+ clip.VertexPositionSpace = VPOS_NDCSPACE;
+ clip.UserClipFlagsMustClipEnable = true;
+ clip.GuardbandClipTestEnable = true;
+
+ clip.ClipperViewportStatePointer =
+ ro_bo(brw->batch.state.bo, brw->clip.vp_offset);
+
+ clip.ScreenSpaceViewportXMin = -1;
+ clip.ScreenSpaceViewportXMax = 1;
+ clip.ScreenSpaceViewportYMin = -1;
+ clip.ScreenSpaceViewportYMax = 1;
+
+ clip.ViewportXYClipTestEnable = true;
+ clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar);
+
+ /* _NEW_TRANSFORM */
+ if (GEN_GEN == 5 || GEN_IS_G4X) {
+ clip.UserClipDistanceClipTestEnableBitmask =
+ ctx->Transform.ClipPlanesEnabled;
+ } else {
+ /* Up to 6 actual clip flags, plus the 7th for the negative RHW
+ * workaround.
+ */
+ clip.UserClipDistanceClipTestEnableBitmask =
+ (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40;
+ }
+
+ if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE)
+ clip.APIMode = APIMODE_D3D;
+ else
+ clip.APIMode = APIMODE_OGL;
+
+ clip.GuardbandClipTestEnable = true;
+
+ clip.ClipMode = brw->clip.prog_data->clip_mode;
+
+#if GEN_IS_G4X
+ clip.NegativeWClipTestEnable = true;
+#endif
+ }
+}
+
+const struct brw_tracked_state genX(clip_state) = {
+ .dirty = {
+ .mesa = _NEW_TRANSFORM |
+ _NEW_VIEWPORT,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_CLIP_PROG_DATA |
+ BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_URB_FENCE,
+ },
+ .emit = genX(upload_clip_state),
+};
+
+#else
+
static void
genX(upload_clip_state)(struct brw_context *brw)
{
#endif
#if GEN_GEN == 7
- clip.FrontWinding = brw->polygon_front_bit == _mesa_is_user_fbo(fb);
+ clip.FrontWinding = brw->polygon_front_bit != fb->FlipY;
if (ctx->Polygon.CullFlag) {
switch (ctx->Polygon.CullFaceMode) {
clip.UserClipDistanceCullTestEnableBitmask =
brw_vue_prog_data(brw->vs.base.prog_data)->cull_distance_mask;
- clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp;
+ clip.ViewportZClipTestEnable = !(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar);
#endif
/* _NEW_LIGHT */
#if GEN_GEN <= 7
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ bool flip_y = ctx->DrawBuffer->FlipY;
UNUSED const bool multisampled_fbo =
_mesa_geometric_samples(ctx->DrawBuffer) > 1;
#endif
ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
brw_state_emit(brw, GENX(SF_STATE), 64, &brw->sf.state_offset, sf) {
- sf.KernelStartPointer = KSP_ro(brw, brw->sf.prog_offset);
+ sf.KernelStartPointer = KSP(brw, brw->sf.prog_offset);
sf.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
sf.GRFRegisterCount = DIV_ROUND_UP(sf_prog_data->total_grf, 16) - 1;
sf.DispatchGRFStartRegisterForURBData = 3;
* domain.
*/
sf.SetupViewportStateOffset =
- instruction_ro_bo(brw->batch.bo, brw->sf.vp_offset);
+ ro_bo(brw->batch.state.bo, brw->sf.vp_offset);
sf.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
#if GEN_GEN <= 7
/* _NEW_POLYGON */
- sf.FrontWinding = brw->polygon_front_bit == render_to_fbo;
+ sf.FrontWinding = brw->polygon_front_bit != flip_y;
#if GEN_GEN >= 6
sf.GlobalDepthOffsetEnableSolid = ctx->Polygon.OffsetFill;
sf.GlobalDepthOffsetEnableWireframe = ctx->Polygon.OffsetLine;
/* _NEW_LINE */
#if GEN_GEN == 8
- if (brw->is_cherryview)
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+
+ if (devinfo->is_cherryview)
sf.CHVLineWidth = brw_get_line_width(brw);
else
sf.LineWidth = brw_get_line_width(brw);
sf.SmoothPointEnable = true;
#endif
+#if GEN_GEN == 10
+ /* _NEW_BUFFERS
+ * Smooth Point Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+ */
+ const bool multisampled_fbo =
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ if (multisampled_fbo)
+ sf.SmoothPointEnable = false;
+#endif
+
#if GEN_IS_G4X || GEN_GEN >= 5
sf.AALineDistanceMode = AALINEDISTANCE_TRUE;
#endif
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted, too.
*/
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo) {
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y) {
sf.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
} else {
sf.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
_NEW_POINT |
_NEW_PROGRAM |
(GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0) |
- (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0),
+ (GEN_GEN <= 7 ? _NEW_BUFFERS | _NEW_POLYGON : 0) |
+ (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_VUE_MAP_GEOM_OUT |
(GEN_GEN <= 5 ? BRW_NEW_BATCH |
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
+static bool
+brw_color_buffer_write_enabled(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
+ unsigned i;
+
+ /* _NEW_BUFFERS */
+ for (i = 0; i < ctx->DrawBuffer->_NumColorDrawBuffers; i++) {
+ struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+ uint64_t outputs_written = fp->info.outputs_written;
+
+ /* _NEW_COLOR */
+ if (rb && (outputs_written & BITFIELD64_BIT(FRAG_RESULT_COLOR) ||
+ outputs_written & BITFIELD64_BIT(FRAG_RESULT_DATA0 + i)) &&
+ GET_COLORMASK(ctx->Color.ColorMask, i)) {
+ return true;
+ }
+ }
+
+ return false;
+}
+
static void
genX(upload_wm)(struct brw_context *brw)
{
UNUSED bool writes_depth =
wm_prog_data->computed_depth_mode != BRW_PSCDEPTH_OFF;
+ UNUSED struct brw_stage_state *stage_state = &brw->wm.base;
+ UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
-#if GEN_GEN < 7
- const struct brw_stage_state *stage_state = &brw->wm.base;
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
-
+#if GEN_GEN == 6
/* We can't fold this into gen6_upload_wm_push_constants(), because
* according to the SNB PRM, vol 2 part 1 section 7.2.2
* (3DSTATE_CONSTANT_PS [DevSNB]):
/* Pointer to the WM constant buffer. Covered by the set of
* state flags from gen6_upload_wm_push_constants.
*/
- wmcp.PointertoPSConstantBuffer0 = stage_state->push_const_offset;
- wmcp.PSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ wmcp.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ wmcp.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
+#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_WM), wm) {
- wm.StatisticsEnable = true;
+#else
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+ brw_state_emit(brw, GENX(WM_STATE), 64, &stage_state->state_offset, wm) {
+#endif
+
+#if GEN_GEN <= 6
+ wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
+ wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
+ wm._32PixelDispatchEnable = wm_prog_data->dispatch_32;
+#endif
+
+#if GEN_GEN == 4
+ /* On gen4, we only have one shader kernel */
+ if (brw_wm_state_has_ksp(wm, 0)) {
+ assert(brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0) == 0);
+ wm.KernelStartPointer0 = KSP(brw, stage_state->prog_offset);
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
+ }
+#elif GEN_GEN == 5
+ /* On gen5, we have multiple shader kernels but only one GRF start
+ * register for all kernels
+ */
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
+ wm.KernelStartPointer2 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
+
+ wm.GRFRegisterCount0 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 0);
+ wm.GRFRegisterCount1 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 1);
+ wm.GRFRegisterCount2 = brw_wm_prog_data_reg_blocks(wm_prog_data, wm, 2);
+
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ wm_prog_data->base.dispatch_grf_start_reg;
+
+ /* Dispatch GRF Start should be the same for all shaders on gen5 */
+ if (brw_wm_state_has_ksp(wm, 1)) {
+ assert(wm_prog_data->base.dispatch_grf_start_reg ==
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1));
+ }
+ if (brw_wm_state_has_ksp(wm, 2)) {
+ assert(wm_prog_data->base.dispatch_grf_start_reg ==
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2));
+ }
+#elif GEN_GEN == 6
+ /* On gen6, we have multiple shader kernels and we no longer specify a
+ * register count for each one.
+ */
+ wm.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 0);
+ wm.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 1);
+ wm.KernelStartPointer2 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(wm_prog_data, wm, 2);
+
+ wm.DispatchGRFStartRegisterForConstantSetupData0 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 0);
+ wm.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 1);
+ wm.DispatchGRFStartRegisterForConstantSetupData2 =
+ brw_wm_prog_data_dispatch_grf_start_reg(wm_prog_data, wm, 2);
+#endif
+
+#if GEN_GEN <= 5
+ wm.ConstantURBEntryReadLength = wm_prog_data->base.curb_read_length;
+ /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */
+ wm.ConstantURBEntryReadOffset = brw->curbe.wm_start * 2;
+ wm.SetupURBEntryReadLength = wm_prog_data->num_varying_inputs * 2;
+ wm.SetupURBEntryReadOffset = 0;
+ wm.EarlyDepthTestEnable = true;
+#endif
+
+#if GEN_GEN >= 6
wm.LineAntialiasingRegionWidth = _10pixels;
wm.LineEndCapAntialiasingRegionWidth = _05pixels;
+ wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
+ wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
+#else
+ if (stage_state->sampler_count)
+ wm.SamplerStatePointer =
+ ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
+
+ wm.LineAntialiasingRegionWidth = _05pixels;
+ wm.LineEndCapAntialiasingRegionWidth = _10pixels;
+
+ /* _NEW_POLYGON */
+ if (ctx->Polygon.OffsetFill) {
+ wm.GlobalDepthOffsetEnable = true;
+ /* Something weird going on with legacy_global_depth_bias,
+ * offset_constant, scaling and MRD. This value passes glean
+ * but gives some odd results elsewere (eg. the
+ * quad-offset-units test).
+ */
+ wm.GlobalDepthOffsetConstant = ctx->Polygon.OffsetUnits * 2;
+
+ /* This is the only value that passes glean:
+ */
+ wm.GlobalDepthOffsetScale = ctx->Polygon.OffsetFactor;
+ }
+
+ wm.DepthCoefficientURBReadOffset = 1;
+#endif
+
+ /* BRW_NEW_STATS_WM */
+ wm.StatisticsEnable = GEN_GEN >= 6 || brw->stats_wm;
+
#if GEN_GEN < 7
if (wm_prog_data->base.use_alt_mode)
- wm.FloatingPointMode = Alternate;
+ wm.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+
+ wm.SamplerCount = GEN_GEN == 5 ?
+ 0 : DIV_ROUND_UP(stage_state->sampler_count, 4);
- wm.SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4);
- wm.BindingTableEntryCount = wm_prog_data->base.binding_table.size_bytes / 4;
+ wm.BindingTableEntryCount =
+ wm_prog_data->base.binding_table.size_bytes / 4;
wm.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
- wm._8PixelDispatchEnable = wm_prog_data->dispatch_8;
- wm._16PixelDispatchEnable = wm_prog_data->dispatch_16;
- wm.DispatchGRFStartRegisterForConstantSetupData0 =
- wm_prog_data->base.dispatch_grf_start_reg;
- wm.DispatchGRFStartRegisterForConstantSetupData2 =
- wm_prog_data->dispatch_grf_start_reg_2;
- wm.KernelStartPointer0 = stage_state->prog_offset;
- wm.KernelStartPointer2 = stage_state->prog_offset +
- wm_prog_data->prog_offset_2;
+
+#if GEN_GEN == 6
wm.DualSourceBlendEnable =
wm_prog_data->dual_src_blend && (ctx->Color.BlendEnabled & 1) &&
ctx->Color.Blend[0]._UsesDualSrc;
wm.PositionXYOffsetSelect = POSOFFSET_SAMPLE;
else
wm.PositionXYOffsetSelect = POSOFFSET_NONE;
+#endif
if (wm_prog_data->base.total_scratch) {
- wm.ScratchSpaceBasePointer =
- render_bo(stage_state->scratch_bo,
- ffs(stage_state->per_thread_scratch) - 11);
+ wm.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
+ wm.PerThreadScratchSpace =
+ ffs(stage_state->per_thread_scratch) - 11;
}
wm.PixelShaderComputedDepth = writes_depth;
#endif
- wm.PointRasterizationRule = RASTRULE_UPPER_RIGHT;
-
/* _NEW_LINE */
wm.LineStippleEnable = ctx->Line.StippleFlag;
/* _NEW_POLYGON */
wm.PolygonStippleEnable = ctx->Polygon.StippleFlag;
- wm.BarycentricInterpolationMode = wm_prog_data->barycentric_interp_modes;
#if GEN_GEN < 8
- /* _NEW_BUFFERS */
- const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
- wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+#if GEN_GEN >= 6
wm.PixelShaderUsesSourceW = wm_prog_data->uses_src_w;
- if (wm_prog_data->uses_kill ||
- _mesa_is_alpha_test_enabled(ctx) ||
- _mesa_is_alpha_to_coverage_enabled(ctx) ||
- wm_prog_data->uses_omask) {
- wm.PixelShaderKillsPixel = true;
- }
- /* _NEW_BUFFERS | _NEW_COLOR */
- if (brw_color_buffer_write_enabled(brw) || writes_depth ||
- wm_prog_data->has_side_effects || wm.PixelShaderKillsPixel) {
- wm.ThreadDispatchEnable = true;
- }
+ /* _NEW_BUFFERS */
+ const bool multisampled_fbo = _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+
if (multisampled_fbo) {
/* _NEW_MULTISAMPLE */
if (ctx->Multisample.Enabled)
wm.MultisampleRasterizationMode = MSRASTMODE_OFF_PIXEL;
wm.MultisampleDispatchMode = MSDISPMODE_PERSAMPLE;
}
+#endif
+ wm.PixelShaderUsesSourceDepth = wm_prog_data->uses_src_depth;
+ if (wm_prog_data->uses_kill ||
+ _mesa_is_alpha_test_enabled(ctx) ||
+ _mesa_is_alpha_to_coverage_enabled(ctx) ||
+ (GEN_GEN >= 6 && wm_prog_data->uses_omask)) {
+ wm.PixelShaderKillsPixel = true;
+ }
+
+ /* _NEW_BUFFERS | _NEW_COLOR */
+ if (brw_color_buffer_write_enabled(brw) || writes_depth ||
+ wm.PixelShaderKillsPixel ||
+ (GEN_GEN >= 6 && wm_prog_data->has_side_effects)) {
+ wm.ThreadDispatchEnable = true;
+ }
#if GEN_GEN >= 7
wm.PixelShaderComputedDepthMode = wm_prog_data->computed_depth_mode;
wm.EarlyDepthStencilControl = EDSC_PSEXEC;
#endif
}
+
+#if GEN_GEN <= 5
+ if (brw->wm.offset_clamp != ctx->Polygon.OffsetClamp) {
+ brw_batch_emit(brw, GENX(3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP), clamp) {
+ clamp.GlobalDepthOffsetClamp = ctx->Polygon.OffsetClamp;
+ }
+
+ brw->wm.offset_clamp = ctx->Polygon.OffsetClamp;
+ }
+#endif
}
static const struct brw_tracked_state genX(wm_state) = {
.mesa = _NEW_LINE |
_NEW_POLYGON |
(GEN_GEN < 8 ? _NEW_BUFFERS |
- _NEW_COLOR |
- _NEW_MULTISAMPLE :
+ _NEW_COLOR :
0) |
- (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
+ (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0) |
+ (GEN_GEN < 6 ? _NEW_POLYGONSTIPPLE : 0) |
+ (GEN_GEN < 8 && GEN_GEN >= 6 ? _NEW_MULTISAMPLE : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_FS_PROG_DATA |
+ (GEN_GEN < 6 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_FRAGMENT_PROGRAM |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_SAMPLER_STATE_TABLE |
+ BRW_NEW_STATS_WM
+ : 0) |
(GEN_GEN < 7 ? BRW_NEW_BATCH : BRW_NEW_CONTEXT),
},
.emit = genX(upload_wm),
};
-#endif
/* ---------------------------------------------------------------------- */
+/* We restrict scratch buffers to the bottom 32 bits of the address space
+ * by using rw_32_bo().
+ *
+ * General State Base Address is a bit broken. If the address + size as
+ * seen by STATE_BASE_ADDRESS overflows 48 bits, the GPU appears to treat
+ * all accesses to the buffer as being out of bounds and returns zero.
+ */
+
#define INIT_THREAD_DISPATCH_FIELDS(pkt, prefix) \
pkt.KernelStartPointer = KSP(brw, stage_state->prog_offset); \
pkt.SamplerCount = \
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4); \
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to \
+ * disable prefetching of binding tables in A0 and B0 steppings. \
+ * TODO: Revisit this WA on C0 stepping. \
+ */ \
pkt.BindingTableEntryCount = \
+ GEN_GEN == 11 ? \
+ 0 : \
stage_prog_data->binding_table.size_bytes / 4; \
pkt.FloatingPointMode = stage_prog_data->use_alt_mode; \
\
if (stage_prog_data->total_scratch) { \
- pkt.ScratchSpaceBasePointer = \
- render_bo(stage_state->scratch_bo, 0); \
+ pkt.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0); \
pkt.PerThreadScratchSpace = \
ffs(stage_state->per_thread_scratch) - 11; \
} \
assert(vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8 ||
vue_prog_data->dispatch_mode == DISPATCH_MODE_4X2_DUAL_OBJECT);
+ assert(GEN_GEN < 11 ||
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
#if GEN_GEN == 6
/* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), cvs) {
if (stage_state->push_const_size != 0) {
cvs.Buffer0Valid = true;
- cvs.PointertoVSConstantBuffer0 = stage_state->push_const_offset;
- cvs.VSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ cvs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ cvs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
vs.StatisticsEnable = false;
vs.SamplerStatePointer =
- instruction_ro_bo(brw->batch.bo, stage_state->sampler_offset);
+ ro_bo(brw->batch.state.bo, stage_state->sampler_offset);
#endif
#if GEN_GEN == 5
for (unsigned i = 0; i < viewport_count; i++) {
/* _NEW_VIEWPORT | _NEW_TRANSFORM */
const struct gl_viewport_attrib *vp = &ctx->ViewportArray[i];
- if (ctx->Transform.DepthClamp) {
+ if (ctx->Transform.DepthClampNear && ctx->Transform.DepthClampFar) {
ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
+ } else if (ctx->Transform.DepthClampNear) {
+ ccv.MinimumDepth = MIN2(vp->Near, vp->Far);
+ ccv.MaximumDepth = 0.0;
+ } else if (ctx->Transform.DepthClampFar) {
+ ccv.MinimumDepth = 0.0;
+ ccv.MaximumDepth = MAX2(vp->Near, vp->Far);
} else {
ccv.MinimumDepth = 0.0;
ccv.MaximumDepth = 1.0;
/* ---------------------------------------------------------------------- */
-static inline void
+static void
set_scissor_bits(const struct gl_context *ctx, int i,
- bool render_to_fbo, unsigned fb_width, unsigned fb_height,
+ bool flip_y, unsigned fb_width, unsigned fb_height,
struct GENX(SCISSOR_RECT) *sc)
{
int bbox[4];
sc->ScissorRectangleXMax = 0;
sc->ScissorRectangleYMin = 1;
sc->ScissorRectangleYMax = 0;
- } else if (render_to_fbo) {
+ } else if (!flip_y) {
/* texmemory: Y=0=bottom */
sc->ScissorRectangleXMin = bbox[0];
sc->ScissorRectangleXMax = bbox[1] - 1;
genX(upload_scissor_state)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
struct GENX(SCISSOR_RECT) scissor;
uint32_t scissor_state_offset;
const unsigned int fb_width = _mesa_geometric_width(ctx->DrawBuffer);
* inclusive but max is exclusive.
*/
for (unsigned i = 0; i < viewport_count; i++) {
- set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height, &scissor);
+ set_scissor_bits(ctx, i, flip_y, fb_width, fb_height, &scissor);
GENX(SCISSOR_RECT_pack)(
NULL, scissor_map + i * GENX(SCISSOR_RECT_length), &scissor);
}
*/
const float gb_size = GEN_GEN >= 7 ? 16384.0f : 8192.0f;
+ /* Workaround: prevent gpu hangs on SandyBridge
+ * by disabling guardband clipping for odd dimensions.
+ */
+ if (GEN_GEN == 6 && (fb_width & 1 || fb_height & 1)) {
+ *xmin = -1.0f;
+ *xmax = 1.0f;
+ *ymin = -1.0f;
+ *ymax = 1.0f;
+ return;
+ }
+
if (m00 != 0 && m11 != 0) {
/* First, we compute the screen-space render area */
const float ss_ra_xmin = MIN3( 0, m30 + m00, m30 - m00);
const unsigned viewport_count = brw->clip.viewport_count;
/* _NEW_BUFFERS */
- const bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
const uint32_t fb_width = (float)_mesa_geometric_width(ctx->DrawBuffer);
const uint32_t fb_height = (float)_mesa_geometric_height(ctx->DrawBuffer);
#endif
/* _NEW_BUFFERS */
- if (render_to_fbo) {
- y_scale = 1.0;
- y_bias = 0;
- } else {
+ if (flip_y) {
y_scale = -1.0;
y_bias = (float)fb_height;
+ } else {
+ y_scale = 1.0;
+ y_bias = 0;
}
for (unsigned i = 0; i < brw->clip.viewport_count; i++) {
clv.YMaxClipGuardband = gb_ymax;
#if GEN_GEN < 6
- set_scissor_bits(ctx, i, render_to_fbo, fb_width, fb_height,
+ set_scissor_bits(ctx, i, flip_y, fb_width, fb_height,
&sfv.ScissorRectangle);
#elif GEN_GEN >= 8
/* _NEW_VIEWPORT | _NEW_BUFFERS: Screen Space Viewport
* The hardware will take the intersection of the drawing rectangle,
- * scissor rectangle, and the viewport extents. We don't need to be
- * smart, and can therefore just program the viewport extents.
+ * scissor rectangle, and the viewport extents. However, emitting
+ * 3DSTATE_DRAWING_RECTANGLE is expensive since it requires a full
+ * pipeline stall so we're better off just being a little more clever
+ * with our viewport so we can emit it once at context creation time.
*/
+ const float viewport_Xmin = MAX2(ctx->ViewportArray[i].X, 0);
+ const float viewport_Ymin = MAX2(ctx->ViewportArray[i].Y, 0);
const float viewport_Xmax =
- ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width;
+ MIN2(ctx->ViewportArray[i].X + ctx->ViewportArray[i].Width, fb_width);
const float viewport_Ymax =
- ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height;
+ MIN2(ctx->ViewportArray[i].Y + ctx->ViewportArray[i].Height, fb_height);
- if (render_to_fbo) {
- sfv.XMinViewPort = ctx->ViewportArray[i].X;
+ if (flip_y) {
+ sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = ctx->ViewportArray[i].Y;
- sfv.YMaxViewPort = viewport_Ymax - 1;
+ sfv.YMinViewPort = fb_height - viewport_Ymax;
+ sfv.YMaxViewPort = fb_height - viewport_Ymin - 1;
} else {
- sfv.XMinViewPort = ctx->ViewportArray[i].X;
+ sfv.XMinViewPort = viewport_Xmin;
sfv.XMaxViewPort = viewport_Xmax - 1;
- sfv.YMinViewPort = fb_height - viewport_Ymax;
- sfv.YMaxViewPort = fb_height - ctx->ViewportArray[i].Y - 1;
+ sfv.YMinViewPort = viewport_Ymin;
+ sfv.YMaxViewPort = viewport_Ymax - 1;
}
#endif
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
static void
genX(upload_gs_state)(struct brw_context *brw)
{
- const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ UNUSED struct gl_context *ctx = &brw->ctx;
+ UNUSED const struct gen_device_info *devinfo = &brw->screen->devinfo;
const struct brw_stage_state *stage_state = &brw->gs.base;
+ const struct gl_program *gs_prog = brw->programs[MESA_SHADER_GEOMETRY];
/* BRW_NEW_GEOMETRY_PROGRAM */
- bool active = brw->geometry_program;
+ bool active = GEN_GEN >= 6 && gs_prog;
/* BRW_NEW_GS_PROG_DATA */
struct brw_stage_prog_data *stage_prog_data = stage_state->prog_data;
- const struct brw_vue_prog_data *vue_prog_data =
+ UNUSED const struct brw_vue_prog_data *vue_prog_data =
brw_vue_prog_data(stage_prog_data);
#if GEN_GEN >= 7
const struct brw_gs_prog_data *gs_prog_data =
brw_gs_prog_data(stage_prog_data);
#endif
-#if GEN_GEN < 7
+#if GEN_GEN == 6
brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_GS), cgs) {
if (active && stage_state->push_const_size != 0) {
cgs.Buffer0Valid = true;
- cgs.PointertoGSConstantBuffer0 = stage_state->push_const_offset;
- cgs.GSConstantBuffer0ReadLength = stage_state->push_const_size - 1;
+ cgs.ConstantBody.PointertoConstantBuffer0 = stage_state->push_const_offset;
+ cgs.ConstantBody.ConstantBuffer0ReadLength = stage_state->push_const_size - 1;
}
}
#endif
* whole fixed function pipeline" means to emit a PIPE_CONTROL with the "CS
* Stall" bit set.
*/
- if (brw->gt == 2 && brw->gs.enabled != active)
+ if (devinfo->gt == 2 && brw->gs.enabled != active)
gen7_emit_cs_stall_flush(brw);
#endif
- if (active) {
- brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+#if GEN_GEN >= 6
+ brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
+#else
+ ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+ brw_state_emit(brw, GENX(GS_STATE), 32, &brw->ff_gs.state_offset, gs) {
+#endif
+
+#if GEN_GEN >= 6
+ if (active) {
INIT_THREAD_DISPATCH_FIELDS(gs, Vertex);
#if GEN_GEN >= 7
#if GEN_GEN < 7
gs.SOStatisticsEnable = true;
- gs.RenderingEnabled = 1;
- if (brw->geometry_program->info.has_transform_feedback_varyings)
- gs.SVBIPayloadEnable = true;
+ if (gs_prog->info.has_transform_feedback_varyings)
+ gs.SVBIPayloadEnable = _mesa_is_xfb_active_and_unpaused(ctx);
/* GEN6_GS_SPF_MODE and GEN6_GS_VECTOR_MASK_ENABLE are enabled as it
* was previously done for gen6.
gs.VertexURBEntryOutputLength = MAX2(urb_entry_output_length, 1);
#endif
}
-#if GEN_GEN < 7
- } else if (brw->ff_gs.prog_active) {
- /* In gen6, transform feedback for the VS stage is done with an ad-hoc GS
- * program. This function provides the needed 3DSTATE_GS for this.
- */
- upload_gs_state_for_tf(brw);
-#endif
- } else {
- brw_batch_emit(brw, GENX(3DSTATE_GS), gs) {
- gs.StatisticsEnable = true;
-#if GEN_GEN < 7
- gs.RenderingEnabled = true;
#endif
+#if GEN_GEN <= 6
+ if (!active && brw->ff_gs.prog_active) {
+ /* In gen6, transform feedback for the VS stage is done with an
+ * ad-hoc GS program. This function provides the needed 3DSTATE_GS
+ * for this.
+ */
+ gs.KernelStartPointer = KSP(brw, brw->ff_gs.prog_offset);
+ gs.SingleProgramFlow = true;
+ gs.DispatchGRFStartRegisterForURBData = GEN_GEN == 6 ? 2 : 1;
+ gs.VertexURBEntryReadLength = brw->ff_gs.prog_data->urb_read_length;
+
+#if GEN_GEN <= 5
+ gs.GRFRegisterCount =
+ DIV_ROUND_UP(brw->ff_gs.prog_data->total_grf, 16) - 1;
+ /* BRW_NEW_URB_FENCE */
+ gs.NumberofURBEntries = brw->urb.nr_gs_entries;
+ gs.URBEntryAllocationSize = brw->urb.vsize - 1;
+ gs.MaximumNumberofThreads = brw->urb.nr_gs_entries >= 8 ? 1 : 0;
+ gs.FloatingPointMode = FLOATING_POINT_MODE_Alternate;
+#else
+ gs.Enable = true;
+ gs.VectorMaskEnable = true;
+ gs.SVBIPayloadEnable = true;
+ gs.SVBIPostIncrementEnable = true;
+ gs.SVBIPostIncrementValue =
+ brw->ff_gs.prog_data->svbi_postincrement_value;
+ gs.SOStatisticsEnable = true;
+ gs.MaximumNumberofThreads = devinfo->max_gs_threads - 1;
+#endif
+ }
+#endif
+ if (!active && !brw->ff_gs.prog_active) {
#if GEN_GEN < 8
gs.DispatchGRFStartRegisterForURBData = 1;
#if GEN_GEN >= 7
#endif
#endif
}
+
+#if GEN_GEN >= 6
+ gs.StatisticsEnable = true;
+#endif
+#if GEN_GEN == 5 || GEN_GEN == 6
+ gs.RenderingEnabled = true;
+#endif
+#if GEN_GEN <= 5
+ gs.MaximumVPIndex = brw->clip.viewport_count - 1;
+#endif
}
-#if GEN_GEN < 7
+
+#if GEN_GEN == 6
brw->gs.enabled = active;
#endif
}
static const struct brw_tracked_state genX(gs_state) = {
.dirty = {
- .mesa = (GEN_GEN < 7 ? _NEW_PROGRAM_CONSTANTS : 0),
+ .mesa = (GEN_GEN == 6 ? _NEW_PROGRAM_CONSTANTS : 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_CONTEXT |
- BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA |
+ (GEN_GEN <= 5 ? BRW_NEW_PUSH_CONSTANT_ALLOCATION |
+ BRW_NEW_PROGRAM_CACHE |
+ BRW_NEW_URB_FENCE |
+ BRW_NEW_VIEWPORT_COUNT
+ : 0) |
+ (GEN_GEN >= 6 ? BRW_NEW_CONTEXT |
+ BRW_NEW_GEOMETRY_PROGRAM |
+ BRW_NEW_GS_PROG_DATA
+ : 0) |
(GEN_GEN < 7 ? BRW_NEW_FF_GS_PROG_DATA : 0),
},
.emit = genX(upload_gs_state),
};
-#endif
/* ---------------------------------------------------------------------- */
-UNUSED static GLenum
-fix_dual_blend_alpha_to_one(GLenum function)
+UNUSED static GLenum
+fix_dual_blend_alpha_to_one(GLenum function)
+{
+ switch (function) {
+ case GL_SRC1_ALPHA:
+ return GL_ONE;
+
+ case GL_ONE_MINUS_SRC1_ALPHA:
+ return GL_ZERO;
+ }
+
+ return function;
+}
+
+#define blend_factor(x) brw_translate_blend_factor(x)
+#define blend_eqn(x) brw_translate_blend_equation(x)
+
+/**
+ * Modify blend function to force destination alpha to 1.0
+ *
+ * If \c function specifies a blend function that uses destination alpha,
+ * replace it with a function that hard-wires destination alpha to 1.0. This
+ * is used when rendering to xRGB targets.
+ */
+static GLenum
+brw_fix_xRGB_alpha(GLenum function)
{
switch (function) {
- case GL_SRC1_ALPHA:
+ case GL_DST_ALPHA:
return GL_ONE;
- case GL_ONE_MINUS_SRC1_ALPHA:
+ case GL_ONE_MINUS_DST_ALPHA:
+ case GL_SRC_ALPHA_SATURATE:
return GL_ZERO;
}
return function;
}
-#define blend_factor(x) brw_translate_blend_factor(x)
-#define blend_eqn(x) brw_translate_blend_equation(x)
+#if GEN_GEN >= 6
+typedef struct GENX(BLEND_STATE_ENTRY) BLEND_ENTRY_GENXML;
+#else
+typedef struct GENX(COLOR_CALC_STATE) BLEND_ENTRY_GENXML;
+#endif
+
+UNUSED static bool
+set_blend_entry_bits(struct brw_context *brw, BLEND_ENTRY_GENXML *entry, int i,
+ bool alpha_to_one)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ /* _NEW_BUFFERS */
+ const struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
+
+ bool independent_alpha_blend = false;
+
+ /* Used for implementing the following bit of GL_EXT_texture_integer:
+ * "Per-fragment operations that require floating-point color
+ * components, including multisample alpha operations, alpha test,
+ * blending, and dithering, have no effect when the corresponding
+ * colors are written to an integer color buffer."
+ */
+ const bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
+
+ const unsigned blend_enabled = GEN_GEN >= 6 ?
+ ctx->Color.BlendEnabled & (1 << i) : ctx->Color.BlendEnabled;
+
+ /* _NEW_COLOR */
+ if (ctx->Color.ColorLogicOpEnabled) {
+ GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
+ : GL_UNSIGNED_NORMALIZED;
+ WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
+ rb_type != GL_UNSIGNED_NORMALIZED &&
+ rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
+ "renderbuffer\n",
+ _mesa_enum_to_string(ctx->Color.LogicOp),
+ _mesa_enum_to_string(rb_type));
+ if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
+ entry->LogicOpEnable = true;
+ entry->LogicOpFunction = ctx->Color._LogicOp;
+ }
+ } else if (blend_enabled && !ctx->Color._AdvancedBlendMode
+ && (GEN_GEN <= 5 || !integer)) {
+ GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
+ GLenum eqA = ctx->Color.Blend[i].EquationA;
+ GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
+ GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
+ GLenum srcA = ctx->Color.Blend[i].SrcA;
+ GLenum dstA = ctx->Color.Blend[i].DstA;
+
+ if (eqRGB == GL_MIN || eqRGB == GL_MAX)
+ srcRGB = dstRGB = GL_ONE;
+
+ if (eqA == GL_MIN || eqA == GL_MAX)
+ srcA = dstA = GL_ONE;
+
+ /* Due to hardware limitations, the destination may have information
+ * in an alpha channel even when the format specifies no alpha
+ * channel. In order to avoid getting any incorrect blending due to
+ * that alpha channel, coerce the blend factors to values that will
+ * not read the alpha channel, but will instead use the correct
+ * implicit value for alpha.
+ */
+ if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
+ GL_TEXTURE_ALPHA_TYPE)) {
+ srcRGB = brw_fix_xRGB_alpha(srcRGB);
+ srcA = brw_fix_xRGB_alpha(srcA);
+ dstRGB = brw_fix_xRGB_alpha(dstRGB);
+ dstA = brw_fix_xRGB_alpha(dstA);
+ }
+
+ /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
+ * "If Dual Source Blending is enabled, this bit must be disabled."
+ *
+ * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
+ * and leave it enabled anyway.
+ */
+ if (GEN_GEN >= 6 && ctx->Color.Blend[i]._UsesDualSrc && alpha_to_one) {
+ srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
+ srcA = fix_dual_blend_alpha_to_one(srcA);
+ dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
+ dstA = fix_dual_blend_alpha_to_one(dstA);
+ }
+
+ entry->ColorBufferBlendEnable = true;
+ entry->DestinationBlendFactor = blend_factor(dstRGB);
+ entry->SourceBlendFactor = blend_factor(srcRGB);
+ entry->DestinationAlphaBlendFactor = blend_factor(dstA);
+ entry->SourceAlphaBlendFactor = blend_factor(srcA);
+ entry->ColorBlendFunction = blend_eqn(eqRGB);
+ entry->AlphaBlendFunction = blend_eqn(eqA);
+
+ if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
+ independent_alpha_blend = true;
+ }
+
+ return independent_alpha_blend;
+}
#if GEN_GEN >= 6
static void
#else
{
#endif
-
- /* _NEW_BUFFERS */
- struct gl_renderbuffer *rb = ctx->DrawBuffer->_ColorDrawBuffers[i];
-
- /* Used for implementing the following bit of GL_EXT_texture_integer:
- * "Per-fragment operations that require floating-point color
- * components, including multisample alpha operations, alpha test,
- * blending, and dithering, have no effect when the corresponding
- * colors are written to an integer color buffer."
- */
- bool integer = ctx->DrawBuffer->_IntegerBuffers & (0x1 << i);
-
- /* _NEW_COLOR */
- if (ctx->Color.ColorLogicOpEnabled) {
- GLenum rb_type = rb ? _mesa_get_format_datatype(rb->Format)
- : GL_UNSIGNED_NORMALIZED;
- WARN_ONCE(ctx->Color.LogicOp != GL_COPY &&
- rb_type != GL_UNSIGNED_NORMALIZED &&
- rb_type != GL_FLOAT, "Ignoring %s logic op on %s "
- "renderbuffer\n",
- _mesa_enum_to_string(ctx->Color.LogicOp),
- _mesa_enum_to_string(rb_type));
- if (GEN_GEN >= 8 || rb_type == GL_UNSIGNED_NORMALIZED) {
- entry.LogicOpEnable = true;
- entry.LogicOpFunction =
- intel_translate_logic_op(ctx->Color.LogicOp);
- }
- } else if (ctx->Color.BlendEnabled & (1 << i) && !integer &&
- !ctx->Color._AdvancedBlendMode) {
- GLenum eqRGB = ctx->Color.Blend[i].EquationRGB;
- GLenum eqA = ctx->Color.Blend[i].EquationA;
- GLenum srcRGB = ctx->Color.Blend[i].SrcRGB;
- GLenum dstRGB = ctx->Color.Blend[i].DstRGB;
- GLenum srcA = ctx->Color.Blend[i].SrcA;
- GLenum dstA = ctx->Color.Blend[i].DstA;
-
- if (eqRGB == GL_MIN || eqRGB == GL_MAX)
- srcRGB = dstRGB = GL_ONE;
-
- if (eqA == GL_MIN || eqA == GL_MAX)
- srcA = dstA = GL_ONE;
-
- /* Due to hardware limitations, the destination may have information
- * in an alpha channel even when the format specifies no alpha
- * channel. In order to avoid getting any incorrect blending due to
- * that alpha channel, coerce the blend factors to values that will
- * not read the alpha channel, but will instead use the correct
- * implicit value for alpha.
- */
- if (rb && !_mesa_base_format_has_channel(rb->_BaseFormat,
- GL_TEXTURE_ALPHA_TYPE)) {
- srcRGB = brw_fix_xRGB_alpha(srcRGB);
- srcA = brw_fix_xRGB_alpha(srcA);
- dstRGB = brw_fix_xRGB_alpha(dstRGB);
- dstA = brw_fix_xRGB_alpha(dstA);
- }
-
- /* From the BLEND_STATE docs, DWord 0, Bit 29 (AlphaToOne Enable):
- * "If Dual Source Blending is enabled, this bit must be disabled."
- *
- * We override SRC1_ALPHA to ONE and ONE_MINUS_SRC1_ALPHA to ZERO,
- * and leave it enabled anyway.
- */
- if (ctx->Color.Blend[i]._UsesDualSrc && blend.AlphaToOneEnable) {
- srcRGB = fix_dual_blend_alpha_to_one(srcRGB);
- srcA = fix_dual_blend_alpha_to_one(srcA);
- dstRGB = fix_dual_blend_alpha_to_one(dstRGB);
- dstA = fix_dual_blend_alpha_to_one(dstA);
- }
-
- entry.ColorBufferBlendEnable = true;
- entry.DestinationBlendFactor = blend_factor(dstRGB);
- entry.SourceBlendFactor = blend_factor(srcRGB);
- entry.DestinationAlphaBlendFactor = blend_factor(dstA);
- entry.SourceAlphaBlendFactor = blend_factor(srcA);
- entry.ColorBlendFunction = blend_eqn(eqRGB);
- entry.AlphaBlendFunction = blend_eqn(eqA);
-
- if (srcA != srcRGB || dstA != dstRGB || eqA != eqRGB)
- blend.IndependentAlphaBlendEnable = true;
- }
+ blend.IndependentAlphaBlendEnable =
+ set_blend_entry_bits(brw, &entry, i, blend.AlphaToOneEnable) ||
+ blend.IndependentAlphaBlendEnable;
/* See section 8.1.6 "Pre-Blend Color Clamping" of the
* SandyBridge PRM Volume 2 Part 1 for HW requirements.
entry.PostBlendColorClampEnable = true;
entry.ColorClampRange = COLORCLAMP_RTFORMAT;
- entry.WriteDisableRed = !ctx->Color.ColorMask[i][0];
- entry.WriteDisableGreen = !ctx->Color.ColorMask[i][1];
- entry.WriteDisableBlue = !ctx->Color.ColorMask[i][2];
- entry.WriteDisableAlpha = !ctx->Color.ColorMask[i][3];
+ entry.WriteDisableRed = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 0);
+ entry.WriteDisableGreen = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 1);
+ entry.WriteDisableBlue = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 2);
+ entry.WriteDisableAlpha = !GET_COLORMASK_BIT(ctx->Color.ColorMask, i, 3);
#if GEN_GEN >= 8
GENX(BLEND_STATE_ENTRY_pack)(NULL, &blend_map[1 + i * 2], &entry);
};
static void
-upload_constant_state(struct brw_context *brw,
- struct brw_stage_state *stage_state,
- bool active, uint32_t stage)
+genX(upload_push_constant_packets)(struct brw_context *brw)
{
+ const struct gen_device_info *devinfo = &brw->screen->devinfo;
+ struct gl_context *ctx = &brw->ctx;
+
UNUSED uint32_t mocs = GEN_GEN < 8 ? GEN7_MOCS_L3 : 0;
- active = active && stage_state->push_const_size != 0;
- brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
- pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
- if (active) {
+ struct brw_stage_state *stage_states[] = {
+ &brw->vs.base,
+ &brw->tcs.base,
+ &brw->tes.base,
+ &brw->gs.base,
+ &brw->wm.base,
+ };
+
+ if (GEN_GEN == 7 && !GEN_IS_HASWELL && !devinfo->is_baytrail &&
+ stage_states[MESA_SHADER_VERTEX]->push_constants_dirty)
+ gen7_emit_vs_workaround_flush(brw);
+
+ for (int stage = 0; stage <= MESA_SHADER_FRAGMENT; stage++) {
+ struct brw_stage_state *stage_state = stage_states[stage];
+ UNUSED struct gl_program *prog = ctx->_Shader->CurrentProgram[stage];
+
+ if (!stage_state->push_constants_dirty)
+ continue;
+
+ brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_VS), pkt) {
+ pkt._3DCommandSubOpcode = push_constant_opcodes[stage];
+ if (stage_state->prog_data) {
#if GEN_GEN >= 8 || GEN_IS_HASWELL
- pkt.ConstantBody.ReadLength[2] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[2] =
- render_ro_bo(brw->curbe.curbe_bo, stage_state->push_const_offset);
+ /* The Skylake PRM contains the following restriction:
+ *
+ * "The driver must ensure The following case does not occur
+ * without a flush to the 3D engine: 3DSTATE_CONSTANT_* with
+ * buffer 3 read length equal to zero committed followed by a
+ * 3DSTATE_CONSTANT_* with buffer 0 read length not equal to
+ * zero committed."
+ *
+ * To avoid this, we program the buffers in the highest slots.
+ * This way, slot 0 is only used if slot 3 is also used.
+ */
+ int n = 3;
+
+ for (int i = 3; i >= 0; i--) {
+ const struct brw_ubo_range *range =
+ &stage_state->prog_data->ubo_ranges[i];
+
+ if (range->length == 0)
+ continue;
+
+ const struct gl_uniform_block *block =
+ prog->sh.UniformBlocks[range->block];
+ const struct gl_buffer_binding *binding =
+ &ctx->UniformBufferBindings[block->Binding];
+
+ if (binding->BufferObject == ctx->Shared->NullBufferObj) {
+ static unsigned msg_id = 0;
+ _mesa_gl_debug(ctx, &msg_id, MESA_DEBUG_SOURCE_API,
+ MESA_DEBUG_TYPE_UNDEFINED,
+ MESA_DEBUG_SEVERITY_HIGH,
+ "UBO %d unbound, %s shader uniform data "
+ "will be undefined.",
+ range->block,
+ _mesa_shader_stage_to_string(stage));
+ continue;
+ }
+
+ assert(binding->Offset % 32 == 0);
+
+ struct brw_bo *bo = intel_bufferobj_buffer(brw,
+ intel_buffer_object(binding->BufferObject),
+ binding->Offset, range->length * 32, false);
+
+ pkt.ConstantBody.ReadLength[n] = range->length;
+ pkt.ConstantBody.Buffer[n] =
+ ro_bo(bo, range->start * 32 + binding->Offset);
+ n--;
+ }
+
+ if (stage_state->push_const_size > 0) {
+ assert(n >= 0);
+ pkt.ConstantBody.ReadLength[n] = stage_state->push_const_size;
+ pkt.ConstantBody.Buffer[n] =
+ ro_bo(stage_state->push_const_bo,
+ stage_state->push_const_offset);
+ }
#else
- pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
- pkt.ConstantBody.Buffer[0].offset =
- stage_state->push_const_offset | mocs;
+ pkt.ConstantBody.ReadLength[0] = stage_state->push_const_size;
+ pkt.ConstantBody.Buffer[0].offset =
+ stage_state->push_const_offset | mocs;
#endif
+ }
}
- }
- brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
+ stage_state->push_constants_dirty = false;
+ brw->ctx.NewDriverState |= GEN_GEN >= 9 ? BRW_NEW_SURFACES : 0;
+ }
}
+
+const struct brw_tracked_state genX(push_constant_packets) = {
+ .dirty = {
+ .mesa = 0,
+ .brw = BRW_NEW_DRAW_CALL,
+ },
+ .emit = genX(upload_push_constant_packets),
+};
#endif
#if GEN_GEN >= 6
{
struct brw_stage_state *stage_state = &brw->vs.base;
- /* _BRW_NEW_VERTEX_PROGRAM */
- const struct brw_program *vp = brw_program_const(brw->vertex_program);
+ /* BRW_NEW_VERTEX_PROGRAM */
+ const struct gl_program *vp = brw->programs[MESA_SHADER_VERTEX];
/* BRW_NEW_VS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->vs.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_VERTEX);
- gen6_upload_push_constants(brw, &vp->program, prog_data, stage_state);
-
-#if GEN_GEN >= 7
- if (GEN_GEN == 7 && !GEN_IS_HASWELL && !brw->is_baytrail)
- gen7_emit_vs_workaround_flush(brw);
-
- upload_constant_state(brw, stage_state, true /* active */,
- MESA_SHADER_VERTEX);
-#endif
+ gen6_upload_push_constants(brw, vp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(vs_push_constants) = {
_NEW_TRANSFORM,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_VERTEX_PROGRAM |
BRW_NEW_VS_PROG_DATA,
},
struct brw_stage_state *stage_state = &brw->gs.base;
/* BRW_NEW_GEOMETRY_PROGRAM */
- const struct brw_program *gp = brw_program_const(brw->geometry_program);
-
- if (gp) {
- /* BRW_NEW_GS_PROG_DATA */
- struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
+ const struct gl_program *gp = brw->programs[MESA_SHADER_GEOMETRY];
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_GEOMETRY);
- gen6_upload_push_constants(brw, &gp->program, prog_data, stage_state);
- }
+ /* BRW_NEW_GS_PROG_DATA */
+ struct brw_stage_prog_data *prog_data = brw->gs.base.prog_data;
-#if GEN_GEN >= 7
- upload_constant_state(brw, stage_state, gp, MESA_SHADER_GEOMETRY);
-#endif
+ gen6_upload_push_constants(brw, gp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(gs_push_constants) = {
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_GEOMETRY_PROGRAM |
- BRW_NEW_GS_PROG_DATA |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+ BRW_NEW_GS_PROG_DATA,
},
.emit = genX(upload_gs_push_constants),
};
{
struct brw_stage_state *stage_state = &brw->wm.base;
/* BRW_NEW_FRAGMENT_PROGRAM */
- const struct brw_program *fp = brw_program_const(brw->fragment_program);
+ const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
/* BRW_NEW_FS_PROG_DATA */
const struct brw_stage_prog_data *prog_data = brw->wm.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_FRAGMENT);
-
- gen6_upload_push_constants(brw, &fp->program, prog_data, stage_state);
-
-#if GEN_GEN >= 7
- upload_constant_state(brw, stage_state, true, MESA_SHADER_FRAGMENT);
-#endif
+ gen6_upload_push_constants(brw, fp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(wm_push_constants) = {
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_FRAGMENT_PROGRAM |
- BRW_NEW_FS_PROG_DATA |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION,
+ BRW_NEW_FS_PROG_DATA,
},
.emit = genX(upload_wm_push_constants),
};
genX(emit_3dstate_multisample2)(struct brw_context *brw,
unsigned num_samples)
{
- assert(brw->num_samples <= 16);
-
- unsigned log2_samples = ffs(MAX2(num_samples, 1)) - 1;
+ unsigned log2_samples = ffs(num_samples) - 1;
brw_batch_emit(brw, GENX(3DSTATE_MULTISAMPLE), multi) {
multi.PixelLocation = CENTER;
static void
genX(upload_multisample_state)(struct brw_context *brw)
{
+ assert(brw->num_samples > 0 && brw->num_samples <= 16);
+
genX(emit_3dstate_multisample2)(brw, brw->num_samples);
brw_batch_emit(brw, GENX(3DSTATE_SAMPLE_MASK), sm) {
static const struct brw_tracked_state genX(multisample_state) = {
.dirty = {
- .mesa = _NEW_MULTISAMPLE,
+ .mesa = _NEW_MULTISAMPLE |
+ (GEN_GEN == 10 ? _NEW_BUFFERS : 0),
.brw = BRW_NEW_BLORP |
BRW_NEW_CONTEXT |
BRW_NEW_NUM_SAMPLES,
/* ---------------------------------------------------------------------- */
-#if GEN_GEN >= 6
static void
genX(upload_color_calc_state)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
brw_state_emit(brw, GENX(COLOR_CALC_STATE), 64, &brw->cc.state_offset, cc) {
+#if GEN_GEN <= 5
+ cc.IndependentAlphaBlendEnable =
+ set_blend_entry_bits(brw, &cc, 0, false);
+ set_depth_stencil_bits(brw, &cc);
+
+ if (ctx->Color.AlphaEnabled &&
+ ctx->DrawBuffer->_NumColorDrawBuffers <= 1) {
+ cc.AlphaTestEnable = true;
+ cc.AlphaTestFunction =
+ intel_translate_compare_func(ctx->Color.AlphaFunc);
+ }
+
+ cc.ColorDitherEnable = ctx->Color.DitherFlag;
+
+ cc.StatisticsEnable = brw->stats_wm;
+
+ cc.CCViewportStatePointer =
+ ro_bo(brw->batch.state.bo, brw->cc.vp_offset);
+#else
/* _NEW_COLOR */
- cc.AlphaTestFormat = ALPHATEST_UNORM8;
- UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
- ctx->Color.AlphaRef);
+ cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
+ cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
+ cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
+ cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
#if GEN_GEN < 9
/* _NEW_STENCIL */
cc.StencilReferenceValue = _mesa_get_stencil_ref(ctx, 0);
cc.BackfaceStencilReferenceValue =
_mesa_get_stencil_ref(ctx, ctx->Stencil._BackFace);
+#endif
+
#endif
/* _NEW_COLOR */
- cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
- cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
- cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
- cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
+ UNCLAMPED_FLOAT_TO_UBYTE(cc.AlphaReferenceValueAsUNORM8,
+ ctx->Color.AlphaRef);
}
+#if GEN_GEN >= 6
brw_batch_emit(brw, GENX(3DSTATE_CC_STATE_POINTERS), ptr) {
ptr.ColorCalcStatePointer = brw->cc.state_offset;
#if GEN_GEN != 7
ptr.ColorCalcStatePointerValid = true;
#endif
}
+#else
+ brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE;
+#endif
}
static const struct brw_tracked_state genX(color_calc_state) = {
.dirty = {
.mesa = _NEW_COLOR |
- _NEW_STENCIL,
+ _NEW_STENCIL |
+ (GEN_GEN <= 5 ? _NEW_BUFFERS |
+ _NEW_DEPTH
+ : 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_CC_STATE |
- BRW_NEW_STATE_BASE_ADDRESS,
+ (GEN_GEN <= 5 ? BRW_NEW_CC_VP |
+ BRW_NEW_STATS_WM
+ : BRW_NEW_CC_STATE |
+ BRW_NEW_STATE_BASE_ADDRESS),
},
.emit = genX(upload_color_calc_state),
};
-#endif
/* ---------------------------------------------------------------------- */
genX(upload_sbe)(struct brw_context *brw)
{
struct gl_context *ctx = &brw->ctx;
+ /* BRW_NEW_FRAGMENT_PROGRAM */
+ UNUSED const struct gl_program *fp = brw->programs[MESA_SHADER_FRAGMENT];
/* BRW_NEW_FS_PROG_DATA */
const struct brw_wm_prog_data *wm_prog_data =
brw_wm_prog_data(brw->wm.base.prog_data);
sbe.NumberofSFOutputAttributes = wm_prog_data->num_varying_inputs;
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ bool flip_y = ctx->DrawBuffer->FlipY;
/* _NEW_POINT
*
* Window coordinates in an FBO are inverted, which means point
* sprite origin must be inverted.
*/
- if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) != render_to_fbo)
+ if ((ctx->Point.SpriteOrigin == GL_LOWER_LEFT) == flip_y)
sbe.PointSpriteTextureCoordinateOrigin = LOWERLEFT;
else
sbe.PointSpriteTextureCoordinateOrigin = UPPERLEFT;
#if GEN_GEN >= 9
/* prepare the active component dwords */
- int input_index = 0;
- for (int attr = 0; attr < VARYING_SLOT_MAX; attr++) {
- if (!(brw->fragment_program->info.inputs_read &
- BITFIELD64_BIT(attr))) {
- continue;
- }
-
- assert(input_index < 32);
-
- sbe.AttributeActiveComponentFormat[input_index] = ACTIVE_COMPONENT_XYZW;
- ++input_index;
- }
+ for (int i = 0; i < 32; i++)
+ sbe.AttributeActiveComponentFormat[i] = ACTIVE_COMPONENT_XYZW;
#endif
}
for (int i = 0; i < 4; i++) {
struct intel_buffer_object *bufferobj =
intel_buffer_object(xfb_obj->Buffers[i]);
+ uint32_t start = xfb_obj->Offset[i];
+ uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
+ uint32_t const size = end - start;
- if (!bufferobj) {
+ if (!bufferobj || !size) {
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = i;
}
continue;
}
- uint32_t start = xfb_obj->Offset[i];
assert(start % 4 == 0);
- uint32_t end = ALIGN(start + xfb_obj->Size[i], 4);
struct brw_bo *bo =
- intel_bufferobj_buffer(brw, bufferobj, start, end - start);
+ intel_bufferobj_buffer(brw, bufferobj, start, size, true);
assert(end <= bo->size);
brw_batch_emit(brw, GENX(3DSTATE_SO_BUFFER), sob) {
sob.SOBufferIndex = i;
- sob.SurfaceBaseAddress = render_bo(bo, start);
+ sob.SurfaceBaseAddress = rw_bo(bo, start);
#if GEN_GEN < 8
sob.SurfacePitch = linked_xfb_info->Buffers[i].Stride * 4;
- sob.SurfaceEndAddress = render_bo(bo, end);
+ sob.SurfaceEndAddress = rw_bo(bo, end);
#else
sob.SOBufferEnable = true;
sob.StreamOffsetWriteEnable = true;
sob.SurfaceSize = MAX2(xfb_obj->Size[i] / 4, 1) - 1;
sob.StreamOutputBufferOffsetAddress =
- instruction_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
+ rw_bo(brw_obj->offset_bo, i * sizeof(uint32_t));
if (brw_obj->zero_offsets) {
/* Zero out the offset and write that to offset_bo */
#endif
}
-static inline bool
+static bool
query_active(struct gl_query_object *q)
{
return q && q->Active;
sos.RenderingDisable = true;
} else {
perf_debug("Rasterizer discard with a GL_PRIMITIVES_GENERATED "
- "query active relies on the clipper.");
+ "query active relies on the clipper.\n");
}
}
DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4);
/* BRW_NEW_FS_PROG_DATA */
- ps.BindingTableEntryCount = prog_data->base.binding_table.size_bytes / 4;
+ /* Gen 11 workarounds table #2056 WABTPPrefetchDisable suggests to disable
+ * prefetching of binding tables in A0 and B0 steppings.
+ * TODO: Revisit this workaround on C0 stepping.
+ */
+ ps.BindingTableEntryCount = GEN_GEN == 11 ?
+ 0 :
+ prog_data->base.binding_table.size_bytes / 4;
if (prog_data->base.use_alt_mode)
ps.FloatingPointMode = Alternate;
ps.SampleMask = genX(determine_sample_mask(brw));
#endif
- /* 3DSTATE_PS expects the number of threads per PSD, which is always 64;
- * it implicitly scales for different GT levels (which have some # of
- * PSDs).
+ /* 3DSTATE_PS expects the number of threads per PSD, which is always 64
+ * for pre Gen11 and 128 for gen11+; On gen11+ If a programmed value is
+ * k, it implies 2(k+1) threads. It implicitly scales for different GT
+ * levels (which have some # of PSDs).
*
- * In Gen8 the format is U8-2 whereas in Gen9 it is U8-1.
+ * In Gen8 the format is U8-2 whereas in Gen9+ it is U9-1.
*/
#if GEN_GEN >= 9
ps.MaximumNumberofThreadsPerPSD = 64 - 1;
ps.MaximumNumberofThreads = devinfo->max_wm_threads - 1;
#endif
- if (prog_data->base.nr_params > 0)
+ if (prog_data->base.nr_params > 0 ||
+ prog_data->base.ubo_ranges[0].length > 0)
ps.PushConstantEnable = true;
#if GEN_GEN < 8
else
ps.PositionXYOffsetSelect = POSOFFSET_NONE;
- ps.RenderTargetFastClearEnable = brw->wm.fast_clear_op;
ps._8PixelDispatchEnable = prog_data->dispatch_8;
ps._16PixelDispatchEnable = prog_data->dispatch_16;
+ ps._32PixelDispatchEnable = prog_data->dispatch_32;
+
+ /* From the Sky Lake PRM 3DSTATE_PS::32 Pixel Dispatch Enable:
+ *
+ * "When NUM_MULTISAMPLES = 16 or FORCE_SAMPLE_COUNT = 16, SIMD32
+ * Dispatch must not be enabled for PER_PIXEL dispatch mode."
+ *
+ * Since 16x MSAA is first introduced on SKL, we don't need to apply
+ * the workaround on any older hardware.
+ *
+ * BRW_NEW_NUM_SAMPLES
+ */
+ if (GEN_GEN >= 9 && !prog_data->persample_dispatch &&
+ brw->num_samples == 16) {
+ assert(ps._8PixelDispatchEnable || ps._16PixelDispatchEnable);
+ ps._32PixelDispatchEnable = false;
+ }
+
ps.DispatchGRFStartRegisterForConstantSetupData0 =
- prog_data->base.dispatch_grf_start_reg;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 0);
+ ps.DispatchGRFStartRegisterForConstantSetupData1 =
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 1);
ps.DispatchGRFStartRegisterForConstantSetupData2 =
- prog_data->dispatch_grf_start_reg_2;
+ brw_wm_prog_data_dispatch_grf_start_reg(prog_data, ps, 2);
- ps.KernelStartPointer0 = stage_state->prog_offset;
+ ps.KernelStartPointer0 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 0);
+ ps.KernelStartPointer1 = stage_state->prog_offset +
+ brw_wm_prog_data_prog_offset(prog_data, ps, 1);
ps.KernelStartPointer2 = stage_state->prog_offset +
- prog_data->prog_offset_2;
+ brw_wm_prog_data_prog_offset(prog_data, ps, 2);
if (prog_data->base.total_scratch) {
ps.ScratchSpaceBasePointer =
- render_bo(stage_state->scratch_bo,
- ffs(stage_state->per_thread_scratch) - 11);
+ rw_32_bo(stage_state->scratch_bo,
+ ffs(stage_state->per_thread_scratch) - 11);
}
}
}
: 0),
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_FS_PROG_DATA,
+ BRW_NEW_FS_PROG_DATA |
+ (GEN_GEN >= 9 ? BRW_NEW_NUM_SAMPLES : 0),
},
.emit = genX(upload_ps),
};
if (!tes_prog_data) {
brw_batch_emit(brw, GENX(3DSTATE_DS), ds);
} else {
+ assert(GEN_GEN < 11 ||
+ vue_prog_data->dispatch_mode == DISPATCH_MODE_SIMD8);
+
brw_batch_emit(brw, GENX(3DSTATE_DS), ds) {
INIT_THREAD_DISPATCH_FIELDS(ds, Patch);
upload_te_state(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- bool active = brw->tess_eval_program;
+ bool active = brw->programs[MESA_SHADER_TESS_EVAL];
/* BRW_NEW_TES_PROG_DATA */
const struct brw_tes_prog_data *tes_prog_data =
{
struct brw_stage_state *stage_state = &brw->tes.base;
/* BRW_NEW_TESS_PROGRAMS */
- const struct brw_program *tep = brw_program_const(brw->tess_eval_program);
-
- if (tep) {
- /* BRW_NEW_TES_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_EVAL);
- gen6_upload_push_constants(brw, &tep->program, prog_data, stage_state);
- }
+ const struct gl_program *tep = brw->programs[MESA_SHADER_TESS_EVAL];
- upload_constant_state(brw, stage_state, tep, MESA_SHADER_TESS_EVAL);
+ /* BRW_NEW_TES_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->tes.base.prog_data;
+ gen6_upload_push_constants(brw, tep, prog_data, stage_state);
}
static const struct brw_tracked_state genX(tes_push_constants) = {
.mesa = _NEW_PROGRAM_CONSTANTS,
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TES_PROG_DATA,
},
{
struct brw_stage_state *stage_state = &brw->tcs.base;
/* BRW_NEW_TESS_PROGRAMS */
- const struct brw_program *tcp = brw_program_const(brw->tess_ctrl_program);
- bool active = brw->tess_eval_program;
-
- if (active) {
- /* BRW_NEW_TCS_PROG_DATA */
- const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
+ const struct gl_program *tcp = brw->programs[MESA_SHADER_TESS_CTRL];
- _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_TESS_CTRL);
- gen6_upload_push_constants(brw, &tcp->program, prog_data, stage_state);
- }
+ /* BRW_NEW_TCS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->tcs.base.prog_data;
- upload_constant_state(brw, stage_state, active, MESA_SHADER_TESS_CTRL);
+ gen6_upload_push_constants(brw, tcp, prog_data, stage_state);
}
static const struct brw_tracked_state genX(tcs_push_constants) = {
.brw = BRW_NEW_BATCH |
BRW_NEW_BLORP |
BRW_NEW_DEFAULT_TESS_LEVELS |
- BRW_NEW_PUSH_CONSTANT_ALLOCATION |
BRW_NEW_TESS_PROGRAMS |
BRW_NEW_TCS_PROG_DATA,
},
/* ---------------------------------------------------------------------- */
#if GEN_GEN >= 7
+static void
+genX(upload_cs_push_constants)(struct brw_context *brw)
+{
+ struct brw_stage_state *stage_state = &brw->cs.base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ const struct gl_program *cp = brw->programs[MESA_SHADER_COMPUTE];
+
+ if (cp) {
+ /* BRW_NEW_CS_PROG_DATA */
+ struct brw_cs_prog_data *cs_prog_data =
+ brw_cs_prog_data(brw->cs.base.prog_data);
+
+ _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+ brw_upload_cs_push_constants(brw, cp, cs_prog_data, stage_state);
+ }
+}
+
+const struct brw_tracked_state genX(cs_push_constants) = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = genX(upload_cs_push_constants),
+};
+
+/**
+ * Creates a new CS constant buffer reflecting the current CS program's
+ * constants, if needed by the CS program.
+ */
+static void
+genX(upload_cs_pull_constants)(struct brw_context *brw)
+{
+ struct brw_stage_state *stage_state = &brw->cs.base;
+
+ /* BRW_NEW_COMPUTE_PROGRAM */
+ struct brw_program *cp =
+ (struct brw_program *) brw->programs[MESA_SHADER_COMPUTE];
+
+ /* BRW_NEW_CS_PROG_DATA */
+ const struct brw_stage_prog_data *prog_data = brw->cs.base.prog_data;
+
+ _mesa_shader_write_subroutine_indices(&brw->ctx, MESA_SHADER_COMPUTE);
+ /* _NEW_PROGRAM_CONSTANTS */
+ brw_upload_pull_constants(brw, BRW_NEW_SURFACES, &cp->program,
+ stage_state, prog_data);
+}
+
+const struct brw_tracked_state genX(cs_pull_constants) = {
+ .dirty = {
+ .mesa = _NEW_PROGRAM_CONSTANTS,
+ .brw = BRW_NEW_BATCH |
+ BRW_NEW_BLORP |
+ BRW_NEW_COMPUTE_PROGRAM |
+ BRW_NEW_CS_PROG_DATA,
+ },
+ .emit = genX(upload_cs_pull_constants),
+};
+
static void
genX(upload_cs_state)(struct brw_context *brw)
{
brw, &stage_state->surf_offset[
prog_data->binding_table.shader_time_start],
brw->shader_time.bo, 0, ISL_FORMAT_RAW,
- brw->shader_time.bo->size, 1, true);
+ brw->shader_time.bo->size, 1,
+ RELOC_WRITE);
}
uint32_t *bind = brw_state_batch(brw, prog_data->binding_table.size_bytes,
32, &stage_state->bind_bo_offset);
+ /* The MEDIA_VFE_STATE documentation for Gen8+ says:
+ *
+ * "A stalling PIPE_CONTROL is required before MEDIA_VFE_STATE unless
+ * the only bits that are changed are scoreboard related: Scoreboard
+ * Enable, Scoreboard Type, Scoreboard Mask, Scoreboard * Delta. For
+ * these scoreboard related states, a MEDIA_STATE_FLUSH is sufficient."
+ *
+ * Earlier generations say "MI_FLUSH" instead of "stalling PIPE_CONTROL",
+ * but MI_FLUSH isn't really a thing, so we assume they meant PIPE_CONTROL.
+ */
+ brw_emit_pipe_control_flush(brw, PIPE_CONTROL_CS_STALL);
+
brw_batch_emit(brw, GENX(MEDIA_VFE_STATE), vfe) {
if (prog_data->total_scratch) {
- uint32_t bo_offset;
+ uint32_t per_thread_scratch_value;
if (GEN_GEN >= 8) {
/* Broadwell's Per Thread Scratch Space is in the range [0, 11]
* where 0 = 1k, 1 = 2k, 2 = 4k, ..., 11 = 2M.
*/
- bo_offset = ffs(stage_state->per_thread_scratch) - 11;
+ per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 11;
} else if (GEN_IS_HASWELL) {
/* Haswell's Per Thread Scratch Space is in the range [0, 10]
* where 0 = 2k, 1 = 4k, 2 = 8k, ..., 10 = 2M.
*/
- bo_offset = ffs(stage_state->per_thread_scratch) - 12;
+ per_thread_scratch_value = ffs(stage_state->per_thread_scratch) - 12;
} else {
/* Earlier platforms use the range [0, 11] to mean [1kB, 12kB]
* where 0 = 1kB, 1 = 2kB, 2 = 3kB, ..., 11 = 12kB.
*/
- bo_offset = stage_state->per_thread_scratch / 1024 - 1;
+ per_thread_scratch_value = stage_state->per_thread_scratch / 1024 - 1;
}
- vfe.ScratchSpaceBasePointer =
- render_bo(stage_state->scratch_bo, bo_offset);
+ vfe.ScratchSpaceBasePointer = rw_32_bo(stage_state->scratch_bo, 0);
+ vfe.PerThreadScratchSpace = per_thread_scratch_value;
}
+ /* If brw->screen->subslice_total is greater than one, then
+ * devinfo->max_cs_threads stores number of threads per sub-slice;
+ * thus we need to multiply by that number by subslices to get
+ * the actual maximum number of threads; the -1 is because the HW
+ * has a bias of 1 (would not make sense to say the maximum number
+ * of threads is 0).
+ */
const uint32_t subslices = MAX2(brw->screen->subslice_total, 1);
vfe.MaximumNumberofThreads = devinfo->max_cs_threads * subslices - 1;
vfe.NumberofURBEntries = GEN_GEN >= 8 ? 2 : 0;
+#if GEN_GEN < 11
vfe.ResetGatewayTimer =
Resettingrelativetimerandlatchingtheglobaltimestamp;
+#endif
#if GEN_GEN < 9
vfe.BypassGatewayControl = BypassingOpenGatewayCloseGatewayprotocol;
#endif
const struct GENX(INTERFACE_DESCRIPTOR_DATA) idd = {
.KernelStartPointer = brw->cs.base.prog_offset,
.SamplerStatePointer = stage_state->sampler_offset,
- .SamplerCount = DIV_ROUND_UP(stage_state->sampler_count, 4) >> 2,
+ .SamplerCount = DIV_ROUND_UP(CLAMP(stage_state->sampler_count, 0, 16), 4),
.BindingTablePointer = stage_state->bind_bo_offset,
.ConstantURBEntryReadLength = cs_prog_data->push.per_thread.regs,
.NumberofThreadsinGPGPUThreadGroup = cs_prog_data->threads,
- .SharedLocalMemorySize = encode_slm_size(devinfo->gen,
+ .SharedLocalMemorySize = encode_slm_size(GEN_GEN,
prog_data->total_shared),
.BarrierEnable = cs_prog_data->uses_barrier,
#if GEN_GEN >= 8 || GEN_IS_HASWELL
static void
genX(upload_raster)(struct brw_context *brw)
{
- struct gl_context *ctx = &brw->ctx;
+ const struct gl_context *ctx = &brw->ctx;
/* _NEW_BUFFERS */
- bool render_to_fbo = _mesa_is_user_fbo(ctx->DrawBuffer);
+ const bool flip_y = ctx->DrawBuffer->FlipY;
/* _NEW_POLYGON */
- struct gl_polygon_attrib *polygon = &ctx->Polygon;
+ const struct gl_polygon_attrib *polygon = &ctx->Polygon;
/* _NEW_POINT */
- struct gl_point_attrib *point = &ctx->Point;
+ const struct gl_point_attrib *point = &ctx->Point;
brw_batch_emit(brw, GENX(3DSTATE_RASTER), raster) {
- if (brw->polygon_front_bit == render_to_fbo)
+ if (brw->polygon_front_bit != flip_y)
raster.FrontWinding = CounterClockwise;
if (polygon->CullFlag) {
raster.CullMode = CULLMODE_NONE;
}
- point->SmoothFlag = raster.SmoothPointEnable;
+ raster.SmoothPointEnable = point->SmoothFlag;
raster.DXMultisampleRasterizationEnable =
_mesa_is_multisample_enabled(ctx);
/* _NEW_LINE */
raster.AntialiasingEnable = ctx->Line.SmoothFlag;
+#if GEN_GEN == 10
+ /* _NEW_BUFFERS
+ * Antialiasing Enable bit MUST not be set when NUM_MULTISAMPLES > 1.
+ */
+ const bool multisampled_fbo =
+ _mesa_geometric_samples(ctx->DrawBuffer) > 1;
+ if (multisampled_fbo)
+ raster.AntialiasingEnable = false;
+#endif
+
/* _NEW_SCISSOR */
raster.ScissorRectangleEnable = ctx->Scissor.EnableFlags;
/* _NEW_TRANSFORM */
- if (!ctx->Transform.DepthClamp) {
+#if GEN_GEN < 9
+ if (!(ctx->Transform.DepthClampNear &&
+ ctx->Transform.DepthClampFar))
+ raster.ViewportZClipTestEnable = true;
+#endif
+
#if GEN_GEN >= 9
- raster.ViewportZFarClipTestEnable = true;
+ if (!ctx->Transform.DepthClampNear)
raster.ViewportZNearClipTestEnable = true;
-#else
- raster.ViewportZClipTestEnable = true;
+
+ if (!ctx->Transform.DepthClampFar)
+ raster.ViewportZFarClipTestEnable = true;
#endif
- }
/* BRW_NEW_CONSERVATIVE_RASTERIZATION */
#if GEN_GEN >= 9
uint32_t report_id)
{
brw_batch_emit(brw, GENX(MI_REPORT_PERF_COUNT), mi_rpc) {
- mi_rpc.MemoryAddress = instruction_bo(bo, offset_in_bytes);
+ mi_rpc.MemoryAddress = ggtt_bo(bo, offset_in_bytes);
mi_rpc.ReportID = report_id;
}
}
* Emit a 3DSTATE_SAMPLER_STATE_POINTERS_{VS,HS,GS,DS,PS} packet.
*/
static void
-genX(emit_sampler_state_pointers_xs)(struct brw_context *brw,
- struct brw_stage_state *stage_state)
+genX(emit_sampler_state_pointers_xs)(MAYBE_UNUSED struct brw_context *brw,
+ MAYBE_UNUSED struct brw_stage_state *stage_state)
{
#if GEN_GEN >= 7
static const uint16_t packet_headers[] = {
static void
genX(upload_default_color)(struct brw_context *brw,
const struct gl_sampler_object *sampler,
- mesa_format format, GLenum base_format,
+ MAYBE_UNUSED mesa_format format, GLenum base_format,
bool is_integer_format, bool is_stencil_sampling,
uint32_t *sdc_offset)
{
color.ui[3] = float_as_int(1.0);
int alignment = 32;
- if (brw->gen >= 8) {
+ if (GEN_GEN >= 8) {
alignment = 64;
- } else if (brw->is_haswell && (is_integer_format || is_stencil_sampling)) {
+ } else if (GEN_IS_HASWELL && (is_integer_format || is_stencil_sampling)) {
alignment = 512;
}
}
static uint32_t
-translate_wrap_mode(struct brw_context *brw, GLenum wrap, bool using_nearest)
+translate_wrap_mode(GLenum wrap, MAYBE_UNUSED bool using_nearest)
{
switch (wrap) {
case GL_REPEAT:
*
* Gen8+ supports this natively.
*/
- return TCM_HALF_BORDER;
-#endif
-
+ return TCM_HALF_BORDER;
+#else
/* On Gen4-7.5, we clamp the coordinates in the fragment shader
* and set clamp_border here, which gets the result desired.
* We just use clamp(_to_edge) for nearest, because for nearest
return TCM_CLAMP;
else
return TCM_CLAMP_BORDER;
+#endif
case GL_CLAMP_TO_EDGE:
return TCM_CLAMP;
case GL_CLAMP_TO_BORDER:
mesa_format format, GLenum base_format,
const struct gl_texture_object *texObj,
const struct gl_sampler_object *sampler,
- uint32_t *sampler_state,
- uint32_t batch_offset_for_sampler_state)
+ uint32_t *sampler_state)
{
struct GENX(SAMPLER_STATE) samp_st = { 0 };
if (sampler->MaxAnisotropy > 1.0f) {
if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
samp_st.MinModeFilter = MAPFILTER_ANISOTROPIC;
- if (samp_st.MinModeFilter == MAPFILTER_LINEAR)
+ if (samp_st.MagModeFilter == MAPFILTER_LINEAR)
samp_st.MagModeFilter = MAPFILTER_ANISOTROPIC;
if (sampler->MaxAnisotropy > 2.0f) {
bool either_nearest =
sampler->MinFilter == GL_NEAREST || sampler->MagFilter == GL_NEAREST;
- unsigned wrap_s = translate_wrap_mode(brw, sampler->WrapS, either_nearest);
- unsigned wrap_t = translate_wrap_mode(brw, sampler->WrapT, either_nearest);
- unsigned wrap_r = translate_wrap_mode(brw, sampler->WrapR, either_nearest);
+ unsigned wrap_s = translate_wrap_mode(sampler->WrapS, either_nearest);
+ unsigned wrap_t = translate_wrap_mode(sampler->WrapT, either_nearest);
+ unsigned wrap_r = translate_wrap_mode(sampler->WrapR, either_nearest);
if (target == GL_TEXTURE_CUBE_MAP ||
target == GL_TEXTURE_CUBE_MAP_ARRAY) {
texObj->StencilSampling,
&border_color_offset);
}
-
- samp_st.BorderColorPointer = border_color_offset;
-
- if (GEN_GEN < 6) {
- samp_st.BorderColorPointer += brw->batch.bo->offset64; /* reloc */
- brw_emit_reloc(&brw->batch, batch_offset_for_sampler_state + 8,
- brw->batch.bo, border_color_offset,
- I915_GEM_DOMAIN_SAMPLER, 0);
- }
+#if GEN_GEN < 6
+ samp_st.BorderColorPointer =
+ ro_bo(brw->batch.state.bo, border_color_offset);
+#else
+ samp_st.BorderColorPointer = border_color_offset;
+#endif
#if GEN_GEN >= 8
samp_st.LODPreClampMode = CLAMP_MODE_OGL;
static void
update_sampler_state(struct brw_context *brw,
int unit,
- uint32_t *sampler_state,
- uint32_t batch_offset_for_sampler_state)
+ uint32_t *sampler_state)
{
struct gl_context *ctx = &brw->ctx;
const struct gl_texture_unit *texUnit = &ctx->Texture.Unit[unit];
texUnit->LodBias,
firstImage->TexFormat, firstImage->_BaseFormat,
texObj, sampler,
- sampler_state, batch_offset_for_sampler_state);
+ sampler_state);
}
static void
32, &stage_state->sampler_offset);
/* memset(sampler_state, 0, sampler_count * size_in_bytes); */
- uint32_t batch_offset_for_sampler_state = stage_state->sampler_offset;
-
for (unsigned s = 0; s < sampler_count; s++) {
if (SamplersUsed & (1 << s)) {
const unsigned unit = prog->SamplerUnits[s];
if (ctx->Texture.Unit[unit]._Current) {
- update_sampler_state(brw, unit, sampler_state,
- batch_offset_for_sampler_state);
+ update_sampler_state(brw, unit, sampler_state);
}
}
sampler_state += dwords;
- batch_offset_for_sampler_state += size_in_bytes;
}
if (GEN_GEN >= 7 && stage_state->stage != MESA_SHADER_COMPUTE) {
genX(upload_fs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_FRAGMENT_PROGRAM */
- struct gl_program *fs = (struct gl_program *) brw->fragment_program;
+ struct gl_program *fs = brw->programs[MESA_SHADER_FRAGMENT];
genX(upload_sampler_state_table)(brw, fs, &brw->wm.base);
}
genX(upload_vs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_VERTEX_PROGRAM */
- struct gl_program *vs = (struct gl_program *) brw->vertex_program;
+ struct gl_program *vs = brw->programs[MESA_SHADER_VERTEX];
genX(upload_sampler_state_table)(brw, vs, &brw->vs.base);
}
genX(upload_gs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_GEOMETRY_PROGRAM */
- struct gl_program *gs = (struct gl_program *) brw->geometry_program;
+ struct gl_program *gs = brw->programs[MESA_SHADER_GEOMETRY];
if (!gs)
return;
genX(upload_tcs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tcs = (struct gl_program *) brw->tess_ctrl_program;
+ struct gl_program *tcs = brw->programs[MESA_SHADER_TESS_CTRL];
if (!tcs)
return;
genX(upload_tes_samplers)(struct brw_context *brw)
{
/* BRW_NEW_TESS_PROGRAMS */
- struct gl_program *tes = (struct gl_program *) brw->tess_eval_program;
+ struct gl_program *tes = brw->programs[MESA_SHADER_TESS_EVAL];
if (!tes)
return;
genX(upload_cs_samplers)(struct brw_context *brw)
{
/* BRW_NEW_COMPUTE_PROGRAM */
- struct gl_program *cs = (struct gl_program *) brw->compute_program;
+ struct gl_program *cs = brw->programs[MESA_SHADER_COMPUTE];
if (!cs)
return;
/* ---------------------------------------------------------------------- */
+#if GEN_GEN <= 5
+
+static void genX(upload_blend_constant_color)(struct brw_context *brw)
+{
+ struct gl_context *ctx = &brw->ctx;
+
+ brw_batch_emit(brw, GENX(3DSTATE_CONSTANT_COLOR), blend_cc) {
+ blend_cc.BlendConstantColorRed = ctx->Color.BlendColorUnclamped[0];
+ blend_cc.BlendConstantColorGreen = ctx->Color.BlendColorUnclamped[1];
+ blend_cc.BlendConstantColorBlue = ctx->Color.BlendColorUnclamped[2];
+ blend_cc.BlendConstantColorAlpha = ctx->Color.BlendColorUnclamped[3];
+ }
+}
+
+static const struct brw_tracked_state genX(blend_constant_color) = {
+ .dirty = {
+ .mesa = _NEW_COLOR,
+ .brw = BRW_NEW_CONTEXT |
+ BRW_NEW_BLORP,
+ },
+ .emit = genX(upload_blend_constant_color)
+};
+#endif
+
+/* ---------------------------------------------------------------------- */
+
void
genX(init_atoms)(struct brw_context *brw)
{
&brw_recalculate_urb_fence,
&genX(cc_vp),
- &brw_cc_unit,
+ &genX(color_calc_state),
/* Surface state setup. Must come before the VS/WM unit. The binding
* table upload must be last.
&genX(vs_samplers),
/* These set up state for brw_psp_urb_cbs */
- &brw_wm_unit,
+ &genX(wm_state),
&genX(sf_clip_viewport),
&genX(sf_state),
&genX(vs_state), /* always required, enabled or not */
- &brw_clip_unit,
- &brw_gs_unit,
+ &genX(clip_state),
+ &genX(gs_state),
/* Command packets:
*/
- &brw_invariant_state,
-
&brw_binding_table_pointers,
- &brw_blend_constant_color,
+ &genX(blend_constant_color),
&brw_depthbuffer,
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
&brw_tcs_pull_constants,
&brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
&brw_tes_pull_constants,
&brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
&brw_gs_pull_constants,
&brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
+
+ &genX(push_constant_packets),
+
&brw_vs_binding_table,
&brw_tcs_binding_table,
&brw_tes_binding_table,
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),
*/
&brw_vs_pull_constants,
&brw_vs_ubo_surfaces,
- &brw_vs_abo_surfaces,
&brw_tcs_pull_constants,
&brw_tcs_ubo_surfaces,
- &brw_tcs_abo_surfaces,
&brw_tes_pull_constants,
&brw_tes_ubo_surfaces,
- &brw_tes_abo_surfaces,
&brw_gs_pull_constants,
&brw_gs_ubo_surfaces,
- &brw_gs_abo_surfaces,
&brw_wm_pull_constants,
&brw_wm_ubo_surfaces,
- &brw_wm_abo_surfaces,
&gen6_renderbuffer_surfaces,
&brw_renderbuffer_read_surfaces,
&brw_texture_surfaces,
+
+ &genX(push_constant_packets),
+
&brw_vs_binding_table,
&brw_tcs_binding_table,
&brw_tes_binding_table,
&genX(scissor_state),
- &gen7_depthbuffer,
+ &brw_depthbuffer,
&genX(polygon_stipple),
&genX(polygon_stipple_offset),
{
&gen7_l3_state,
&brw_cs_image_surfaces,
- &gen7_cs_push_constants,
- &brw_cs_pull_constants,
+ &genX(cs_push_constants),
+ &genX(cs_pull_constants),
&brw_cs_ubo_surfaces,
- &brw_cs_abo_surfaces,
&brw_cs_texture_surfaces,
&brw_cs_work_groups_surface,
&genX(cs_samplers),