* sizeof(float))
/** \} */
-void
-gen6_blorp_emit_batch_head(struct brw_context *brw,
- const brw_blorp_params *params)
-{
- struct gl_context *ctx = &brw->intel.ctx;
-
- /* To ensure that the batch contains only the resolve, flush the batch
- * before beginning and after finishing emitting the resolve packets.
- */
- intel_flush(ctx);
-}
-
-
/**
* CMD_STATE_BASE_ADDRESS
*
gen6_blorp_emit_state_base_address(struct brw_context *brw,
const brw_blorp_params *params)
{
- struct intel_context *intel = &brw->intel;
+ uint8_t mocs = brw->gen == 7 ? GEN7_MOCS_L3 : 0;
BEGIN_BATCH(10);
OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
- OUT_BATCH(1); /* GeneralStateBaseAddressModifyEnable */
+ OUT_BATCH(mocs << 8 | /* GeneralStateMemoryObjectControlState */
+ mocs << 4 | /* StatelessDataPortAccessMemoryObjectControlState */
+ 1); /* GeneralStateBaseAddressModifyEnable */
+
/* SurfaceStateBaseAddress */
- OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
+ OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
/* DynamicStateBaseAddress */
- OUT_RELOC(intel->batch.bo, (I915_GEM_DOMAIN_RENDER |
+ OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
I915_GEM_DOMAIN_INSTRUCTION), 0, 1);
OUT_BATCH(1); /* IndirectObjectBaseAddress */
if (params->use_wm_prog) {
gen6_blorp_emit_vertices(struct brw_context *brw,
const brw_blorp_params *params)
{
- struct intel_context *intel = &brw->intel;
uint32_t vertex_offset;
/* Setup VBO for the rectangle primitive..
uint32_t dw0 = GEN6_VB0_ACCESS_VERTEXDATA |
(GEN6_BLORP_NUM_VUE_ELEMS * sizeof(float)) << BRW_VB0_PITCH_SHIFT;
- if (intel->gen >= 7)
+ if (brw->gen >= 7)
dw0 |= GEN7_VB0_ADDRESS_MODIFYENABLE;
+ if (brw->gen == 7)
+ dw0 |= GEN7_MOCS_L3 << 16;
+
BEGIN_BATCH(batch_length);
OUT_BATCH((_3DSTATE_VERTEX_BUFFERS << 16) | (batch_length - 2));
OUT_BATCH(dw0);
/* start address */
- OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+ OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
vertex_offset);
/* end address */
- OUT_RELOC(intel->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
+ OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_VERTEX, 0,
vertex_offset + GEN6_BLORP_VBO_SIZE - 1);
OUT_BATCH(0);
ADVANCE_BATCH();
blend->blend1.write_disable_b = params->color_write_disable[2];
blend->blend1.write_disable_a = params->color_write_disable[3];
- /* When blitting from an XRGB source to a ARGB destination, we need to
- * interpret the missing channel as 1.0. Blending can do that for us:
- * we simply use the RGB values from the fragment shader ("source RGB"),
- * but smash the alpha channel to 1.
- */
- if (params->src.mt &&
- _mesa_get_format_bits(params->dst.mt->format, GL_ALPHA_BITS) > 0 &&
- _mesa_get_format_bits(params->src.mt->format, GL_ALPHA_BITS) == 0) {
- blend->blend0.blend_enable = 1;
- blend->blend0.ia_blend_enable = 1;
-
- blend->blend0.blend_func = BRW_BLENDFUNCTION_ADD;
- blend->blend0.ia_blend_func = BRW_BLENDFUNCTION_ADD;
-
- blend->blend0.source_blend_factor = BRW_BLENDFACTOR_SRC_COLOR;
- blend->blend0.dest_blend_factor = BRW_BLENDFACTOR_ZERO;
- blend->blend0.ia_source_blend_factor = BRW_BLENDFACTOR_ONE;
- blend->blend0.ia_dest_blend_factor = BRW_BLENDFACTOR_ZERO;
- }
-
return cc_blend_state_offset;
}
width /= 2;
height /= 2;
}
- struct intel_region *region = surface->mt->region;
+ struct intel_mipmap_tree *mt = surface->mt;
uint32_t tile_x, tile_y;
uint32_t *surf = (uint32_t *)
/* reloc */
surf[1] = (surface->compute_tile_offsets(&tile_x, &tile_y) +
- region->bo->offset);
+ mt->bo->offset64);
surf[2] = (0 << BRW_SURFACE_LOD_SHIFT |
(width - 1) << BRW_SURFACE_WIDTH_SHIFT |
uint32_t tiling = surface->map_stencil_as_y_tiled
? BRW_SURFACE_TILED | BRW_SURFACE_TILED_Y
- : brw_get_surface_tiling_bits(region->tiling);
- uint32_t pitch_bytes = region->pitch;
+ : brw_get_surface_tiling_bits(mt->tiling);
+ uint32_t pitch_bytes = mt->pitch;
if (surface->map_stencil_as_y_tiled)
pitch_bytes *= 2;
surf[3] = (tiling |
BRW_SURFACE_VERTICAL_ALIGN_ENABLE : 0));
/* Emit relocation to surface contents */
- drm_intel_bo_emit_reloc(brw->intel.batch.bo,
+ drm_intel_bo_emit_reloc(brw->batch.bo,
wm_surf_offset + 4,
- region->bo,
- surf[1] - region->bo->offset,
+ mt->bo,
+ surf[1] - mt->bo->offset64,
read_domains, write_domain);
return wm_surf_offset;
/**
* SAMPLER_STATE. See brw_update_sampler_state().
*/
-static uint32_t
+uint32_t
gen6_blorp_emit_sampler_state(struct brw_context *brw,
const brw_blorp_params *params)
{
uint32_t sampler_offset;
+ uint32_t *sampler_state = (uint32_t *)
+ brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE, 16, 32, &sampler_offset);
- struct brw_sampler_state *sampler = (struct brw_sampler_state *)
- brw_state_batch(brw, AUB_TRACE_SAMPLER_STATE,
- sizeof(struct brw_sampler_state),
- 32, &sampler_offset);
- memset(sampler, 0, sizeof(*sampler));
-
- sampler->ss0.min_filter = BRW_MAPFILTER_LINEAR;
- sampler->ss0.mip_filter = BRW_MIPFILTER_NONE;
- sampler->ss0.mag_filter = BRW_MAPFILTER_LINEAR;
-
- sampler->ss1.r_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- sampler->ss1.s_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
- sampler->ss1.t_wrap_mode = BRW_TEXCOORDMODE_CLAMP;
-
- sampler->ss0.min_mag_neq = 1;
-
- /* Set LOD bias:
- */
- sampler->ss0.lod_bias = 0;
-
- sampler->ss0.lod_preclamp = 1; /* OpenGL mode */
- sampler->ss0.default_color_mode = 0; /* OpenGL/DX10 mode */
+ unsigned address_rounding = BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MIN |
+ BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
+ BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
- /* Set BaseMipLevel, MaxLOD, MinLOD:
- *
- * XXX: I don't think that using firstLevel, lastLevel works,
+ /* XXX: I don't think that using firstLevel, lastLevel works,
* because we always setup the surface state as if firstLevel ==
* level zero. Probably have to subtract firstLevel from each of
* these:
*/
- sampler->ss0.base_level = U_FIXED(0, 1);
-
- sampler->ss1.max_lod = U_FIXED(0, 6);
- sampler->ss1.min_lod = U_FIXED(0, 6);
-
- sampler->ss3.non_normalized_coord = 1;
-
- sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MIN |
- BRW_ADDRESS_ROUNDING_ENABLE_V_MIN |
- BRW_ADDRESS_ROUNDING_ENABLE_R_MIN;
- sampler->ss3.address_round |= BRW_ADDRESS_ROUNDING_ENABLE_U_MAG |
- BRW_ADDRESS_ROUNDING_ENABLE_V_MAG |
- BRW_ADDRESS_ROUNDING_ENABLE_R_MAG;
+ brw_emit_sampler_state(brw,
+ sampler_state,
+ sampler_offset,
+ BRW_MAPFILTER_LINEAR, /* min filter */
+ BRW_MAPFILTER_LINEAR, /* mag filter */
+ BRW_MIPFILTER_NONE,
+ BRW_ANISORATIO_2,
+ address_rounding,
+ BRW_TEXCOORDMODE_CLAMP,
+ BRW_TEXCOORDMODE_CLAMP,
+ BRW_TEXCOORDMODE_CLAMP,
+ 0, /* min LOD */
+ 0, /* max LOD */
+ 0, /* LOD bias */
+ 0, /* base miplevel */
+ 0, /* shadow function */
+ true, /* non-normalized coordinates */
+ 0); /* border color offset - unused */
return sampler_offset;
}
gen6_blorp_emit_vs_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
- struct intel_context *intel = &brw->intel;
-
- if (intel->gen == 6) {
- /* From the BSpec, Volume 2a, Part 3 "Vertex Shader", Section
+ if (brw->gen == 6) {
+ /* From the BSpec, 3D Pipeline > Geometry > Vertex Shader > State,
* 3DSTATE_VS, Dword 5.0 "VS Function Enable":
*
* [DevSNB] A pipeline flush must be programmed prior to a
1 << GEN6_SF_URB_ENTRY_READ_LENGTH_SHIFT |
0 << GEN6_SF_URB_ENTRY_READ_OFFSET_SHIFT);
OUT_BATCH(0); /* dw2 */
- OUT_BATCH(params->num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
+ OUT_BATCH(params->dst.num_samples > 1 ? GEN6_SF_MSRAST_ON_PATTERN : 0);
for (int i = 0; i < 16; ++i)
OUT_BATCH(0);
ADVANCE_BATCH();
uint32_t dw2, dw4, dw5, dw6;
/* Even when thread dispatch is disabled, max threads (dw5.25:31) must be
- * nonzero to prevent the GPU from hanging. See the valid ranges in the
- * BSpec, Volume 2a.11 Windower, Section 3DSTATE_WM, Dword 5.25:31
- * "Maximum Number Of Threads".
+ * nonzero to prevent the GPU from hanging. While the documentation doesn't
+ * mention this explicitly, it notes that the valid range for the field is
+ * [1,39] = [2,40] threads, which excludes zero.
*
* To be safe (and to minimize extraneous code) we go ahead and fully
* configure the WM state whether or not there is a WM program.
case GEN6_HIZ_OP_NONE:
break;
default:
- assert(0);
- break;
+ unreachable("not reached");
}
dw5 |= GEN6_WM_LINE_AA_WIDTH_1_0;
dw5 |= GEN6_WM_LINE_END_CAP_AA_WIDTH_0_5;
dw5 |= GEN6_WM_DISPATCH_ENABLE; /* We are rendering */
}
- if (params->num_samples > 1) {
+ if (params->dst.num_samples > 1) {
dw6 |= GEN6_WM_MSRAST_ON_PATTERN;
if (prog_data && prog_data->persample_msaa_dispatch)
dw6 |= GEN6_WM_MSDISPMODE_PERSAMPLE;
gen6_blorp_emit_depth_stencil_config(struct brw_context *brw,
const brw_blorp_params *params)
{
- struct intel_context *intel = &brw->intel;
- struct gl_context *ctx = &intel->ctx;
+ struct gl_context *ctx = &brw->ctx;
uint32_t draw_x = params->depth.x_offset;
uint32_t draw_y = params->depth.y_offset;
uint32_t tile_mask_x, tile_mask_y;
+ uint32_t surftype;
+ unsigned int depth = MAX2(params->depth.mt->logical_depth0, 1);
+ GLenum gl_target = params->depth.mt->target;
+
+ switch (gl_target) {
+ case GL_TEXTURE_CUBE_MAP_ARRAY:
+ case GL_TEXTURE_CUBE_MAP:
+ /* The PRM claims that we should use BRW_SURFACE_CUBE for this
+ * situation, but experiments show that gl_Layer doesn't work when we do
+ * this. So we use BRW_SURFACE_2D, since for rendering purposes this is
+ * equivalent.
+ */
+ surftype = BRW_SURFACE_2D;
+ depth *= 6;
+ break;
+ default:
+ surftype = translate_tex_target(gl_target);
+ break;
+ }
brw_get_depthstencil_tile_masks(params->depth.mt,
params->depth.level,
uint32_t tile_x = draw_x & tile_mask_x;
uint32_t tile_y = draw_y & tile_mask_y;
uint32_t offset =
- intel_region_get_aligned_offset(params->depth.mt->region,
- draw_x & ~tile_mask_x,
- draw_y & ~tile_mask_y, false);
+ intel_miptree_get_aligned_offset(params->depth.mt,
+ draw_x & ~tile_mask_x,
+ draw_y & ~tile_mask_y, false);
/* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
* (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
- OUT_BATCH((params->depth.mt->region->pitch - 1) |
+ OUT_BATCH((params->depth.mt->pitch - 1) |
params->depth_format << 18 |
1 << 21 | /* separate stencil enable */
1 << 22 | /* hiz enable */
BRW_TILEWALK_YMAJOR << 26 |
1 << 27 | /* y-tiled */
BRW_SURFACE_2D << 29);
- OUT_RELOC(params->depth.mt->region->bo,
+ OUT_RELOC(params->depth.mt->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
offset);
OUT_BATCH(BRW_SURFACE_MIPMAPLAYOUT_BELOW << 1 |
/* 3DSTATE_HIER_DEPTH_BUFFER */
{
- struct intel_region *hiz_region = params->depth.mt->hiz_mt->region;
+ struct intel_mipmap_tree *hiz_mt = params->depth.mt->hiz_mt;
uint32_t hiz_offset =
- intel_region_get_aligned_offset(hiz_region,
- draw_x & ~tile_mask_x,
- (draw_y & ~tile_mask_y) / 2, false);
+ intel_miptree_get_aligned_offset(hiz_mt,
+ draw_x & ~tile_mask_x,
+ (draw_y & ~tile_mask_y) / 2, false);
BEGIN_BATCH(3);
OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
- OUT_BATCH(hiz_region->pitch - 1);
- OUT_RELOC(hiz_region->bo,
+ OUT_BATCH(hiz_mt->pitch - 1);
+ OUT_RELOC(hiz_mt->bo,
I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
hiz_offset);
ADVANCE_BATCH();
gen6_blorp_emit_depth_disable(struct brw_context *brw,
const brw_blorp_params *params)
{
+ intel_emit_post_sync_nonzero_flush(brw);
+ intel_emit_depth_stall_flushes(brw);
+
BEGIN_BATCH(7);
OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (7 - 2));
OUT_BATCH((BRW_DEPTHFORMAT_D32_FLOAT << 18) |
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(_3DSTATE_HIER_DEPTH_BUFFER << 16 | (3 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
+
+ BEGIN_BATCH(3);
+ OUT_BATCH(_3DSTATE_STENCIL_BUFFER << 16 | (3 - 2));
+ OUT_BATCH(0);
+ OUT_BATCH(0);
+ ADVANCE_BATCH();
}
gen6_blorp_emit_drawing_rectangle(struct brw_context *brw,
const brw_blorp_params *params)
{
+ if (brw->gen == 6)
+ intel_emit_post_sync_nonzero_flush(brw);
+
BEGIN_BATCH(4);
OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
OUT_BATCH(0);
OUT_BATCH(0);
OUT_BATCH(0);
ADVANCE_BATCH();
-}
+ /* Only used on Sandybridge; harmless to set elsewhere. */
+ brw->batch.need_workaround_flush = true;
+}
/**
* \brief Execute a blit or render pass operation.
uint32_t wm_bind_bo_offset = 0;
uint32_t prog_offset = params->get_wm_prog(brw, &prog_data);
- gen6_blorp_emit_batch_head(brw, params);
- gen6_emit_3dstate_multisample(brw, params->num_samples);
- gen6_emit_3dstate_sample_mask(brw, params->num_samples, 1.0, false, ~0u);
+
+ /* Emit workaround flushes when we switch from drawing to blorping. */
+ brw->batch.need_workaround_flush = true;
+
+ gen6_emit_3dstate_multisample(brw, params->dst.num_samples);
+ gen6_emit_3dstate_sample_mask(brw,
+ params->dst.num_samples > 1 ?
+ (1 << params->dst.num_samples) - 1 : 1);
gen6_blorp_emit_state_base_address(brw, params);
gen6_blorp_emit_vertices(brw, params);
gen6_blorp_emit_urb_config(brw, params);