From e490382326fece5bf93c05a468c5ff1737d7f083 Mon Sep 17 00:00:00 2001 From: Rafael Antognolli Date: Fri, 14 Jul 2017 11:02:57 -0700 Subject: [PATCH] i965: Convert CLIP_STATE to genxml. Add the code into its own function and atom, since almost nothing is shared with GEN >= 6. v2: Split GEN <=5 and GEN >= 6 into separate functions (Ken). v3: Minor tidying by Ken. Signed-off-by: Rafael Antognolli Reviewed-by: Kenneth Graunke --- src/mesa/drivers/dri/i965/Makefile.sources | 1 - src/mesa/drivers/dri/i965/brw_clip_state.c | 147 ------------------ src/mesa/drivers/dri/i965/brw_structs.h | 65 -------- src/mesa/drivers/dri/i965/genX_state_upload.c | 102 +++++++++++- 4 files changed, 100 insertions(+), 215 deletions(-) delete mode 100644 src/mesa/drivers/dri/i965/brw_clip_state.c diff --git a/src/mesa/drivers/dri/i965/Makefile.sources b/src/mesa/drivers/dri/i965/Makefile.sources index fc46645c8ed..60a41f89ca3 100644 --- a/src/mesa/drivers/dri/i965/Makefile.sources +++ b/src/mesa/drivers/dri/i965/Makefile.sources @@ -6,7 +6,6 @@ i965_FILES = \ brw_bufmgr.h \ brw_clear.c \ brw_clip.c \ - brw_clip_state.c \ brw_compute.c \ brw_conditional_render.c \ brw_context.c \ diff --git a/src/mesa/drivers/dri/i965/brw_clip_state.c b/src/mesa/drivers/dri/i965/brw_clip_state.c deleted file mode 100644 index 8f22c0ff673..00000000000 --- a/src/mesa/drivers/dri/i965/brw_clip_state.c +++ /dev/null @@ -1,147 +0,0 @@ -/* - Copyright (C) Intel Corp. 2006. All Rights Reserved. - Intel funded Tungsten Graphics to - develop this 3D driver. - - Permission is hereby granted, free of charge, to any person obtaining - a copy of this software and associated documentation files (the - "Software"), to deal in the Software without restriction, including - without limitation the rights to use, copy, modify, merge, publish, - distribute, sublicense, and/or sell copies of the Software, and to - permit persons to whom the Software is furnished to do so, subject to - the following conditions: - - The above copyright notice and this permission notice (including the - next paragraph) shall be included in all copies or substantial - portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. - IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE - LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION - OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION - WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - - **********************************************************************/ - /* - * Authors: - * Keith Whitwell - */ - -#include "intel_batchbuffer.h" -#include "brw_context.h" -#include "brw_state.h" -#include "brw_defines.h" -#include "main/framebuffer.h" - -static void -brw_upload_clip_unit(struct brw_context *brw) -{ - struct gl_context *ctx = &brw->ctx; - struct brw_clip_unit_state *clip; - - clip = brw_state_batch(brw, sizeof(*clip), 32, &brw->clip.state_offset); - memset(clip, 0, sizeof(*clip)); - - /* BRW_NEW_PROGRAM_CACHE | BRW_NEW_CLIP_PROG_DATA */ - clip->thread0.grf_reg_count = (ALIGN(brw->clip.prog_data->total_grf, 16) / - 16 - 1); - clip->thread0.kernel_start_pointer = - brw_program_reloc(brw, - brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, thread0), - brw->clip.prog_offset + - (clip->thread0.grf_reg_count << 1)) >> 6; - - clip->thread1.floating_point_mode = BRW_FLOATING_POINT_NON_IEEE_754; - clip->thread1.single_program_flow = 1; - - clip->thread3.urb_entry_read_length = brw->clip.prog_data->urb_read_length; - clip->thread3.const_urb_entry_read_length = - brw->clip.prog_data->curb_read_length; - - /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ - clip->thread3.const_urb_entry_read_offset = brw->curbe.clip_start * 2; - clip->thread3.dispatch_grf_start_reg = 1; - clip->thread3.urb_entry_read_offset = 0; - - /* BRW_NEW_URB_FENCE */ - clip->thread4.nr_urb_entries = brw->urb.nr_clip_entries; - clip->thread4.urb_entry_allocation_size = brw->urb.vsize - 1; - /* If we have enough clip URB entries to run two threads, do so. - */ - if (brw->urb.nr_clip_entries >= 10) { - /* Half of the URB entries go to each thread, and it has to be an - * even number. - */ - assert(brw->urb.nr_clip_entries % 2 == 0); - - /* Although up to 16 concurrent Clip threads are allowed on Ironlake, - * only 2 threads can output VUEs at a time. - */ - if (brw->gen == 5) - clip->thread4.max_threads = 16 - 1; - else - clip->thread4.max_threads = 2 - 1; - } else { - assert(brw->urb.nr_clip_entries >= 5); - clip->thread4.max_threads = 1 - 1; - } - - /* _NEW_TRANSFORM */ - if (brw->gen == 5 || brw->is_g4x) - clip->clip5.userclip_enable_flags = ctx->Transform.ClipPlanesEnabled; - else - /* Up to 6 actual clip flags, plus the 7th for negative RHW workaround. */ - clip->clip5.userclip_enable_flags = (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40; - - clip->clip5.userclip_must_clip = 1; - - /* enable guardband clipping if we can */ - clip->clip5.guard_band_enable = 1; - clip->clip6.clipper_viewport_state_ptr = - (brw->batch.bo->offset64 + brw->clip.vp_offset) >> 5; - - /* emit clip viewport relocation */ - brw_emit_reloc(&brw->batch, - (brw->clip.state_offset + - offsetof(struct brw_clip_unit_state, clip6)), - brw->batch.bo, brw->clip.vp_offset, - I915_GEM_DOMAIN_INSTRUCTION, 0); - - /* _NEW_TRANSFORM */ - if (!ctx->Transform.DepthClamp) - clip->clip5.viewport_z_clip_enable = 1; - clip->clip5.viewport_xy_clip_enable = 1; - clip->clip5.vertex_position_space = BRW_CLIP_NDCSPACE; - if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) - clip->clip5.api_mode = BRW_CLIP_API_DX; - else - clip->clip5.api_mode = BRW_CLIP_API_OGL; - clip->clip5.clip_mode = brw->clip.prog_data->clip_mode; - - if (brw->is_g4x) - clip->clip5.negative_w_clip_test = 1; - - clip->viewport_xmin = -1; - clip->viewport_xmax = 1; - clip->viewport_ymin = -1; - clip->viewport_ymax = 1; - - brw->ctx.NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; -} - -const struct brw_tracked_state brw_clip_unit = { - .dirty = { - .mesa = _NEW_TRANSFORM | - _NEW_VIEWPORT, - .brw = BRW_NEW_BATCH | - BRW_NEW_BLORP | - BRW_NEW_CLIP_PROG_DATA | - BRW_NEW_PUSH_CONSTANT_ALLOCATION | - BRW_NEW_PROGRAM_CACHE | - BRW_NEW_URB_FENCE, - }, - .emit = brw_upload_clip_unit, -}; diff --git a/src/mesa/drivers/dri/i965/brw_structs.h b/src/mesa/drivers/dri/i965/brw_structs.h index 6feab0dadc4..5a0d91d6f77 100644 --- a/src/mesa/drivers/dri/i965/brw_structs.h +++ b/src/mesa/drivers/dri/i965/brw_structs.h @@ -115,71 +115,6 @@ struct thread3 unsigned pad3:1; }; - - -struct brw_clip_unit_state -{ - struct thread0 thread0; - struct - { - unsigned pad0:7; - unsigned sw_exception_enable:1; - unsigned pad1:3; - unsigned mask_stack_exception_enable:1; - unsigned pad2:1; - unsigned illegal_op_exception_enable:1; - unsigned pad3:2; - unsigned floating_point_mode:1; - unsigned thread_priority:1; - unsigned binding_table_entry_count:8; - unsigned pad4:5; - unsigned single_program_flow:1; - } thread1; - - struct thread2 thread2; - struct thread3 thread3; - - struct - { - unsigned pad0:9; - unsigned gs_output_stats:1; /* not always */ - unsigned stats_enable:1; - unsigned nr_urb_entries:7; - unsigned pad1:1; - unsigned urb_entry_allocation_size:5; - unsigned pad2:1; - unsigned max_threads:5; /* may be less */ - unsigned pad3:2; - } thread4; - - struct - { - unsigned pad0:13; - unsigned clip_mode:3; - unsigned userclip_enable_flags:8; - unsigned userclip_must_clip:1; - unsigned negative_w_clip_test:1; - unsigned guard_band_enable:1; - unsigned viewport_z_clip_enable:1; - unsigned viewport_xy_clip_enable:1; - unsigned vertex_position_space:1; - unsigned api_mode:1; - unsigned pad2:1; - } clip5; - - struct - { - unsigned pad0:5; - unsigned clipper_viewport_state_ptr:27; - } clip6; - - - float viewport_xmin; - float viewport_xmax; - float viewport_ymin; - float viewport_ymax; -}; - struct brw_wm_unit_state { struct thread0 thread0; diff --git a/src/mesa/drivers/dri/i965/genX_state_upload.c b/src/mesa/drivers/dri/i965/genX_state_upload.c index 54a547cb799..aa87e9c930c 100644 --- a/src/mesa/drivers/dri/i965/genX_state_upload.c +++ b/src/mesa/drivers/dri/i965/genX_state_upload.c @@ -1268,7 +1268,105 @@ static const struct brw_tracked_state genX(depth_stencil_state) = { /* ---------------------------------------------------------------------- */ -#if GEN_GEN >= 6 +#if GEN_GEN <= 5 + +static void +genX(upload_clip_state)(struct brw_context *brw) +{ + struct gl_context *ctx = &brw->ctx; + + ctx->NewDriverState |= BRW_NEW_GEN4_UNIT_STATE; + brw_state_emit(brw, GENX(CLIP_STATE), 32, &brw->clip.state_offset, clip) { + clip.KernelStartPointer = KSP_ro(brw, brw->clip.prog_offset); + clip.GRFRegisterCount = + DIV_ROUND_UP(brw->clip.prog_data->total_grf, 16) - 1; + clip.FloatingPointMode = FLOATING_POINT_MODE_Alternate; + clip.SingleProgramFlow = true; + clip.VertexURBEntryReadLength = brw->clip.prog_data->urb_read_length; + clip.ConstantURBEntryReadLength = brw->clip.prog_data->curb_read_length; + + /* BRW_NEW_PUSH_CONSTANT_ALLOCATION */ + clip.ConstantURBEntryReadOffset = brw->curbe.clip_start * 2; + clip.DispatchGRFStartRegisterForURBData = 1; + clip.VertexURBEntryReadOffset = 0; + + /* BRW_NEW_URB_FENCE */ + clip.NumberofURBEntries = brw->urb.nr_clip_entries; + clip.URBEntryAllocationSize = brw->urb.vsize - 1; + + if (brw->urb.nr_clip_entries >= 10) { + /* Half of the URB entries go to each thread, and it has to be an + * even number. + */ + assert(brw->urb.nr_clip_entries % 2 == 0); + + /* Although up to 16 concurrent Clip threads are allowed on Ironlake, + * only 2 threads can output VUEs at a time. + */ + clip.MaximumNumberofThreads = (GEN_GEN == 5 ? 16 : 2) - 1; + } else { + assert(brw->urb.nr_clip_entries >= 5); + clip.MaximumNumberofThreads = 1 - 1; + } + + clip.VertexPositionSpace = VPOS_NDCSPACE; + clip.UserClipFlagsMustClipEnable = true; + clip.GuardbandClipTestEnable = true; + + clip.ClipperViewportStatePointer = + instruction_ro_bo(brw->batch.bo, brw->clip.vp_offset); + + clip.ScreenSpaceViewportXMin = -1; + clip.ScreenSpaceViewportXMax = 1; + clip.ScreenSpaceViewportYMin = -1; + clip.ScreenSpaceViewportYMax = 1; + + clip.ViewportXYClipTestEnable = true; + clip.ViewportZClipTestEnable = !ctx->Transform.DepthClamp; + + /* _NEW_TRANSFORM */ + if (GEN_GEN == 5 || GEN_IS_G4X) { + clip.UserClipDistanceClipTestEnableBitmask = + ctx->Transform.ClipPlanesEnabled; + } else { + /* Up to 6 actual clip flags, plus the 7th for the negative RHW + * workaround. + */ + clip.UserClipDistanceClipTestEnableBitmask = + (ctx->Transform.ClipPlanesEnabled & 0x3f) | 0x40; + } + + if (ctx->Transform.ClipDepthMode == GL_ZERO_TO_ONE) + clip.APIMode = APIMODE_D3D; + else + clip.APIMode = APIMODE_OGL; + + clip.GuardbandClipTestEnable = true; + + clip.ClipMode = brw->clip.prog_data->clip_mode; + +#if GEN_IS_G4X + clip.NegativeWClipTestEnable = true; +#endif + } +} + +const struct brw_tracked_state genX(clip_state) = { + .dirty = { + .mesa = _NEW_TRANSFORM | + _NEW_VIEWPORT, + .brw = BRW_NEW_BATCH | + BRW_NEW_BLORP | + BRW_NEW_CLIP_PROG_DATA | + BRW_NEW_PUSH_CONSTANT_ALLOCATION | + BRW_NEW_PROGRAM_CACHE | + BRW_NEW_URB_FENCE, + }, + .emit = genX(upload_clip_state), +}; + +#else + static void genX(upload_clip_state)(struct brw_context *brw) { @@ -5123,7 +5221,7 @@ genX(init_atoms)(struct brw_context *brw) &genX(sf_clip_viewport), &genX(sf_state), &genX(vs_state), /* always required, enabled or not */ - &brw_clip_unit, + &genX(clip_state), &genX(gs_state), /* Command packets: -- 2.30.2