/*
 * Copyright © 2013 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file brw_vec4_gs_visitor.cpp
 *
 * Geometry-shader-specific code derived from the vec4_visitor class.
 */

#include "brw_vec4_gs_visitor.h"

/**
 * Upper bound on the number of input vertices a geometry shader invocation
 * may receive.  It sizes the attribute map built in setup_payload() and is
 * asserted against the program's VerticesIn in setup_varying_inputs().
 */
const unsigned MAX_GS_INPUT_VERTICES = 6;

36 vec4_gs_visitor::vec4_gs_visitor(struct brw_context
*brw
,
37 struct brw_vec4_gs_compile
*c
,
38 struct gl_shader_program
*prog
,
39 struct brw_shader
*shader
,
41 : vec4_visitor(brw
, &c
->base
, &c
->gp
->program
.Base
, &c
->key
.base
,
42 &c
->prog_data
.base
, prog
, shader
, mem_ctx
,
43 INTEL_DEBUG
& DEBUG_GS
),
50 vec4_gs_visitor::make_reg_for_system_value(ir_variable
*ir
)
52 /* Geometry shaders don't use any system values. */
59 vec4_gs_visitor::setup_varying_inputs(int payload_reg
, int *attribute_map
)
61 /* For geometry shaders there are N copies of the input attributes, where N
62 * is the number of input vertices. attribute_map[BRW_VARYING_SLOT_COUNT *
63 * i + j] represents attribute j for vertex i.
65 * Note that GS inputs are read from the VUE 256 bits (2 vec4's) at a time,
66 * so the total number of input slots that will be delivered to the GS (and
67 * thus the stride of the input arrays) is urb_read_length * 2.
69 const unsigned num_input_vertices
= c
->gp
->program
.VerticesIn
;
70 assert(num_input_vertices
<= MAX_GS_INPUT_VERTICES
);
71 unsigned input_array_stride
= c
->prog_data
.base
.urb_read_length
* 2;
73 for (int slot
= 0; slot
< c
->key
.input_vue_map
.num_slots
; slot
++) {
74 int varying
= c
->key
.input_vue_map
.slot_to_varying
[slot
];
75 for (unsigned vertex
= 0; vertex
< num_input_vertices
; vertex
++) {
76 attribute_map
[BRW_VARYING_SLOT_COUNT
* vertex
+ varying
] =
77 payload_reg
+ input_array_stride
* vertex
+ slot
;
81 return payload_reg
+ input_array_stride
* num_input_vertices
;
86 vec4_gs_visitor::setup_payload()
88 int attribute_map
[BRW_VARYING_SLOT_COUNT
* MAX_GS_INPUT_VERTICES
];
90 /* If a geometry shader tries to read from an input that wasn't written by
91 * the vertex shader, that produces undefined results, but it shouldn't
92 * crash anything. So initialize attribute_map to zeros--that ensures that
93 * these undefined results are read from r0.
95 memset(attribute_map
, 0, sizeof(attribute_map
));
99 /* The payload always contains important data in r0, which contains
100 * the URB handles that are passed on to the URB write at the end
105 reg
= setup_uniforms(reg
);
107 reg
= setup_varying_inputs(reg
, attribute_map
);
109 lower_attributes_to_hw_regs(attribute_map
);
111 this->first_non_payload_grf
= reg
;
116 vec4_gs_visitor::emit_prolog()
118 /* In vertex shaders, r0.2 is guaranteed to be initialized to zero. In
119 * geometry shaders, it isn't (it contains a bunch of information we don't
120 * need, like the input primitive type). We need r0.2 to be zero in order
121 * to build scratch read/write messages correctly (otherwise this value
122 * will be interpreted as a global offset, causing us to do our scratch
123 * reads/writes to garbage memory). So just set it to zero at the top of
126 this->current_annotation
= "clear r0.2";
127 dst_reg
r0(retype(brw_vec4_grf(0, 0), BRW_REGISTER_TYPE_UD
));
128 emit(GS_OPCODE_SET_DWORD_2_IMMED
, r0
, 0u);
130 /* Create a virtual register to hold the vertex count */
131 this->vertex_count
= src_reg(this, glsl_type::uint_type
);
133 /* Initialize the vertex_count register to 0 */
134 this->current_annotation
= "initialize vertex_count";
135 vec4_instruction
*inst
= emit(MOV(dst_reg(this->vertex_count
), 0u));
136 inst
->force_writemask_all
= true;
138 this->current_annotation
= NULL
;
143 vec4_gs_visitor::emit_program_code()
145 /* We don't support NV_geometry_program4. */
146 assert(!"Unreached");
151 vec4_gs_visitor::emit_thread_end()
153 /* MRF 0 is reserved for the debugger, so start with message header
158 current_annotation
= "thread end";
159 dst_reg
mrf_reg(MRF
, base_mrf
);
160 src_reg
r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
161 vec4_instruction
*inst
= emit(MOV(mrf_reg
, r0
));
162 inst
->force_writemask_all
= true;
163 emit(GS_OPCODE_SET_VERTEX_COUNT
, mrf_reg
, this->vertex_count
);
164 if (INTEL_DEBUG
& DEBUG_SHADER_TIME
)
165 emit_shader_time_end();
166 inst
= emit(GS_OPCODE_THREAD_END
);
167 inst
->base_mrf
= base_mrf
;
173 vec4_gs_visitor::emit_urb_write_header(int mrf
)
175 /* The SEND instruction that writes the vertex data to the VUE will use
176 * per_slot_offset=true, which means that DWORDs 3 and 4 of the message
177 * header specify an offset (in multiples of 256 bits) into the URB entry
178 * at which the write should take place.
180 * So we have to prepare a message header with the appropriate offset
183 dst_reg
mrf_reg(MRF
, mrf
);
184 src_reg
r0(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD
));
185 this->current_annotation
= "URB write header";
186 vec4_instruction
*inst
= emit(MOV(mrf_reg
, r0
));
187 inst
->force_writemask_all
= true;
188 emit(GS_OPCODE_SET_WRITE_OFFSET
, mrf_reg
, this->vertex_count
,
189 (uint32_t) c
->prog_data
.output_vertex_size_hwords
);
194 vec4_gs_visitor::emit_urb_write_opcode(bool complete
)
196 /* We don't care whether the vertex is complete, because in general
197 * geometry shaders output multiple vertices, and we don't terminate the
198 * thread until all vertices are complete.
202 vec4_instruction
*inst
= emit(GS_OPCODE_URB_WRITE
);
203 inst
->urb_write_flags
= BRW_URB_WRITE_PER_SLOT_OFFSET
;
209 vec4_gs_visitor::compute_array_stride(ir_dereference_array
*ir
)
211 /* Geometry shader inputs are arrays, but they use an unusual array layout:
212 * instead of all array elements for a given geometry shader input being
213 * stored consecutively, all geometry shader inputs are interleaved into
214 * one giant array. At this stage of compilation, we assume that the
215 * stride of the array is BRW_VARYING_SLOT_COUNT. Later,
216 * setup_attributes() will remap our accesses to the actual input array.
218 ir_dereference_variable
*deref_var
= ir
->array
->as_dereference_variable();
219 if (deref_var
&& deref_var
->var
->mode
== ir_var_shader_in
)
220 return BRW_VARYING_SLOT_COUNT
;
222 return vec4_visitor::compute_array_stride(ir
);
227 vec4_gs_visitor::visit(ir_emit_vertex
*)
229 this->current_annotation
= "emit vertex: safety check";
231 /* To ensure that we don't output more vertices than the shader specified
232 * using max_vertices, do the logic inside a conditional of the form "if
233 * (vertex_count < MAX)"
235 unsigned num_output_vertices
= c
->gp
->program
.VerticesOut
;
236 emit(CMP(dst_null_d(), this->vertex_count
,
237 src_reg(num_output_vertices
), BRW_CONDITIONAL_L
));
238 emit(IF(BRW_PREDICATE_NORMAL
));
240 this->current_annotation
= "emit vertex: vertex data";
243 this->current_annotation
= "emit vertex: increment vertex count";
244 emit(ADD(dst_reg(this->vertex_count
), this->vertex_count
,
247 emit(BRW_OPCODE_ENDIF
);
249 this->current_annotation
= NULL
;
253 vec4_gs_visitor::visit(ir_end_primitive
*)
255 assert(!"Not implemented yet");
259 } /* namespace brw */