nir: Use a single list for all shader variables
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_fragment.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key,
38 enum chip_class chip_class):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class, 0),
40 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key.ps.color_two_side),
43 m_last_pixel_export(nullptr),
44 m_nir(nir),
45 m_reserved_registers(0),
46 m_frag_pos_index(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
49 m_depth_exports(0),
50 m_enable_centroid_interpolators(false),
51 m_apply_sample_mask(key.ps.apply_sample_id_mask)
52 {
53 for (auto& i: m_interpolator) {
54 i.enabled = false;
55 i.ij_index= 0;
56 }
57
58 sh_info().rat_base = key.ps.nr_cbufs;
59 sh_info().atomic_base = key.ps.first_atomic_counter;
60 }
61
62 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
63 {
64 sfn_log << SfnLog::io << "Parse input variable "
65 << input->name << " location:" << input->data.location
66 << " driver-loc:" << input->data.driver_location
67 << " interpolation:" << input->data.interpolation
68 << "\n";
69
70 if (input->data.location == VARYING_SLOT_FACE) {
71 m_sv_values.set(es_face);
72 return true;
73 }
74
75 unsigned name, sid;
76 auto semantic = r600_get_varying_semantic(input->data.location);
77 name = semantic.first;
78 sid = semantic.second;
79
80 tgsi_semantic sname = static_cast<tgsi_semantic>(name);
81
82 switch (sname) {
83 case TGSI_SEMANTIC_POSITION: {
84 m_sv_values.set(es_pos);
85 return true;
86 }
87 case TGSI_SEMANTIC_COLOR: {
88 m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
89 m_need_back_color = m_two_sided_color;
90 return true;
91 }
92 case TGSI_SEMANTIC_PRIMID:
93 sh_info().gs_prim_id_input = true;
94 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
95 /* fallthrough */
96 case TGSI_SEMANTIC_FOG:
97 case TGSI_SEMANTIC_GENERIC:
98 case TGSI_SEMANTIC_TEXCOORD:
99 case TGSI_SEMANTIC_LAYER:
100 case TGSI_SEMANTIC_PCOORD:
101 case TGSI_SEMANTIC_VIEWPORT_INDEX:
102 case TGSI_SEMANTIC_CLIPDIST: {
103 if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
104 m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
105 return true;
106 }
107 default:
108 return false;
109 }
110 }
111
112 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
113 {
114 switch (instr->type) {
115 case nir_instr_type_intrinsic: {
116 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
117 switch (ii->intrinsic) {
118 case nir_intrinsic_load_front_face:
119 m_sv_values.set(es_face);
120 break;
121 case nir_intrinsic_load_sample_mask_in:
122 m_sv_values.set(es_sample_mask_in);
123 break;
124 case nir_intrinsic_load_sample_pos:
125 m_sv_values.set(es_sample_pos);
126 /* fallthrough */
127 case nir_intrinsic_load_sample_id:
128 m_sv_values.set(es_sample_id);
129 break;
130 case nir_intrinsic_interp_deref_at_centroid:
131 /* This is not a sysvalue, should go elsewhere */
132 m_enable_centroid_interpolators = true;
133 break;
134 default:
135 ;
136 }
137 }
138 default:
139 ;
140 }
141 return true;
142 }
143
144 bool FragmentShaderFromNir::do_allocate_reserved_registers()
145 {
146 assert(!m_reserved_registers);
147
148 int face_reg_index = -1;
149 int sample_id_index = -1;
150 // enabled interpolators based on inputs
151 for (auto& i: m_shaderio.inputs()) {
152 int ij = i->ij_index();
153 if (ij >= 0) {
154 m_interpolator[ij].enabled = true;
155 }
156 }
157
158 /* Lazy, enable both possible interpolators,
159 * TODO: check which ones are really needed */
160 if (m_enable_centroid_interpolators) {
161 m_interpolator[2].enabled = true; /* perspective */
162 m_interpolator[5].enabled = true; /* linear */
163 }
164
165 // sort the varying inputs
166 m_shaderio.sort_varying_inputs();
167
168 // handle interpolators
169 int num_baryc = 0;
170 for (int i = 0; i < 6; ++i) {
171 if (m_interpolator[i].enabled) {
172 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
173
174 m_interpolator[i].ij_index = num_baryc;
175
176 unsigned sel = num_baryc / 2;
177 unsigned chan = 2 * (num_baryc % 2);
178
179 auto ip_i = new GPRValue(sel, chan + 1);
180 ip_i->set_as_input();
181 m_interpolator[i].i.reset(ip_i);
182 inject_register(sel, chan + 1, m_interpolator[i].i, false);
183
184 auto ip_j = new GPRValue(sel, chan);
185 ip_j->set_as_input();
186 m_interpolator[i].j.reset(ip_j);
187 inject_register(sel, chan, m_interpolator[i].j, false);
188
189 ++num_baryc;
190 }
191 }
192 m_reserved_registers += (num_baryc + 1) >> 1;
193
194 if (m_sv_values.test(es_pos)) {
195 m_frag_pos_index = m_reserved_registers++;
196 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
197 }
198
199 // handle system values
200 if (m_sv_values.test(es_face) || m_need_back_color) {
201 face_reg_index = m_reserved_registers++;
202 auto ffr = new GPRValue(face_reg_index,0);
203 ffr->set_as_input();
204 m_front_face_reg.reset(ffr);
205 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
206 inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
207
208 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
209 load_front_face();
210 }
211
212 if (m_sv_values.test(es_sample_mask_in)) {
213 if (face_reg_index < 0)
214 face_reg_index = m_reserved_registers++;
215
216 auto smi = new GPRValue(face_reg_index,2);
217 smi->set_as_input();
218 m_sample_mask_reg.reset(smi);
219 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
220 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
221 sh_info().nsys_inputs = 1;
222 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
223 }
224
225 if (m_sv_values.test(es_sample_id) ||
226 m_sv_values.test(es_sample_mask_in)) {
227 if (sample_id_index < 0)
228 sample_id_index = m_reserved_registers++;
229
230 auto smi = new GPRValue(sample_id_index, 3);
231 smi->set_as_input();
232 m_sample_id_reg.reset(smi);
233 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
234 sh_info().nsys_inputs++;
235 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, sample_id_index));
236 }
237
238 // The back color handling is not emmited in the code, so we have
239 // to add the inputs here and later we also need to inject the code to set
240 // the right color
241 if (m_need_back_color) {
242 size_t ninputs = m_shaderio.inputs().size();
243 for (size_t k = 0; k < ninputs; ++k) {
244 ShaderInput& i = m_shaderio.input(k);
245
246 if (i.name() != TGSI_SEMANTIC_COLOR)
247 continue;
248
249 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
250
251 size_t next_pos = m_shaderio.size();
252 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
253 m_shaderio.add_input(bcol);
254 col.set_back_color(next_pos);
255 }
256 m_shaderio.set_two_sided();
257 }
258
259 m_shaderio.update_lds_pos();
260
261 set_reserved_registers(m_reserved_registers);
262
263 return true;
264 }
265
266 void FragmentShaderFromNir::emit_shader_start()
267 {
268 if (m_sv_values.test(es_face))
269 load_front_face();
270
271 if (m_sv_values.test(es_pos)) {
272 for (int i = 0; i < 4; ++i) {
273 auto v = new GPRValue(m_frag_pos_index, i);
274 v->set_as_input();
275 auto reg = PValue(v);
276 if (i == 3)
277 emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
278 m_frag_pos[i] = reg;
279 }
280 }
281 }
282
283 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
284 {
285 if (out_var->data.location == FRAG_RESULT_COLOR)
286 return emit_export_pixel(out_var, instr, true);
287
288 if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
289 out_var->data.location <= FRAG_RESULT_DATA7) ||
290 out_var->data.location == FRAG_RESULT_DEPTH ||
291 out_var->data.location == FRAG_RESULT_STENCIL ||
292 out_var->data.location == FRAG_RESULT_SAMPLE_MASK)
293 return emit_export_pixel(out_var, instr, false);
294
295 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
296 out_var->data.location << "(" << out_var->data.driver_location << ")\n";
297 return false;
298 }
299
300 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
301 {
302 sfn_log << SfnLog::instr << "Parse output variable "
303 << output->name << " @" << output->data.location
304 << "@dl:" << output->data.driver_location << "\n";
305
306 ++sh_info().noutput;
307 r600_shader_io& io = sh_info().output[output->data.driver_location];
308 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
309 &io.name, &io.sid);
310
311 /* Check whether this code has become obsolete by the IO vectorization */
312 unsigned num_components = 4;
313 unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
314 if (vector_elements)
315 num_components = vector_elements;
316 unsigned component = output->data.location_frac;
317
318 for (unsigned j = component; j < num_components + component; j++)
319 io.write_mask |= 1 << j;
320
321 int loc = output->data.location;
322 if (loc == FRAG_RESULT_COLOR &&
323 (m_nir.info.outputs_written & (1ull << loc))) {
324 sh_info().fs_write_all = true;
325 }
326
327 if (output->data.location == FRAG_RESULT_COLOR ||
328 (output->data.location >= FRAG_RESULT_DATA0 &&
329 output->data.location <= FRAG_RESULT_DATA7)) {
330 return true;
331 }
332 if (output->data.location == FRAG_RESULT_DEPTH ||
333 output->data.location == FRAG_RESULT_STENCIL ||
334 output->data.location == FRAG_RESULT_SAMPLE_MASK) {
335 io.write_mask = 15;
336 return true;
337 }
338
339 return false;
340 }
341
342 bool FragmentShaderFromNir::emit_load_sample_mask_in(nir_intrinsic_instr* instr)
343 {
344 auto dest = from_nir(instr->dest, 0);
345 assert(m_sample_id_reg);
346 assert(m_sample_mask_reg);
347
348 emit_instruction(new AluInstruction(op2_lshl_int, dest, Value::one_i, m_sample_id_reg, EmitInstruction::last_write));
349 emit_instruction(new AluInstruction(op2_and_int, dest, dest, m_sample_mask_reg, EmitInstruction::last_write));
350 return true;
351 }
352
353 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
354 {
355 switch (instr->intrinsic) {
356 case nir_intrinsic_load_sample_mask_in:
357 if (m_apply_sample_mask) {
358 return emit_load_sample_mask_in(instr);
359 } else
360 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
361 case nir_intrinsic_load_sample_id:
362 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
363 case nir_intrinsic_load_front_face:
364 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
365 case nir_intrinsic_interp_deref_at_sample:
366 return emit_interp_deref_at_sample(instr);
367 case nir_intrinsic_interp_deref_at_offset:
368 return emit_interp_deref_at_offset(instr);
369 case nir_intrinsic_interp_deref_at_centroid:
370 return emit_interp_deref_at_centroid(instr);
371 case nir_intrinsic_load_sample_pos:
372 return emit_load_sample_pos(instr);
373
374 default:
375 return false;
376 }
377 }
378
379 void FragmentShaderFromNir::load_front_face()
380 {
381 assert(m_front_face_reg);
382 if (m_front_face_loaded)
383 return;
384
385 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
386 Value::zero, {alu_write, alu_last_instr});
387 m_front_face_loaded = true;
388 emit_instruction(ir);
389 }
390
391 bool FragmentShaderFromNir::emit_load_sample_pos(nir_intrinsic_instr* instr)
392 {
393 GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
394 auto fetch = new FetchInstruction(vc_fetch,
395 no_index_offset,
396 fmt_32_32_32_32_float,
397 vtx_nf_scaled,
398 vtx_es_none,
399 m_sample_id_reg,
400 dest,
401 0,
402 false,
403 0xf,
404 R600_BUFFER_INFO_CONST_BUFFER,
405 0,
406 bim_none,
407 false,
408 false,
409 0,
410 0,
411 0,
412 PValue(),
413 {0,1,2,3});
414 fetch->set_flag(vtx_srf_mode);
415 emit_instruction(fetch);
416 return true;
417 }
418
419 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
420 {
421 GPRVector slope = get_temp_vec4();
422
423 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
424 from_nir_with_fetch_constant(instr->src[1], 0),
425 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
426 fetch->set_flag(vtx_srf_mode);
427 emit_instruction(fetch);
428
429 GPRVector grad = get_temp_vec4();
430 auto var = get_deref_location(instr->src[0]);
431 assert(var);
432
433 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
434 auto interpolator = m_interpolator[io.ij_index()];
435 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
436
437 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
438
439 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
440 tex->set_dest_swizzle({0,1,7,7});
441 emit_instruction(tex);
442
443 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
444 tex->set_dest_swizzle({7,7,0,1});
445 emit_instruction(tex);
446
447 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
448 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
449
450 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
451 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
452
453 Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
454
455 auto dst = vec_from_nir(instr->dest, 4);
456 int num_components = instr->dest.is_ssa ?
457 instr->dest.ssa.num_components:
458 instr->dest.reg.reg->num_components;
459
460 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
461
462 return true;
463 }
464
465 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
466 {
467 int temp = allocate_temp_register();
468
469 GPRVector help(temp, {0,1,2,3});
470
471 auto var = get_deref_location(instr->src[0]);
472 assert(var);
473
474 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
475 auto interpolator = m_interpolator[io.ij_index()];
476 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
477
478 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
479
480 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
481 getgradh->set_dest_swizzle({0,1,7,7});
482 getgradh->set_flag(TexInstruction::x_unnormalized);
483 getgradh->set_flag(TexInstruction::y_unnormalized);
484 getgradh->set_flag(TexInstruction::z_unnormalized);
485 getgradh->set_flag(TexInstruction::w_unnormalized);
486 emit_instruction(getgradh);
487
488 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
489 getgradv->set_dest_swizzle({7,7,0,1});
490 getgradv->set_flag(TexInstruction::x_unnormalized);
491 getgradv->set_flag(TexInstruction::y_unnormalized);
492 getgradv->set_flag(TexInstruction::z_unnormalized);
493 getgradv->set_flag(TexInstruction::w_unnormalized);
494 emit_instruction(getgradv);
495
496 PValue ofs_x = from_nir(instr->src[1], 0);
497 PValue ofs_y = from_nir(instr->src[1], 1);
498 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
499 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
500 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
501 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
502
503 Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
504
505 auto dst = vec_from_nir(instr->dest, 4);
506 load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
507 var->data.location_frac);
508
509 return true;
510 }
511
512 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
513 {
514 auto var = get_deref_location(instr->src[0]);
515 assert(var);
516
517 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
518 io.set_uses_interpolate_at_centroid();
519
520 int ij_index = io.ij_index() >= 3 ? 5 : 2;
521 assert (m_interpolator[ij_index].enabled);
522 auto ip = m_interpolator[ij_index];
523
524 int num_components = nir_dest_num_components(instr->dest);
525
526 auto dst = vec_from_nir(instr->dest, 4);
527 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
528 return true;
529 }
530
531
532 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
533 {
534 if (in_var->data.location == VARYING_SLOT_POS) {
535 assert(instr->dest.is_ssa);
536
537 for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
538 inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
539 }
540 return true;
541 }
542
543 if (in_var->data.location == VARYING_SLOT_FACE)
544 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
545
546 // todo: replace io with ShaderInputVarying
547 auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
548 unsigned num_components = 4;
549
550
551 if (instr->dest.is_ssa) {
552 num_components = instr->dest.ssa.num_components;
553 } else {
554 num_components = instr->dest.reg.reg->num_components;
555 }
556
557 auto dst = vec_from_nir(instr->dest, 4);
558
559 sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
560 << "].gpr=" << dst.sel() << "\n";
561
562 io.set_gpr(dst.sel());
563
564 auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
565
566 load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
567
568 /* These results are expected starting in slot x..*/
569 if (in_var->data.location_frac > 0) {
570 int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
571 instr->dest.reg.reg->num_components;
572 AluInstruction *ir = nullptr;
573 for (int i = 0; i < n; ++i) {
574 ir = new AluInstruction(op1_mov, dst[i],
575 dst[i + in_var->data.location_frac], {alu_write});
576 emit_instruction(ir);
577 }
578 if (ir)
579 ir->set_flag(alu_last_instr);
580 }
581
582
583 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
584
585 auto & color_input = static_cast<ShaderInputColor&> (io);
586 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
587
588 bgio.set_gpr(allocate_temp_register());
589
590 GPRVector bgcol(bgio.gpr(), {0,1,2,3});
591 load_interpolated(bgcol, bgio, ip, num_components, 0);
592
593 load_front_face();
594
595 AluInstruction *ir = nullptr;
596 for (unsigned i = 0; i < 4 ; ++i) {
597 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
598 emit_instruction(ir);
599 }
600 if (ir)
601 ir->set_flag(alu_last_instr);
602 }
603
604 return true;
605 }
606
607 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
608 ShaderInput& io, const Interpolator &ip,
609 int num_components, int start_comp)
610 {
611 // replace io with ShaderInputVarying
612 if (io.interpolate() > 0) {
613
614 sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
615
616 if (num_components == 1) {
617 switch (start_comp) {
618 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
619 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
620 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
621 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
622 default:
623 assert(0);
624 }
625 }
626
627 if (num_components == 2) {
628 switch (start_comp) {
629 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
630 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
631 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
632 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
633 default:
634 assert(0);
635 }
636 }
637
638 if (num_components == 3 && start_comp == 0)
639 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
640 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
641
642 int full_write_mask = ((1 << num_components) - 1) << start_comp;
643
644 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
645 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
646 return success;
647
648 } else {
649 AluInstruction *ir = nullptr;
650 for (unsigned i = 0; i < 4 ; ++i) {
651 ir = new AluInstruction(op1_interp_load_p0, dest[i],
652 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
653 EmitInstruction::write);
654 emit_instruction(ir);
655 }
656 ir->set_flag(alu_last_instr);
657 }
658 return true;
659 }
660
661 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
662 ShaderInput& io, const Interpolator& ip, EAluOp op)
663 {
664 for (unsigned i = 0; i < 2 ; ++i) {
665 int chan = i;
666 if (op == op2_interp_z)
667 chan += 2;
668
669
670 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
671 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
672 i == 0 ? EmitInstruction::write : EmitInstruction::last);
673 dest.pin_to_channel(chan);
674
675 ir->set_bank_swizzle(alu_vec_210);
676 emit_instruction(ir);
677 }
678 return true;
679 }
680
681 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
682 const Interpolator& ip, EAluOp op, int writemask)
683 {
684 AluInstruction *ir = nullptr;
685 for (unsigned i = 0; i < 4 ; ++i) {
686 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
687 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
688 dest.pin_to_channel(i);
689 ir->set_bank_swizzle(alu_vec_210);
690 emit_instruction(ir);
691 }
692 ir->set_flag(alu_last_instr);
693 return true;
694 }
695
696 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
697 ShaderInput& io, const Interpolator& ip,
698 EAluOp op, UNUSED int start, int comp)
699 {
700 AluInstruction *ir = nullptr;
701 for (int i = 0; i < 4 ; ++i) {
702 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
703 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
704 i == comp ? EmitInstruction::write : EmitInstruction::empty);
705 ir->set_bank_swizzle(alu_vec_210);
706 dest.pin_to_channel(i);
707 emit_instruction(ir);
708 }
709 ir->set_flag(alu_last_instr);
710 return true;
711 }
712
713
714 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels)
715 {
716 int outputs = all_chanels ? m_max_color_exports : 1;
717
718 std::array<uint32_t,4> swizzle;
719 unsigned writemask = nir_intrinsic_write_mask(instr);
720 switch (out_var->data.location) {
721 case FRAG_RESULT_DEPTH:
722 writemask = 1;
723 swizzle = {0,7,7,7};
724 break;
725 case FRAG_RESULT_STENCIL:
726 writemask = 2;
727 swizzle = {7,0,7,7};
728 break;
729 case FRAG_RESULT_SAMPLE_MASK:
730 writemask = 4;
731 swizzle = {7,7,0,7};
732 break;
733 default:
734 for (int i = 0; i < 4; ++i) {
735 swizzle[i] = (i < instr->num_components) ? i : 7;
736 }
737 }
738
739 auto value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
740
741 set_output(out_var->data.driver_location, value.sel());
742
743 if (out_var->data.location == FRAG_RESULT_COLOR ||
744 (out_var->data.location >= FRAG_RESULT_DATA0 &&
745 out_var->data.location <= FRAG_RESULT_DATA7)) {
746 for (int k = 0 ; k < outputs; ++k) {
747
748 unsigned location = out_var->data.driver_location + k - m_depth_exports;
749 if (location >= m_max_color_exports) {
750 sfn_log << SfnLog::io << "Pixel output " << location
751 << " skipped because we have only " << m_max_color_exports << "CBs\n";
752 continue;
753 }
754
755 m_last_pixel_export = new ExportInstruction(location, value, ExportInstruction::et_pixel);
756
757 if (sh_info().ps_export_highest < location)
758 sh_info().ps_export_highest = location;
759
760 sh_info().nr_ps_color_exports++;
761
762 unsigned mask = (0xfu << (location * 4));
763 sh_info().ps_color_export_mask |= mask;
764
765 emit_export_instruction(m_last_pixel_export);
766 ++m_max_counted_color_exports;
767 };
768 } else if (out_var->data.location == FRAG_RESULT_DEPTH ||
769 out_var->data.location == FRAG_RESULT_STENCIL ||
770 out_var->data.location == FRAG_RESULT_SAMPLE_MASK) {
771 m_depth_exports++;
772 emit_export_instruction(new ExportInstruction(61, value, ExportInstruction::et_pixel));
773 } else {
774 return false;
775 }
776 return true;
777 }
778
779 void FragmentShaderFromNir::do_finalize()
780 {
781 // update shader io info and set LDS etc.
782 sh_info().ninput = m_shaderio.inputs().size();
783
784 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
785 for (size_t i = 0; i < sh_info().ninput; ++i) {
786 int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
787 m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
788 m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
789 }
790
791 sh_info().two_side = m_shaderio.two_sided();
792 sh_info().nlds = m_shaderio.nlds();
793
794 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
795
796 if (sh_info().fs_write_all) {
797 sh_info().nr_ps_max_color_exports = m_max_color_exports;
798 }
799
800 if (!m_last_pixel_export) {
801 GPRVector v(0, {7,7,7,7});
802 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
803 sh_info().nr_ps_color_exports++;
804 sh_info().ps_color_export_mask = 0xf;
805 emit_export_instruction(m_last_pixel_export);
806 }
807
808 m_last_pixel_export->set_last();
809
810 if (sh_info().fs_write_all)
811 sh_info().nr_ps_max_color_exports = 8;
812 }
813
814 }