r600/nir: Pin interpolation results to channel
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_fragment.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key):
38 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size),
39 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
40 m_max_counted_color_exports(0),
41 m_two_sided_color(key.ps.color_two_side),
42 m_last_pixel_export(nullptr),
43 m_nir(nir),
44 m_reserved_registers(0),
45 m_frag_pos_index(0),
46 m_need_back_color(false),
47 m_front_face_loaded(false),
48 m_depth_exports(0),
49 m_enable_centroid_interpolators(false)
50 {
51 for (auto& i: m_interpolator) {
52 i.enabled = false;
53 i.ij_index= 0;
54 }
55
56 sh_info().rat_base = key.ps.nr_cbufs;
57 sh_info().atomic_base = key.ps.first_atomic_counter;
58 }
59
60 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
61 {
62 sfn_log << SfnLog::io << "Parse input variable "
63 << input->name << " location:" << input->data.location
64 << " driver-loc:" << input->data.driver_location
65 << " interpolation:" << input->data.interpolation
66 << "\n";
67
68 unsigned name, sid;
69
70 if (input->data.location == VARYING_SLOT_FACE) {
71 m_sv_values.set(es_face);
72 return true;
73 }
74
75 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(input->data.location),
76 true, &name, &sid);
77
78 /* Work around the mixed tgsi/nir semantic problems, this fixes
79 * dEQP-GLES2.functional.shaders.builtin_variable.pointcoord */
80 if (input->data.location == VARYING_SLOT_PNTC) {
81 name = TGSI_SEMANTIC_GENERIC;
82 sid = 8;
83 }
84
85 tgsi_semantic sname = static_cast<tgsi_semantic>(name);
86
87 switch (sname) {
88 case TGSI_SEMANTIC_POSITION: {
89 m_sv_values.set(es_pos);
90 return true;
91 }
92 case TGSI_SEMANTIC_COLOR: {
93 m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
94 m_need_back_color = m_two_sided_color;
95 return true;
96 }
97 case TGSI_SEMANTIC_PRIMID:
98 sh_info().gs_prim_id_input = true;
99 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
100 /* fallthrough */
101 case TGSI_SEMANTIC_FOG:
102 case TGSI_SEMANTIC_GENERIC:
103 case TGSI_SEMANTIC_TEXCOORD:
104 case TGSI_SEMANTIC_LAYER:
105 case TGSI_SEMANTIC_PCOORD:
106 case TGSI_SEMANTIC_VIEWPORT_INDEX:
107 case TGSI_SEMANTIC_CLIPDIST: {
108 if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
109 m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
110 return true;
111 }
112 default:
113 return false;
114 }
115 }
116
117 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
118 {
119 switch (instr->type) {
120 case nir_instr_type_intrinsic: {
121 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
122 switch (ii->intrinsic) {
123 case nir_intrinsic_load_front_face:
124 m_sv_values.set(es_face);
125 break;
126 case nir_intrinsic_load_sample_mask_in:
127 m_sv_values.set(es_sample_mask_in);
128 break;
129 case nir_intrinsic_load_sample_id:
130 m_sv_values.set(es_sample_id);
131 break;
132 case nir_intrinsic_interp_deref_at_centroid:
133 /* This is not a sysvalue, should go elsewhere */
134 m_enable_centroid_interpolators = true;
135 break;
136 default:
137 ;
138 }
139 }
140 default:
141 ;
142 }
143 return true;
144 }
145
146 bool FragmentShaderFromNir::allocate_reserved_registers()
147 {
148 assert(!m_reserved_registers);
149
150 int face_reg_index = -1;
151 // enabled interpolators based on inputs
152 for (auto& i: m_shaderio.inputs()) {
153 int ij = i->ij_index();
154 if (ij >= 0) {
155 m_interpolator[ij].enabled = true;
156 }
157 }
158
159 /* Lazy, enable both possible interpolators,
160 * TODO: check which ones are really needed */
161 if (m_enable_centroid_interpolators) {
162 m_interpolator[2].enabled = true; /* perspective */
163 m_interpolator[5].enabled = true; /* linear */
164 }
165
166 // sort the varying inputs
167 m_shaderio.sort_varying_inputs();
168
169 // handle interpolators
170 int num_baryc = 0;
171 for (int i = 0; i < 6; ++i) {
172 if (m_interpolator[i].enabled) {
173 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
174
175 m_interpolator[i].ij_index = num_baryc;
176
177 unsigned sel = num_baryc / 2;
178 unsigned chan = 2 * (num_baryc % 2);
179
180 auto ip_i = new GPRValue(sel, chan + 1);
181 ip_i->set_as_input();
182 m_interpolator[i].i.reset(ip_i);
183 inject_register(sel, chan + 1, m_interpolator[i].i, false);
184
185 auto ip_j = new GPRValue(sel, chan);
186 ip_j->set_as_input();
187 m_interpolator[i].j.reset(ip_j);
188 inject_register(sel, chan, m_interpolator[i].j, false);
189
190 ++num_baryc;
191 }
192 }
193 m_reserved_registers += (num_baryc + 1) >> 1;
194
195 if (m_sv_values.test(es_pos)) {
196 m_frag_pos_index = m_reserved_registers++;
197 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
198 }
199
200 // handle system values
201 if (m_sv_values.test(es_face) || m_need_back_color) {
202 face_reg_index = m_reserved_registers++;
203 auto ffr = new GPRValue(face_reg_index,0);
204 ffr->set_as_input();
205 m_front_face_reg.reset(ffr);
206 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
207 inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
208
209 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
210 load_front_face();
211 }
212
213 if (m_sv_values.test(es_sample_mask_in)) {
214 if (face_reg_index < 0)
215 face_reg_index = m_reserved_registers++;
216
217 auto smi = new GPRValue(face_reg_index,2);
218 smi->set_as_input();
219 m_sample_mask_reg.reset(smi);
220 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
221 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
222 sh_info().nsys_inputs = 1;
223 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
224 }
225
226 if (m_sv_values.test(es_sample_id)) {
227 if (face_reg_index < 0)
228 face_reg_index = m_reserved_registers++;
229
230 auto smi = new GPRValue(face_reg_index, 3);
231 smi->set_as_input();
232 m_sample_id_reg.reset(smi);
233 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
234 sh_info().nsys_inputs++;
235 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, face_reg_index));
236 }
237
238 // The back color handling is not emmited in the code, so we have
239 // to add the inputs here and later we also need to inject the code to set
240 // the right color
241 if (m_need_back_color) {
242 size_t ninputs = m_shaderio.inputs().size();
243 for (size_t k = 0; k < ninputs; ++k) {
244 ShaderInput& i = m_shaderio.input(k);
245
246 if (i.name() != TGSI_SEMANTIC_COLOR)
247 continue;
248
249 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
250
251 size_t next_pos = m_shaderio.size();
252 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
253 m_shaderio.add_input(bcol);
254 col.set_back_color(next_pos);
255 }
256 m_shaderio.set_two_sided();
257 }
258
259 m_shaderio.update_lds_pos();
260
261 set_reserved_registers(m_reserved_registers);
262
263 return true;
264 }
265
266 void FragmentShaderFromNir::emit_shader_start()
267 {
268 if (m_sv_values.test(es_face))
269 load_front_face();
270
271 if (m_sv_values.test(es_pos)) {
272 for (int i = 0; i < 4; ++i) {
273 auto v = new GPRValue(m_frag_pos_index, i);
274 v->set_as_input();
275 auto reg = PValue(v);
276 if (i == 3)
277 emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
278 m_frag_pos[i] = reg;
279 }
280 }
281 }
282
283 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
284 {
285 if (out_var->data.location == FRAG_RESULT_COLOR)
286 return emit_export_pixel(out_var, instr, true);
287
288 if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
289 out_var->data.location <= FRAG_RESULT_DATA7) ||
290 out_var->data.location == FRAG_RESULT_DEPTH ||
291 out_var->data.location == FRAG_RESULT_STENCIL)
292 return emit_export_pixel(out_var, instr, false);
293
294 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
295 out_var->data.location << "(" << out_var->data.driver_location << ")\n";
296 return false;
297 }
298
299 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
300 {
301 sfn_log << SfnLog::instr << "Parse output variable "
302 << output->name << " @" << output->data.location
303 << "@dl:" << output->data.driver_location << "\n";
304
305 ++sh_info().noutput;
306 r600_shader_io& io = sh_info().output[output->data.driver_location];
307 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
308 &io.name, &io.sid);
309
310 /* Check whether this code has become obsolete by the IO vectorization */
311 unsigned num_components = 4;
312 unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
313 if (vector_elements)
314 num_components = vector_elements;
315 unsigned component = output->data.location_frac;
316
317 for (unsigned j = component; j < num_components + component; j++)
318 io.write_mask |= 1 << j;
319
320 int loc = output->data.location;
321 if (loc == FRAG_RESULT_COLOR &&
322 (m_nir.info.outputs_written & (1ull << loc))) {
323 sh_info().fs_write_all = true;
324 }
325
326 if (output->data.location == FRAG_RESULT_COLOR ||
327 (output->data.location >= FRAG_RESULT_DATA0 &&
328 output->data.location <= FRAG_RESULT_DATA7)) {
329 return true;
330 }
331 if (output->data.location == FRAG_RESULT_DEPTH ||
332 output->data.location == FRAG_RESULT_STENCIL) {
333 io.write_mask = 15;
334 return true;
335 }
336
337 return false;
338 }
339
340 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
341 {
342 switch (instr->intrinsic) {
343 case nir_intrinsic_load_sample_mask_in:
344 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
345 case nir_intrinsic_load_sample_id:
346 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
347 case nir_intrinsic_load_front_face:
348 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
349 case nir_intrinsic_interp_deref_at_sample:
350 return emit_interp_deref_at_sample(instr);
351 case nir_intrinsic_interp_deref_at_offset:
352 return emit_interp_deref_at_offset(instr);
353 case nir_intrinsic_interp_deref_at_centroid:
354 return emit_interp_deref_at_centroid(instr);
355 default:
356 return false;
357 }
358 }
359
360 void FragmentShaderFromNir::load_front_face()
361 {
362 assert(m_front_face_reg);
363 if (m_front_face_loaded)
364 return;
365
366 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
367 Value::zero, {alu_write, alu_last_instr});
368 m_front_face_loaded = true;
369 emit_instruction(ir);
370 }
371
372 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
373 {
374 GPRVector slope = get_temp_vec4();
375
376 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
377 from_nir_with_fetch_constant(instr->src[1], 0),
378 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
379 fetch->set_flag(vtx_srf_mode);
380 emit_instruction(fetch);
381
382 GPRVector grad = get_temp_vec4();
383 auto var = get_deref_location(instr->src[0]);
384 assert(var);
385
386 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
387 auto interpolator = m_interpolator[io.ij_index()];
388 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
389
390 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
391
392 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
393 tex->set_dest_swizzle({0,1,7,7});
394 emit_instruction(tex);
395
396 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
397 tex->set_dest_swizzle({7,7,0,1});
398 emit_instruction(tex);
399
400 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
401 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
402
403 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
404 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
405
406 Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
407
408 auto dst = vec_from_nir(instr->dest, 4);
409 int num_components = instr->dest.is_ssa ?
410 instr->dest.ssa.num_components:
411 instr->dest.reg.reg->num_components;
412
413 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
414
415 return true;
416 }
417
418 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
419 {
420 int temp = allocate_temp_register();
421
422 GPRVector help(temp, {0,1,2,3});
423
424 auto var = get_deref_location(instr->src[0]);
425 assert(var);
426
427 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
428 auto interpolator = m_interpolator[io.ij_index()];
429 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
430
431 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
432
433 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
434 getgradh->set_dest_swizzle({0,1,7,7});
435 getgradh->set_flag(TexInstruction::x_unnormalized);
436 getgradh->set_flag(TexInstruction::y_unnormalized);
437 getgradh->set_flag(TexInstruction::z_unnormalized);
438 getgradh->set_flag(TexInstruction::w_unnormalized);
439 emit_instruction(getgradh);
440
441 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
442 getgradv->set_dest_swizzle({7,7,0,1});
443 getgradv->set_flag(TexInstruction::x_unnormalized);
444 getgradv->set_flag(TexInstruction::y_unnormalized);
445 getgradv->set_flag(TexInstruction::z_unnormalized);
446 getgradv->set_flag(TexInstruction::w_unnormalized);
447 emit_instruction(getgradv);
448
449 PValue ofs_x = from_nir(instr->src[1], 0);
450 PValue ofs_y = from_nir(instr->src[1], 1);
451 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
452 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
453 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
454 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
455
456 Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
457
458 auto dst = vec_from_nir(instr->dest, 4);
459 load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
460 var->data.location_frac);
461
462 return true;
463 }
464
465 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
466 {
467 auto var = get_deref_location(instr->src[0]);
468 assert(var);
469
470 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
471 io.set_uses_interpolate_at_centroid();
472
473 int ij_index = io.ij_index() >= 3 ? 5 : 2;
474 assert (m_interpolator[ij_index].enabled);
475 auto ip = m_interpolator[ij_index];
476
477 int num_components = nir_dest_num_components(instr->dest);
478
479 auto dst = vec_from_nir(instr->dest, 4);
480 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
481 return true;
482 }
483
484
485 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
486 {
487 if (in_var->data.location == VARYING_SLOT_POS) {
488 assert(instr->dest.is_ssa);
489
490 for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
491 inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
492 }
493 return true;
494 }
495
496 if (in_var->data.location == VARYING_SLOT_FACE)
497 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
498
499 // todo: replace io with ShaderInputVarying
500 auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
501 unsigned num_components = 4;
502
503
504 if (instr->dest.is_ssa) {
505 num_components = instr->dest.ssa.num_components;
506 } else {
507 num_components = instr->dest.reg.reg->num_components;
508 }
509
510 auto dst = vec_from_nir(instr->dest, 4);
511
512 sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
513 << "].gpr=" << dst.sel() << "\n";
514
515 io.set_gpr(dst.sel());
516
517 auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
518
519 load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
520
521 /* These results are expected starting in slot x..*/
522 if (in_var->data.location_frac > 0) {
523 int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
524 instr->dest.reg.reg->num_components;
525 AluInstruction *ir = nullptr;
526 for (int i = 0; i < n; ++i) {
527 ir = new AluInstruction(op1_mov, dst[i],
528 dst[i + in_var->data.location_frac], {alu_write});
529 emit_instruction(ir);
530 }
531 if (ir)
532 ir->set_flag(alu_last_instr);
533 }
534
535
536 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
537
538 auto & color_input = static_cast<ShaderInputColor&> (io);
539 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
540
541 bgio.set_gpr(allocate_temp_register());
542
543 GPRVector bgcol(bgio.gpr(), {0,1,2,3});
544 load_interpolated(bgcol, bgio, ip, num_components, 0);
545
546 load_front_face();
547
548 AluInstruction *ir = nullptr;
549 for (unsigned i = 0; i < 4 ; ++i) {
550 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
551 emit_instruction(ir);
552 }
553 if (ir)
554 ir->set_flag(alu_last_instr);
555 }
556
557 return true;
558 }
559
560 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
561 ShaderInput& io, const Interpolator &ip,
562 int num_components, int start_comp)
563 {
564 // replace io with ShaderInputVarying
565 if (io.interpolate() > 0) {
566
567 sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
568
569 if (num_components == 1) {
570 switch (start_comp) {
571 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
572 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
573 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
574 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
575 default:
576 assert(0);
577 }
578 }
579
580 if (num_components == 2) {
581 switch (start_comp) {
582 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
583 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
584 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
585 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
586 default:
587 assert(0);
588 }
589 }
590
591 if (num_components == 3 && start_comp == 0)
592 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
593 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
594
595 int full_write_mask = ((1 << num_components) - 1) << start_comp;
596
597 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
598 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
599 return success;
600
601 } else {
602 AluInstruction *ir = nullptr;
603 for (unsigned i = 0; i < 4 ; ++i) {
604 ir = new AluInstruction(op1_interp_load_p0, dest[i],
605 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
606 EmitInstruction::write);
607 emit_instruction(ir);
608 }
609 ir->set_flag(alu_last_instr);
610 }
611 return true;
612 }
613
614 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
615 ShaderInput& io, const Interpolator& ip, EAluOp op)
616 {
617 for (unsigned i = 0; i < 2 ; ++i) {
618 int chan = i;
619 if (op == op2_interp_z)
620 chan += 2;
621
622
623 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
624 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
625 i == 0 ? EmitInstruction::write : EmitInstruction::last);
626 dest.pin_to_channel(chan);
627
628 ir->set_bank_swizzle(alu_vec_210);
629 emit_instruction(ir);
630 }
631 return true;
632 }
633
634 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
635 const Interpolator& ip, EAluOp op, int writemask)
636 {
637 AluInstruction *ir = nullptr;
638 for (unsigned i = 0; i < 4 ; ++i) {
639 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
640 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
641 dest.pin_to_channel(i);
642 ir->set_bank_swizzle(alu_vec_210);
643 emit_instruction(ir);
644 }
645 ir->set_flag(alu_last_instr);
646 return true;
647 }
648
649 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
650 ShaderInput& io, const Interpolator& ip,
651 EAluOp op, UNUSED int start, int comp)
652 {
653 AluInstruction *ir = nullptr;
654 for (int i = 0; i < 4 ; ++i) {
655 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
656 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
657 i == comp ? EmitInstruction::write : EmitInstruction::empty);
658 ir->set_bank_swizzle(alu_vec_210);
659 dest.pin_to_channel(i);
660 emit_instruction(ir);
661 }
662 ir->set_flag(alu_last_instr);
663 return true;
664 }
665
666
667 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels)
668 {
669 int outputs = all_chanels ? m_max_color_exports : 1;
670
671 std::array<uint32_t,4> swizzle;
672 unsigned writemask = nir_intrinsic_write_mask(instr);
673 if (out_var->data.location != FRAG_RESULT_STENCIL) {
674 for (int i = 0; i < 4; ++i) {
675 swizzle[i] = (i < instr->num_components) ? i : 7;
676 }
677 } else {
678 swizzle = {7,0,7,7};
679 }
680
681 GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
682
683 set_output(out_var->data.driver_location, PValue(value));
684
685 if (out_var->data.location == FRAG_RESULT_COLOR ||
686 (out_var->data.location >= FRAG_RESULT_DATA0 &&
687 out_var->data.location <= FRAG_RESULT_DATA7)) {
688 for (int k = 0 ; k < outputs; ++k) {
689
690 unsigned location = out_var->data.driver_location + k - m_depth_exports;
691 if (location >= m_max_color_exports) {
692 sfn_log << SfnLog::io << "Pixel output " << location
693 << " skipped because we have only " << m_max_color_exports << "CBs\n";
694 continue;
695 }
696
697 m_last_pixel_export = new ExportInstruction(location, *value, ExportInstruction::et_pixel);
698
699 if (sh_info().ps_export_highest < location)
700 sh_info().ps_export_highest = location;
701
702 sh_info().nr_ps_color_exports++;
703
704 unsigned mask = (0xfu << (location * 4));
705 sh_info().ps_color_export_mask |= mask;
706
707 emit_export_instruction(m_last_pixel_export);
708 ++m_max_counted_color_exports;
709 };
710 } else if (out_var->data.location == FRAG_RESULT_DEPTH) {
711 m_depth_exports++;
712 emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
713 } else if (out_var->data.location == FRAG_RESULT_STENCIL) {
714 m_depth_exports++;
715 emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
716 } else {
717 return false;
718 }
719
720 return true;
721 }
722
723 void FragmentShaderFromNir::do_finalize()
724 {
725 // update shader io info and set LDS etc.
726 sh_info().ninput = m_shaderio.inputs().size();
727
728 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
729 for (size_t i = 0; i < sh_info().ninput; ++i) {
730 int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
731 m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
732 m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
733 }
734
735 sh_info().two_side = m_shaderio.two_sided();
736 sh_info().nlds = m_shaderio.nlds();
737
738 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
739
740 if (sh_info().fs_write_all) {
741 sh_info().nr_ps_max_color_exports = m_max_color_exports;
742 }
743
744 if (!m_last_pixel_export) {
745 GPRVector v(0, {7,7,7,7});
746 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
747 sh_info().nr_ps_color_exports++;
748 sh_info().ps_color_export_mask = 0xf;
749 emit_export_instruction(m_last_pixel_export);
750 }
751
752 m_last_pixel_export->set_last();
753
754 if (sh_info().fs_write_all)
755 sh_info().nr_ps_max_color_exports = 8;
756 }
757
758 }