r600/sfn: plumb the chip class into the instruction emission
[mesa.git] / src / gallium / drivers / r600 / sfn / sfn_shader_fragment.cpp
1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2018 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "pipe/p_defines.h"
28 #include "tgsi/tgsi_from_mesa.h"
29 #include "sfn_shader_fragment.h"
30 #include "sfn_instruction_fetch.h"
31
32 namespace r600 {
33
34 FragmentShaderFromNir::FragmentShaderFromNir(const nir_shader& nir,
35 r600_shader& sh,
36 r600_pipe_shader_selector &sel,
37 const r600_shader_key &key,
38 enum chip_class chip_class):
39 ShaderFromNirProcessor(PIPE_SHADER_FRAGMENT, sel, sh, nir.scratch_size, chip_class),
40 m_max_color_exports(MAX2(key.ps.nr_cbufs,1)),
41 m_max_counted_color_exports(0),
42 m_two_sided_color(key.ps.color_two_side),
43 m_last_pixel_export(nullptr),
44 m_nir(nir),
45 m_reserved_registers(0),
46 m_frag_pos_index(0),
47 m_need_back_color(false),
48 m_front_face_loaded(false),
49 m_depth_exports(0),
50 m_enable_centroid_interpolators(false)
51 {
52 for (auto& i: m_interpolator) {
53 i.enabled = false;
54 i.ij_index= 0;
55 }
56
57 sh_info().rat_base = key.ps.nr_cbufs;
58 sh_info().atomic_base = key.ps.first_atomic_counter;
59 }
60
61 bool FragmentShaderFromNir::do_process_inputs(nir_variable *input)
62 {
63 sfn_log << SfnLog::io << "Parse input variable "
64 << input->name << " location:" << input->data.location
65 << " driver-loc:" << input->data.driver_location
66 << " interpolation:" << input->data.interpolation
67 << "\n";
68
69 unsigned name, sid;
70
71 if (input->data.location == VARYING_SLOT_FACE) {
72 m_sv_values.set(es_face);
73 return true;
74 }
75
76 tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(input->data.location),
77 true, &name, &sid);
78
79 /* Work around the mixed tgsi/nir semantic problems, this fixes
80 * dEQP-GLES2.functional.shaders.builtin_variable.pointcoord */
81 if (input->data.location == VARYING_SLOT_PNTC) {
82 name = TGSI_SEMANTIC_GENERIC;
83 sid = 8;
84 }
85
86 tgsi_semantic sname = static_cast<tgsi_semantic>(name);
87
88 switch (sname) {
89 case TGSI_SEMANTIC_POSITION: {
90 m_sv_values.set(es_pos);
91 return true;
92 }
93 case TGSI_SEMANTIC_COLOR: {
94 m_shaderio.add_input(new ShaderInputColor(sname, sid, input));
95 m_need_back_color = m_two_sided_color;
96 return true;
97 }
98 case TGSI_SEMANTIC_PRIMID:
99 sh_info().gs_prim_id_input = true;
100 sh_info().ps_prim_id_input = m_shaderio.inputs().size();
101 /* fallthrough */
102 case TGSI_SEMANTIC_FOG:
103 case TGSI_SEMANTIC_GENERIC:
104 case TGSI_SEMANTIC_TEXCOORD:
105 case TGSI_SEMANTIC_LAYER:
106 case TGSI_SEMANTIC_PCOORD:
107 case TGSI_SEMANTIC_VIEWPORT_INDEX:
108 case TGSI_SEMANTIC_CLIPDIST: {
109 if (!m_shaderio.find_varying(sname, sid, input->data.location_frac))
110 m_shaderio.add_input(new ShaderInputVarying(sname, sid, input));
111 return true;
112 }
113 default:
114 return false;
115 }
116 }
117
118 bool FragmentShaderFromNir::scan_sysvalue_access(nir_instr *instr)
119 {
120 switch (instr->type) {
121 case nir_instr_type_intrinsic: {
122 nir_intrinsic_instr *ii = nir_instr_as_intrinsic(instr);
123 switch (ii->intrinsic) {
124 case nir_intrinsic_load_front_face:
125 m_sv_values.set(es_face);
126 break;
127 case nir_intrinsic_load_sample_mask_in:
128 m_sv_values.set(es_sample_mask_in);
129 break;
130 case nir_intrinsic_load_sample_id:
131 m_sv_values.set(es_sample_id);
132 break;
133 case nir_intrinsic_interp_deref_at_centroid:
134 /* This is not a sysvalue, should go elsewhere */
135 m_enable_centroid_interpolators = true;
136 break;
137 default:
138 ;
139 }
140 }
141 default:
142 ;
143 }
144 return true;
145 }
146
147 bool FragmentShaderFromNir::allocate_reserved_registers()
148 {
149 assert(!m_reserved_registers);
150
151 int face_reg_index = -1;
152 // enabled interpolators based on inputs
153 for (auto& i: m_shaderio.inputs()) {
154 int ij = i->ij_index();
155 if (ij >= 0) {
156 m_interpolator[ij].enabled = true;
157 }
158 }
159
160 /* Lazy, enable both possible interpolators,
161 * TODO: check which ones are really needed */
162 if (m_enable_centroid_interpolators) {
163 m_interpolator[2].enabled = true; /* perspective */
164 m_interpolator[5].enabled = true; /* linear */
165 }
166
167 // sort the varying inputs
168 m_shaderio.sort_varying_inputs();
169
170 // handle interpolators
171 int num_baryc = 0;
172 for (int i = 0; i < 6; ++i) {
173 if (m_interpolator[i].enabled) {
174 sfn_log << SfnLog::io << "Interpolator " << i << " is enabled\n";
175
176 m_interpolator[i].ij_index = num_baryc;
177
178 unsigned sel = num_baryc / 2;
179 unsigned chan = 2 * (num_baryc % 2);
180
181 auto ip_i = new GPRValue(sel, chan + 1);
182 ip_i->set_as_input();
183 m_interpolator[i].i.reset(ip_i);
184 inject_register(sel, chan + 1, m_interpolator[i].i, false);
185
186 auto ip_j = new GPRValue(sel, chan);
187 ip_j->set_as_input();
188 m_interpolator[i].j.reset(ip_j);
189 inject_register(sel, chan, m_interpolator[i].j, false);
190
191 ++num_baryc;
192 }
193 }
194 m_reserved_registers += (num_baryc + 1) >> 1;
195
196 if (m_sv_values.test(es_pos)) {
197 m_frag_pos_index = m_reserved_registers++;
198 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_POSITION, m_frag_pos_index));
199 }
200
201 // handle system values
202 if (m_sv_values.test(es_face) || m_need_back_color) {
203 face_reg_index = m_reserved_registers++;
204 auto ffr = new GPRValue(face_reg_index,0);
205 ffr->set_as_input();
206 m_front_face_reg.reset(ffr);
207 sfn_log << SfnLog::io << "Set front_face register to " << *m_front_face_reg << "\n";
208 inject_register(ffr->sel(), ffr->chan(), m_front_face_reg, false);
209
210 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_FACE, face_reg_index));
211 load_front_face();
212 }
213
214 if (m_sv_values.test(es_sample_mask_in)) {
215 if (face_reg_index < 0)
216 face_reg_index = m_reserved_registers++;
217
218 auto smi = new GPRValue(face_reg_index,2);
219 smi->set_as_input();
220 m_sample_mask_reg.reset(smi);
221 sfn_log << SfnLog::io << "Set sample mask in register to " << *m_sample_mask_reg << "\n";
222 //inject_register(smi->sel(), smi->chan(), m_sample_mask_reg, false);
223 sh_info().nsys_inputs = 1;
224 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEMASK, face_reg_index));
225 }
226
227 if (m_sv_values.test(es_sample_id)) {
228 if (face_reg_index < 0)
229 face_reg_index = m_reserved_registers++;
230
231 auto smi = new GPRValue(face_reg_index, 3);
232 smi->set_as_input();
233 m_sample_id_reg.reset(smi);
234 sfn_log << SfnLog::io << "Set sample id register to " << *m_sample_id_reg << "\n";
235 sh_info().nsys_inputs++;
236 m_shaderio.add_input(new ShaderInputSystemValue(TGSI_SEMANTIC_SAMPLEID, face_reg_index));
237 }
238
239 // The back color handling is not emmited in the code, so we have
240 // to add the inputs here and later we also need to inject the code to set
241 // the right color
242 if (m_need_back_color) {
243 size_t ninputs = m_shaderio.inputs().size();
244 for (size_t k = 0; k < ninputs; ++k) {
245 ShaderInput& i = m_shaderio.input(k);
246
247 if (i.name() != TGSI_SEMANTIC_COLOR)
248 continue;
249
250 ShaderInputColor& col = static_cast<ShaderInputColor&>(i);
251
252 size_t next_pos = m_shaderio.size();
253 auto bcol = new ShaderInputVarying(TGSI_SEMANTIC_BCOLOR, col, next_pos);
254 m_shaderio.add_input(bcol);
255 col.set_back_color(next_pos);
256 }
257 m_shaderio.set_two_sided();
258 }
259
260 m_shaderio.update_lds_pos();
261
262 set_reserved_registers(m_reserved_registers);
263
264 return true;
265 }
266
267 void FragmentShaderFromNir::emit_shader_start()
268 {
269 if (m_sv_values.test(es_face))
270 load_front_face();
271
272 if (m_sv_values.test(es_pos)) {
273 for (int i = 0; i < 4; ++i) {
274 auto v = new GPRValue(m_frag_pos_index, i);
275 v->set_as_input();
276 auto reg = PValue(v);
277 if (i == 3)
278 emit_instruction(new AluInstruction(op1_recip_ieee, reg, reg, {alu_write, alu_last_instr}));
279 m_frag_pos[i] = reg;
280 }
281 }
282 }
283
284 bool FragmentShaderFromNir::do_emit_store_deref(const nir_variable *out_var, nir_intrinsic_instr* instr)
285 {
286 if (out_var->data.location == FRAG_RESULT_COLOR)
287 return emit_export_pixel(out_var, instr, true);
288
289 if ((out_var->data.location >= FRAG_RESULT_DATA0 &&
290 out_var->data.location <= FRAG_RESULT_DATA7) ||
291 out_var->data.location == FRAG_RESULT_DEPTH ||
292 out_var->data.location == FRAG_RESULT_STENCIL)
293 return emit_export_pixel(out_var, instr, false);
294
295 sfn_log << SfnLog::err << "r600-NIR: Unimplemented store_deref for " <<
296 out_var->data.location << "(" << out_var->data.driver_location << ")\n";
297 return false;
298 }
299
300 bool FragmentShaderFromNir::do_process_outputs(nir_variable *output)
301 {
302 sfn_log << SfnLog::instr << "Parse output variable "
303 << output->name << " @" << output->data.location
304 << "@dl:" << output->data.driver_location << "\n";
305
306 ++sh_info().noutput;
307 r600_shader_io& io = sh_info().output[output->data.driver_location];
308 tgsi_get_gl_frag_result_semantic(static_cast<gl_frag_result>( output->data.location),
309 &io.name, &io.sid);
310
311 /* Check whether this code has become obsolete by the IO vectorization */
312 unsigned num_components = 4;
313 unsigned vector_elements = glsl_get_vector_elements(glsl_without_array(output->type));
314 if (vector_elements)
315 num_components = vector_elements;
316 unsigned component = output->data.location_frac;
317
318 for (unsigned j = component; j < num_components + component; j++)
319 io.write_mask |= 1 << j;
320
321 int loc = output->data.location;
322 if (loc == FRAG_RESULT_COLOR &&
323 (m_nir.info.outputs_written & (1ull << loc))) {
324 sh_info().fs_write_all = true;
325 }
326
327 if (output->data.location == FRAG_RESULT_COLOR ||
328 (output->data.location >= FRAG_RESULT_DATA0 &&
329 output->data.location <= FRAG_RESULT_DATA7)) {
330 return true;
331 }
332 if (output->data.location == FRAG_RESULT_DEPTH ||
333 output->data.location == FRAG_RESULT_STENCIL) {
334 io.write_mask = 15;
335 return true;
336 }
337
338 return false;
339 }
340
341 bool FragmentShaderFromNir::emit_intrinsic_instruction_override(nir_intrinsic_instr* instr)
342 {
343 switch (instr->intrinsic) {
344 case nir_intrinsic_load_sample_mask_in:
345 return load_preloaded_value(instr->dest, 0, m_sample_mask_reg);
346 case nir_intrinsic_load_sample_id:
347 return load_preloaded_value(instr->dest, 0, m_sample_id_reg);
348 case nir_intrinsic_load_front_face:
349 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
350 case nir_intrinsic_interp_deref_at_sample:
351 return emit_interp_deref_at_sample(instr);
352 case nir_intrinsic_interp_deref_at_offset:
353 return emit_interp_deref_at_offset(instr);
354 case nir_intrinsic_interp_deref_at_centroid:
355 return emit_interp_deref_at_centroid(instr);
356 default:
357 return false;
358 }
359 }
360
361 void FragmentShaderFromNir::load_front_face()
362 {
363 assert(m_front_face_reg);
364 if (m_front_face_loaded)
365 return;
366
367 auto ir = new AluInstruction(op2_setge_dx10, m_front_face_reg, m_front_face_reg,
368 Value::zero, {alu_write, alu_last_instr});
369 m_front_face_loaded = true;
370 emit_instruction(ir);
371 }
372
373 bool FragmentShaderFromNir::emit_interp_deref_at_sample(nir_intrinsic_instr* instr)
374 {
375 GPRVector slope = get_temp_vec4();
376
377 auto fetch = new FetchInstruction(vc_fetch, no_index_offset, slope,
378 from_nir_with_fetch_constant(instr->src[1], 0),
379 0, R600_BUFFER_INFO_CONST_BUFFER, PValue(), bim_none);
380 fetch->set_flag(vtx_srf_mode);
381 emit_instruction(fetch);
382
383 GPRVector grad = get_temp_vec4();
384 auto var = get_deref_location(instr->src[0]);
385 assert(var);
386
387 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
388 auto interpolator = m_interpolator[io.ij_index()];
389 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
390
391 GPRVector src({interpolator.j, interpolator.i, dummy, dummy});
392
393 auto tex = new TexInstruction(TexInstruction::get_gradient_h, grad, src, 0, 0, PValue());
394 tex->set_dest_swizzle({0,1,7,7});
395 emit_instruction(tex);
396
397 tex = new TexInstruction(TexInstruction::get_gradient_v, grad, src, 0, 0, PValue());
398 tex->set_dest_swizzle({7,7,0,1});
399 emit_instruction(tex);
400
401 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(0), slope.reg_i(2), interpolator.j}, {alu_write}));
402 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(1), slope.reg_i(2), interpolator.i}, {alu_write, alu_last_instr}));
403
404 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(0), {grad.reg_i(2), slope.reg_i(3), slope.reg_i(0)}, {alu_write}));
405 emit_instruction(new AluInstruction(op3_muladd, slope.reg_i(1), {grad.reg_i(3), slope.reg_i(3), slope.reg_i(1)}, {alu_write, alu_last_instr}));
406
407 Interpolator ip = {true, 0, slope.reg_i(1), slope.reg_i(0)};
408
409 auto dst = vec_from_nir(instr->dest, 4);
410 int num_components = instr->dest.is_ssa ?
411 instr->dest.ssa.num_components:
412 instr->dest.reg.reg->num_components;
413
414 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
415
416 return true;
417 }
418
419 bool FragmentShaderFromNir::emit_interp_deref_at_offset(nir_intrinsic_instr* instr)
420 {
421 int temp = allocate_temp_register();
422
423 GPRVector help(temp, {0,1,2,3});
424
425 auto var = get_deref_location(instr->src[0]);
426 assert(var);
427
428 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
429 auto interpolator = m_interpolator[io.ij_index()];
430 PValue dummy(new GPRValue(interpolator.i->sel(), 7));
431
432 GPRVector interp({interpolator.j, interpolator.i, dummy, dummy});
433
434 auto getgradh = new TexInstruction(TexInstruction::get_gradient_h, help, interp, 0, 0, PValue());
435 getgradh->set_dest_swizzle({0,1,7,7});
436 getgradh->set_flag(TexInstruction::x_unnormalized);
437 getgradh->set_flag(TexInstruction::y_unnormalized);
438 getgradh->set_flag(TexInstruction::z_unnormalized);
439 getgradh->set_flag(TexInstruction::w_unnormalized);
440 emit_instruction(getgradh);
441
442 auto getgradv = new TexInstruction(TexInstruction::get_gradient_v, help, interp, 0, 0, PValue());
443 getgradv->set_dest_swizzle({7,7,0,1});
444 getgradv->set_flag(TexInstruction::x_unnormalized);
445 getgradv->set_flag(TexInstruction::y_unnormalized);
446 getgradv->set_flag(TexInstruction::z_unnormalized);
447 getgradv->set_flag(TexInstruction::w_unnormalized);
448 emit_instruction(getgradv);
449
450 PValue ofs_x = from_nir(instr->src[1], 0);
451 PValue ofs_y = from_nir(instr->src[1], 1);
452 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(0), ofs_x, interpolator.j, {alu_write}));
453 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(1), ofs_x, interpolator.i, {alu_write, alu_last_instr}));
454 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(0), help.reg_i(2), ofs_y, help.reg_i(0), {alu_write}));
455 emit_instruction(new AluInstruction(op3_muladd, help.reg_i(1), help.reg_i(3), ofs_y, help.reg_i(1), {alu_write, alu_last_instr}));
456
457 Interpolator ip = {true, 0, help.reg_i(1), help.reg_i(0)};
458
459 auto dst = vec_from_nir(instr->dest, 4);
460 load_interpolated(dst, io, ip, nir_dest_num_components(instr->dest),
461 var->data.location_frac);
462
463 return true;
464 }
465
466 bool FragmentShaderFromNir::emit_interp_deref_at_centroid(nir_intrinsic_instr* instr)
467 {
468 auto var = get_deref_location(instr->src[0]);
469 assert(var);
470
471 auto& io = m_shaderio.input(var->data.driver_location, var->data.location_frac);
472 io.set_uses_interpolate_at_centroid();
473
474 int ij_index = io.ij_index() >= 3 ? 5 : 2;
475 assert (m_interpolator[ij_index].enabled);
476 auto ip = m_interpolator[ij_index];
477
478 int num_components = nir_dest_num_components(instr->dest);
479
480 auto dst = vec_from_nir(instr->dest, 4);
481 load_interpolated(dst, io, ip, num_components, var->data.location_frac);
482 return true;
483 }
484
485
486 bool FragmentShaderFromNir::do_emit_load_deref(const nir_variable *in_var, nir_intrinsic_instr* instr)
487 {
488 if (in_var->data.location == VARYING_SLOT_POS) {
489 assert(instr->dest.is_ssa);
490
491 for (int i = 0; i < instr->dest.ssa.num_components; ++i) {
492 inject_register(instr->dest.ssa.index, i, m_frag_pos[i], true);
493 }
494 return true;
495 }
496
497 if (in_var->data.location == VARYING_SLOT_FACE)
498 return load_preloaded_value(instr->dest, 0, m_front_face_reg);
499
500 // todo: replace io with ShaderInputVarying
501 auto& io = m_shaderio.input(in_var->data.driver_location, in_var->data.location_frac);
502 unsigned num_components = 4;
503
504
505 if (instr->dest.is_ssa) {
506 num_components = instr->dest.ssa.num_components;
507 } else {
508 num_components = instr->dest.reg.reg->num_components;
509 }
510
511 auto dst = vec_from_nir(instr->dest, 4);
512
513 sfn_log << SfnLog::io << "Set input[" << in_var->data.driver_location
514 << "].gpr=" << dst.sel() << "\n";
515
516 io.set_gpr(dst.sel());
517
518 auto& ip = io.interpolate() ? m_interpolator[io.ij_index()] : m_interpolator[0];
519
520 load_interpolated(dst, io, ip, num_components, in_var->data.location_frac);
521
522 /* These results are expected starting in slot x..*/
523 if (in_var->data.location_frac > 0) {
524 int n = instr->dest.is_ssa ? instr->dest.ssa.num_components :
525 instr->dest.reg.reg->num_components;
526 AluInstruction *ir = nullptr;
527 for (int i = 0; i < n; ++i) {
528 ir = new AluInstruction(op1_mov, dst[i],
529 dst[i + in_var->data.location_frac], {alu_write});
530 emit_instruction(ir);
531 }
532 if (ir)
533 ir->set_flag(alu_last_instr);
534 }
535
536
537 if (m_need_back_color && io.name() == TGSI_SEMANTIC_COLOR) {
538
539 auto & color_input = static_cast<ShaderInputColor&> (io);
540 auto& bgio = m_shaderio.input(color_input.back_color_input_index());
541
542 bgio.set_gpr(allocate_temp_register());
543
544 GPRVector bgcol(bgio.gpr(), {0,1,2,3});
545 load_interpolated(bgcol, bgio, ip, num_components, 0);
546
547 load_front_face();
548
549 AluInstruction *ir = nullptr;
550 for (unsigned i = 0; i < 4 ; ++i) {
551 ir = new AluInstruction(op3_cnde, dst[i], m_front_face_reg, bgcol[i], dst[i], {alu_write});
552 emit_instruction(ir);
553 }
554 if (ir)
555 ir->set_flag(alu_last_instr);
556 }
557
558 return true;
559 }
560
561 bool FragmentShaderFromNir::load_interpolated(GPRVector &dest,
562 ShaderInput& io, const Interpolator &ip,
563 int num_components, int start_comp)
564 {
565 // replace io with ShaderInputVarying
566 if (io.interpolate() > 0) {
567
568 sfn_log << SfnLog::io << "Using Interpolator " << io.ij_index() << "\n";
569
570 if (num_components == 1) {
571 switch (start_comp) {
572 case 0: return load_interpolated_one_comp(dest, io, ip, op2_interp_x);
573 case 1: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
574 case 2: return load_interpolated_one_comp(dest, io, ip, op2_interp_z);
575 case 3: return load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_zw, 2, 3);
576 default:
577 assert(0);
578 }
579 }
580
581 if (num_components == 2) {
582 switch (start_comp) {
583 case 0: return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3);
584 case 2: return load_interpolated_two_comp(dest, io, ip, op2_interp_zw, 0xc);
585 case 1: return load_interpolated_one_comp(dest, io, ip, op2_interp_z) &&
586 load_interpolated_two_comp_for_one(dest, io, ip, op2_interp_xy, 0, 1);
587 default:
588 assert(0);
589 }
590 }
591
592 if (num_components == 3 && start_comp == 0)
593 return load_interpolated_two_comp(dest, io, ip, op2_interp_xy, 0x3) &&
594 load_interpolated_one_comp(dest, io, ip, op2_interp_z);
595
596 int full_write_mask = ((1 << num_components) - 1) << start_comp;
597
598 bool success = load_interpolated_two_comp(dest, io, ip, op2_interp_zw, full_write_mask & 0xc);
599 success &= load_interpolated_two_comp(dest, io, ip, op2_interp_xy, full_write_mask & 0x3);
600 return success;
601
602 } else {
603 AluInstruction *ir = nullptr;
604 for (unsigned i = 0; i < 4 ; ++i) {
605 ir = new AluInstruction(op1_interp_load_p0, dest[i],
606 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), i)),
607 EmitInstruction::write);
608 emit_instruction(ir);
609 }
610 ir->set_flag(alu_last_instr);
611 }
612 return true;
613 }
614
615 bool FragmentShaderFromNir::load_interpolated_one_comp(GPRVector &dest,
616 ShaderInput& io, const Interpolator& ip, EAluOp op)
617 {
618 for (unsigned i = 0; i < 2 ; ++i) {
619 int chan = i;
620 if (op == op2_interp_z)
621 chan += 2;
622
623
624 auto ir = new AluInstruction(op, dest[chan], i & 1 ? ip.j : ip.i,
625 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
626 i == 0 ? EmitInstruction::write : EmitInstruction::last);
627 dest.pin_to_channel(chan);
628
629 ir->set_bank_swizzle(alu_vec_210);
630 emit_instruction(ir);
631 }
632 return true;
633 }
634
635 bool FragmentShaderFromNir::load_interpolated_two_comp(GPRVector &dest, ShaderInput& io,
636 const Interpolator& ip, EAluOp op, int writemask)
637 {
638 AluInstruction *ir = nullptr;
639 for (unsigned i = 0; i < 4 ; ++i) {
640 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i, PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
641 (writemask & (1 << i)) ? EmitInstruction::write : EmitInstruction::empty);
642 dest.pin_to_channel(i);
643 ir->set_bank_swizzle(alu_vec_210);
644 emit_instruction(ir);
645 }
646 ir->set_flag(alu_last_instr);
647 return true;
648 }
649
650 bool FragmentShaderFromNir::load_interpolated_two_comp_for_one(GPRVector &dest,
651 ShaderInput& io, const Interpolator& ip,
652 EAluOp op, UNUSED int start, int comp)
653 {
654 AluInstruction *ir = nullptr;
655 for (int i = 0; i < 4 ; ++i) {
656 ir = new AluInstruction(op, dest[i], i & 1 ? ip.j : ip.i,
657 PValue(new InlineConstValue(ALU_SRC_PARAM_BASE + io.lds_pos(), 0)),
658 i == comp ? EmitInstruction::write : EmitInstruction::empty);
659 ir->set_bank_swizzle(alu_vec_210);
660 dest.pin_to_channel(i);
661 emit_instruction(ir);
662 }
663 ir->set_flag(alu_last_instr);
664 return true;
665 }
666
667
668 bool FragmentShaderFromNir::emit_export_pixel(const nir_variable *out_var, nir_intrinsic_instr* instr, bool all_chanels)
669 {
670 int outputs = all_chanels ? m_max_color_exports : 1;
671
672 std::array<uint32_t,4> swizzle;
673 unsigned writemask = nir_intrinsic_write_mask(instr);
674 if (out_var->data.location != FRAG_RESULT_STENCIL) {
675 for (int i = 0; i < 4; ++i) {
676 swizzle[i] = (i < instr->num_components) ? i : 7;
677 }
678 } else {
679 swizzle = {7,0,7,7};
680 }
681
682 GPRVector *value = vec_from_nir_with_fetch_constant(instr->src[1], writemask, swizzle);
683
684 set_output(out_var->data.driver_location, PValue(value));
685
686 if (out_var->data.location == FRAG_RESULT_COLOR ||
687 (out_var->data.location >= FRAG_RESULT_DATA0 &&
688 out_var->data.location <= FRAG_RESULT_DATA7)) {
689 for (int k = 0 ; k < outputs; ++k) {
690
691 unsigned location = out_var->data.driver_location + k - m_depth_exports;
692 if (location >= m_max_color_exports) {
693 sfn_log << SfnLog::io << "Pixel output " << location
694 << " skipped because we have only " << m_max_color_exports << "CBs\n";
695 continue;
696 }
697
698 m_last_pixel_export = new ExportInstruction(location, *value, ExportInstruction::et_pixel);
699
700 if (sh_info().ps_export_highest < location)
701 sh_info().ps_export_highest = location;
702
703 sh_info().nr_ps_color_exports++;
704
705 unsigned mask = (0xfu << (location * 4));
706 sh_info().ps_color_export_mask |= mask;
707
708 emit_export_instruction(m_last_pixel_export);
709 ++m_max_counted_color_exports;
710 };
711 } else if (out_var->data.location == FRAG_RESULT_DEPTH) {
712 m_depth_exports++;
713 emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
714 } else if (out_var->data.location == FRAG_RESULT_STENCIL) {
715 m_depth_exports++;
716 emit_export_instruction(new ExportInstruction(61, *value, ExportInstruction::et_pixel));
717 } else {
718 return false;
719 }
720
721 return true;
722 }
723
724 void FragmentShaderFromNir::do_finalize()
725 {
726 // update shader io info and set LDS etc.
727 sh_info().ninput = m_shaderio.inputs().size();
728
729 sfn_log << SfnLog::io << "Have " << sh_info().ninput << " inputs\n";
730 for (size_t i = 0; i < sh_info().ninput; ++i) {
731 int ij_idx = (m_shaderio.input(i).ij_index() < 6 &&
732 m_shaderio.input(i).ij_index() >= 0) ? m_shaderio.input(i).ij_index() : 0;
733 m_shaderio.input(i).set_ioinfo(sh_info().input[i], m_interpolator[ij_idx].ij_index);
734 }
735
736 sh_info().two_side = m_shaderio.two_sided();
737 sh_info().nlds = m_shaderio.nlds();
738
739 sh_info().nr_ps_max_color_exports = m_max_counted_color_exports;
740
741 if (sh_info().fs_write_all) {
742 sh_info().nr_ps_max_color_exports = m_max_color_exports;
743 }
744
745 if (!m_last_pixel_export) {
746 GPRVector v(0, {7,7,7,7});
747 m_last_pixel_export = new ExportInstruction(0, v, ExportInstruction::et_pixel);
748 sh_info().nr_ps_color_exports++;
749 sh_info().ps_color_export_mask = 0xf;
750 emit_export_instruction(m_last_pixel_export);
751 }
752
753 m_last_pixel_export->set_last();
754
755 if (sh_info().fs_write_all)
756 sh_info().nr_ps_max_color_exports = 8;
757 }
758
759 }