Funny story... a single shader was hurt for instructions, spills, fills.
That same shader was also the most helped for cycles. #GPUsAreWeird
No changes on any other Intel platform.
v2: Refactor selection of atomic opcode to a separate function.
Suggested by Jason.
Haswell, Broadwell, and Skylake had similar results. (Skylake shown)
total instructions in shared programs: 14304116 -> 14304261 (<.01%)
instructions in affected programs: 12776 -> 12921 (1.13%)
helped: 19
HURT: 1
helped stats (abs) min: 1 max: 16 x̄: 2.32 x̃: 1
helped stats (rel) min: 0.05% max: 7.27% x̄: 0.92% x̃: 0.55%
HURT stats (abs) min: 189 max: 189 x̄: 189.00 x̃: 189
HURT stats (rel) min: 4.87% max: 4.87% x̄: 4.87% x̃: 4.87%
95% mean confidence interval for instructions value: -12.83 27.33
95% mean confidence interval for instructions %-change: -1.57% 0.31%
Inconclusive result (value mean confidence interval includes 0).
total cycles in shared programs: 527552861 -> 527531226 (<.01%)
cycles in affected programs: 1459195 -> 1437560 (-1.48%)
helped: 16
HURT: 2
helped stats (abs) min: 2 max: 21328 x̄: 1353.69 x̃: 6
helped stats (rel) min: 0.01% max: 5.29% x̄: 0.36% x̃: 0.03%
HURT stats (abs) min: 12 max: 12 x̄: 12.00 x̃: 12
HURT stats (rel) min: 0.03% max: 0.03% x̄: 0.03% x̃: 0.03%
95% mean confidence interval for cycles value: -3699.81 1295.92
95% mean confidence interval for cycles %-change: -0.94% 0.30%
Inconclusive result (value mean confidence interval includes 0).
total spills in shared programs: 8025 -> 8033 (0.10%)
spills in affected programs: 208 -> 216 (3.85%)
helped: 1
HURT: 1
total fills in shared programs: 10989 -> 11040 (0.46%)
fills in affected programs: 444 -> 495 (11.49%)
helped: 1
HURT: 1
Ivy Bridge
total instructions in shared programs: 11709181 -> 11709153 (<.01%)
instructions in affected programs: 3505 -> 3477 (-0.80%)
helped: 3
HURT: 0
helped stats (abs) min: 1 max: 23 x̄: 9.33 x̃: 4
helped stats (rel) min: 0.11% max: 1.16% x̄: 0.63% x̃: 0.61%
total cycles in shared programs: 254741126 -> 254738801 (<.01%)
cycles in affected programs: 919067 -> 916742 (-0.25%)
helped: 3
HURT: 0
helped stats (abs) min: 21 max: 2144 x̄: 775.00 x̃: 160
helped stats (rel) min: 0.03% max: 0.90% x̄: 0.32% x̃: 0.03%
total spills in shared programs: 4536 -> 4533 (-0.07%)
spills in affected programs: 40 -> 37 (-7.50%)
helped: 1
HURT: 0
total fills in shared programs: 4819 -> 4813 (-0.12%)
fills in affected programs: 94 -> 88 (-6.38%)
helped: 1
HURT: 0
Signed-off-by: Ian Romanick <ian.d.romanick@intel.com>
Reviewed-by: Caio Marcelo de Oliveira Filho <caio.oliveira@intel.com> [v1]
Reviewed-by: Jason Ekstrand <jason@jlekstrand.net>
}
}
+static int
+get_op_for_atomic_add(nir_intrinsic_instr *instr, unsigned src)
+{ /* Pick the atomic opcode for an add whose addend is instr->src[src]:
+   * BRW_AOP_INC for a constant 1, BRW_AOP_DEC for a constant -1, and
+   * BRW_AOP_ADD in every other case.
+   */
+ const nir_const_value *const val = nir_src_as_const_value(instr->src[src]);
+ /* Only the first 32-bit component (i32[0]) is inspected. A NULL val
+  * (presumably "source is not a constant" -- confirm against
+  * nir_src_as_const_value) skips the special cases below.
+  */
+ if (val != NULL) {
+ if (val->i32[0] == 1)
+ return BRW_AOP_INC;
+ else if (val->i32[0] == -1)
+ return BRW_AOP_DEC;
+ }
+ /* General case: keep the plain atomic add. */
+ return BRW_AOP_ADD;
+}
+
void
fs_visitor::nir_emit_cs_intrinsic(const fs_builder &bld,
nir_intrinsic_instr *instr)
}
case nir_intrinsic_shared_atomic_add:
- nir_emit_shared_atomic(bld, BRW_AOP_ADD, instr);
+ nir_emit_shared_atomic(bld, get_op_for_atomic_add(instr, 1), instr);
break;
case nir_intrinsic_shared_atomic_imin:
nir_emit_shared_atomic(bld, BRW_AOP_IMIN, instr);
}
case nir_intrinsic_ssbo_atomic_add:
- nir_emit_ssbo_atomic(bld, BRW_AOP_ADD, instr);
+ nir_emit_ssbo_atomic(bld, get_op_for_atomic_add(instr, 2), instr);
break;
case nir_intrinsic_ssbo_atomic_imin:
nir_emit_ssbo_atomic(bld, BRW_AOP_IMIN, instr);
}
fs_reg offset = get_nir_src(instr->src[1]);
- fs_reg data1 = get_nir_src(instr->src[2]);
+ fs_reg data1;
+ if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
+ data1 = get_nir_src(instr->src[2]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[3]);
fs_reg surface = brw_imm_ud(GEN7_BTI_SLM);
fs_reg offset;
- fs_reg data1 = get_nir_src(instr->src[1]);
+ fs_reg data1;
+ if (op != BRW_AOP_INC && op != BRW_AOP_DEC && op != BRW_AOP_PREDEC)
+ data1 = get_nir_src(instr->src[1]);
fs_reg data2;
if (op == BRW_AOP_CMPWR)
data2 = get_nir_src(instr->src[2]);