+ case SHADER_OPCODE_SEND:
+ switch (inst->sfid) {
+ case BRW_SFID_SAMPLER: {
+ unsigned msg_type = (inst->desc >> 12) & 0x1f;
+ switch (msg_type) {
+ case GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO:
+ case GEN6_SAMPLER_MESSAGE_SAMPLE_SAMPLEINFO:
+ /* See also SHADER_OPCODE_TXS */
+ latency = 100;
+ break;
+
+ default:
+ /* See also SHADER_OPCODE_TEX */
+ latency = 200;
+ break;
+ }
+ break;
+ }
+
+ case GEN6_SFID_DATAPORT_RENDER_CACHE:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case GEN7_DATAPORT_RC_TYPED_SURFACE_WRITE:
+ case GEN7_DATAPORT_RC_TYPED_SURFACE_READ:
+ /* See also SHADER_OPCODE_TYPED_SURFACE_READ */
+ assert(!is_haswell);
+ latency = 600;
+ break;
+
+ case GEN7_DATAPORT_RC_TYPED_ATOMIC_OP:
+ /* See also SHADER_OPCODE_TYPED_ATOMIC */
+ assert(!is_haswell);
+ latency = 14000;
+ break;
+
+ case GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE:
+ /* completely fabricated number */
+ latency = 600;
+ break;
+
+ default:
+ unreachable("Unknown render cache message");
+ }
+ break;
+
+ case GEN7_SFID_DATAPORT_DATA_CACHE:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case GEN7_DATAPORT_DC_DWORD_SCATTERED_READ:
+ case GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE:
+ case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_READ:
+ case HSW_DATAPORT_DC_PORT0_BYTE_SCATTERED_WRITE:
+ /* We have no data for this but assume it's roughly the same as
+ * untyped surface read/write.
+ */
+ latency = 300;
+ break;
+
+ case GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ:
+ case GEN7_DATAPORT_DC_UNTYPED_SURFACE_WRITE:
+ /* Test code:
+ * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
+ * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
+ * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
+ * send(8) g4<1>UD g112<8,8,1>UD
+ * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ * .
+ * . [repeats 8 times]
+ * .
+ * mov(8) g112<1>UD 0x00000000UD { align1 WE_all 1Q };
+ * mov(1) g112.7<1>UD g1.7<0,1,0>UD { align1 WE_all };
+ * mov(8) g113<1>UD 0x00000000UD { align1 WE_normal 1Q };
+ * send(8) g4<1>UD g112<8,8,1>UD
+ * data (38, 6, 5) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ *
+ * Running it 100 times as fragment shader on a 128x128 quad
+ * gives an average latency of 583 cycles per surface read,
+ * standard deviation 0.9%.
+ */
+ assert(!is_haswell);
+ latency = 600;
+ break;
+
+ case GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP:
+ /* Test code:
+ * mov(8) g112<1>ud 0x00000000ud { align1 WE_all 1Q };
+ * mov(1) g112.7<1>ud g1.7<0,1,0>ud { align1 WE_all };
+ * mov(8) g113<1>ud 0x00000000ud { align1 WE_normal 1Q };
+ * send(8) g4<1>ud g112<8,8,1>ud
+ * data (38, 5, 6) mlen 2 rlen 1 { align1 WE_normal 1Q };
+ *
+ * Running it 100 times as fragment shader on a 128x128 quad
+ * gives an average latency of 13867 cycles per atomic op,
+ * standard deviation 3%. Note that this is a rather
+ * pessimistic estimate, the actual latency in cases with few
+ * collisions between threads and favorable pipelining has been
+ * seen to be reduced by a factor of 100.
+ */
+ assert(!is_haswell);
+ latency = 14000;
+ break;
+
+ default:
+ unreachable("Unknown data cache message");
+ }
+ break;
+
+ case HSW_SFID_DATAPORT_DATA_CACHE_1:
+ switch ((inst->desc >> 14) & 0x1f) {
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_READ:
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_SURFACE_WRITE:
+ case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_READ:
+ case HSW_DATAPORT_DC_PORT1_TYPED_SURFACE_WRITE:
+ case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_WRITE:
+ case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_SURFACE_READ:
+ case GEN8_DATAPORT_DC_PORT1_A64_SCATTERED_WRITE:
+ case GEN9_DATAPORT_DC_PORT1_A64_SCATTERED_READ:
+ /* See also GEN7_DATAPORT_DC_UNTYPED_SURFACE_READ */
+ latency = 300;
+ break;
+
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP:
+ case HSW_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_OP_SIMD4X2:
+ case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP_SIMD4X2:
+ case HSW_DATAPORT_DC_PORT1_TYPED_ATOMIC_OP:
+ case GEN9_DATAPORT_DC_PORT1_UNTYPED_ATOMIC_FLOAT_OP:
+ case GEN8_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_OP:
+ case GEN9_DATAPORT_DC_PORT1_A64_UNTYPED_ATOMIC_FLOAT_OP:
+ /* See also GEN7_DATAPORT_DC_UNTYPED_ATOMIC_OP */
+ latency = 14000;
+ break;
+
+ default:
+ unreachable("Unknown data cache message");
+ }
+ break;
+
+ default:
+ unreachable("Unknown SFID");
+ }
+ break;
+