radv/ac: setup mrt exports then export them in one go. (v2)
authorDave Airlie <airlied@redhat.com>
Sun, 23 Apr 2017 18:52:34 +0000 (19:52 +0100)
committerDave Airlie <airlied@redhat.com>
Tue, 25 Apr 2017 22:26:11 +0000 (23:26 +0100)
commit7f77554b5b224217ec1b3ebbf0fab0913c42e269
tree7f78f1d8ef2672a8de46dc689f37d8ab0f73023c
parentb2cedb3ea9482d3099506069204a2d226ccd54fc
radv/ac: setup mrt exports then export them in one go. (v2)

Noticed while looking at Sascha Willems deferred shaders.

This is a bit of an llvm workaround, llvm was producing this:
        v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
        v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
        s_waitcnt vmcnt(0)                                       ; BF8C0F70
        exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
        s_waitcnt expcnt(0)                                      ; BF8C0F0F
        v_cvt_pkrtz_f16_f32_e64 v4, v12, v5                      ; D2960004 00020B0C
        v_cvt_pkrtz_f16_f32_e64 v5, v14, 1.0                     ; D2960005 0001E50E
        exp mrt1 v4, v4, v5, v5 compr                            ; C400041F 00000504
        s_waitcnt expcnt(0)                                      ; BF8C0F0F
        v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
        v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
        exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100

After this change:
        v_cvt_pkrtz_f16_f32_e64 v4, v7, v8                       ; D2960004 00021107
        s_waitcnt vmcnt(0)                                       ; BF8C0F70
        v_cvt_pkrtz_f16_f32_e64 v0, v0, v1                       ; D2960000 00020300
        v_cvt_pkrtz_f16_f32_e64 v6, v9, 1.0                      ; D2960006 0001E509
        v_cvt_pkrtz_f16_f32_e64 v5, v12, v5                      ; D2960005 00020B0C
        v_cvt_pkrtz_f16_f32_e64 v7, v14, 1.0                     ; D2960007 0001E50E
        exp mrt0 v4, v4, v6, v6 compr                            ; C400040F 00000604
        v_cvt_pkrtz_f16_f32_e64 v1, v2, v3                       ; D2960001 00020702
        exp mrt1 v5, v5, v7, v7 compr                            ; C400041F 00000705
        exp mrt2 v0, v0, v1, v1 done compr vm                    ; C4001C2F 00000100

No waitcnt for exports are emitted.

v2: fixup index->mrt mapping (Bas).

Reviewed-by: Bas Nieuwenhuizen <bas@basnieuwenhuizen.nl>
Signed-off-by: Dave Airlie <airlied@redhat.com>
src/amd/common/ac_nir_to_llvm.c