2 Copyright (C) 2018 Alyssa Rosenzweig
3 Copyright (c) 2013 Connor Abbott (connor@abbott.cx)
5 Permission is hereby granted, free of charge, to any person obtaining a copy
6 of this software and associated documentation files (the "Software"), to deal
7 in the Software without restriction, including without limitation the rights
8 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 copies of the Software, and to permit persons to whom the Software is
10 furnished to do so, subject to the following conditions:
12 The above copyright notice and this permission notice shall be included in
13 all copies or substantial portions of the Software.
15 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
30 # Definitions from cwabbott's tools
90 t6xx_alu_size_bits
= {
114 t6xx_dest_override
= {
120 t6xx_load_store_ops
= {
126 "ld_uniform_16": 0xAC,
127 "ld_uniform_32": 0xB0,
130 "ld_color_buffer_8": 0xBA
143 return (tag
>= t6xx_tag
["alu4"]) and (tag
<= t6xx_tag
["alu16"])
151 # Constant types supported, mapping the constant prefix to the Python format
152 # string and the coercion function
161 compact_branch_op
= {
186 with
open(sys
.argv
[1], "r") as f
:
188 space
= ln
.strip().split(" ")
190 instruction
= space
[0]
191 rest
= " ".join(space
[1:])
193 arguments
= [s
.strip() for s
in rest
.split(",")]
194 program
+= [(instruction
, arguments
)]
196 swizzle_component
= {
203 def decode_reg_name(reg_name
):
208 if reg_name
[0] == 'r':
209 ireg
= int(reg_name
[1:])
210 elif reg_name
[0] == 'h':
211 rreg
= int(reg_name
[2:])
213 # Decode half-register into its full register's half
218 # Special case for load/store addresses
221 return (ireg
, half
, upper
)
223 def standard_swizzle_from_parts(swizzle_parts
):
224 swizzle_s
= swizzle_parts
[1] if len(swizzle_parts
) > 1 else "xyzw"
227 for (i
, c
) in enumerate(swizzle_s
):
228 swizzle |
= swizzle_component
[c
] << (2 * i
)
232 def mask_from_parts(mask_parts
, large_mask
):
233 mask_s
= mask_parts
[1] if len(mask_parts
) > 1 else "xyzw"
236 mask
= sum([(3 << (2*swizzle_component
[c
]) if c
in mask_s
else 0) for c
in "xyzw"])
238 mask
= sum([(1 << swizzle_component
[c
] if c
in mask_s
else 0) for c
in "xyzw"])
240 return (mask
, mask_s
)
244 # Not actually a register, instead an immediate float
245 return (True, struct
.unpack("H", struct
.pack("e", float(reg
[1:])))[0], 0, 0, 0, 0)
247 # Function call syntax used in abs() modifier
251 swizzle_parts
= reg
.split(".")
253 reg_name
= swizzle_parts
[0]
257 if reg_name
[0] == '-':
259 reg_name
= reg_name
[1:]
261 if reg_name
[0] == 'a':
263 reg_name
= reg_name
[len("abs("):]
265 (ireg
, half
, upper
) = decode_reg_name(reg_name
)
267 return (False, ireg
, standard_swizzle_from_parts(swizzle_parts
), half
, upper
, modifiers
)
def decode_masked_reg(reg, large_mask):
    """Decode a register operand written as ``name`` or ``name.mask``.

    The text before the first dot is handed to decode_reg_name; the dot-split
    pieces and large_mask are forwarded unchanged to mask_from_parts.

    Returns a tuple (ireg, mask, component, half, upper), where component is
    the largest swizzle_component index among the letters of "xyzw" present
    in the mask string, or 0 when none of them are present.
    """
    pieces = reg.split(".")

    ireg, half, upper = decode_reg_name(pieces[0])
    mask, mask_s = mask_from_parts(pieces, large_mask)

    # Track the highest component named by the mask, starting from 0
    component = 0
    for c in "xyzw":
        if c in mask_s:
            component = max(component, swizzle_component[c])

    return (ireg, mask, component, half, upper)
280 # TODO: Fill these in XXX
282 # Texture pipeline registers in r28-r29
285 def decode_texture_reg_number(reg
):
286 r
= reg
.split(".")[0]
289 return (True, int(r
[1:]) - TEXTURE_BASE
, 0)
292 return (False, (no
>> 1) - TEXTURE_BASE
, no
& 1)
def decode_texture_reg(reg):
    """Decode a texture register operand of the form ``name.ab``.

    The register part is resolved by decode_texture_reg_number; the first two
    characters after the first dot are each looked up in swizzle_component.

    Returns (full, select, upper, swizzleR, swizzleL): the component for the
    second swizzle character comes before the component for the first.
    """
    full, select, upper = decode_texture_reg_number(reg)

    # Swizzle mandatory for texture registers, afaict
    lanes = reg.split(".")[1]
    left = swizzle_component[lanes[0]]
    right = swizzle_component[lanes[1]]

    decoded = (full, select, upper, right, left)
    return decoded
def decode_texture_out_reg(reg):
    """Decode a texture destination operand into (full, select, upper, mask).

    The register part is resolved by decode_texture_reg_number. The write
    mask is the first element returned by mask_from_parts, which is called
    with the dot-split operand and large_mask=False.
    """
    full, select, upper = decode_texture_reg_number(reg)

    # Only the numeric mask is needed; the mask string is discarded
    write_mask, _ = mask_from_parts(reg.split("."), False)

    return (full, select, upper, write_mask)
310 instruction_stream
= []
316 family
= ins_mod
= ins
.split(".")[0]
317 ins_op
= (ins
+ ".").split(".")[1]
319 ins_outmod
= (ins
+ "." + ".").split(".")[2]
322 out_mod
= t6xx_outmod
[ins_outmod
]
326 if ins
in t6xx_load_store_ops
:
327 op
= t6xx_load_store_ops
[ins
]
328 (reg
, mask
, component
, half
, upper
) = decode_masked_reg(p
[1][0], False)
329 (immediate
, address
, swizzle
, half
, upper
, modifiers
) = decode_reg(p
[1][1])
330 unknown
= int(p
[1][2], 16)
331 b
= (op
<< 0) |
(reg
<< 8) |
(mask
<< 13) |
(swizzle
<< 17) |
(unknown
<< 25) |
(address
<< 51)
332 instruction_stream
+= [(LDST
, b
)]
333 elif ins_op
in t6xx_alu_ops
:
334 op
= t6xx_alu_ops
[ins_op
]
336 (reg_out
, mask
, out_component
, half0
, upper0
) = decode_masked_reg(p
[1][0], True)
337 (_
, reg_in1
, swizzle1
, half1
, upper1
, mod1
) = decode_reg(p
[1][1])
338 (immediate
, reg_in2
, swizzle2
, half2
, upper2
, mod2
) = decode_reg(p
[1][2])
341 register_word
= (reg_in1
<< 0) |
((reg_in2
>> 11) << 5) |
(reg_out
<< 10) |
(1 << 15)
343 register_word
= (reg_in1
<< 0) |
(reg_in2
<< 5) |
(reg_out
<< 10)
345 if ins_mod
in ["vadd", "vmul", "lut"]:
346 io_mode
= t6xx_reg_mode
["half" if half0
else "full"]
350 output_override
= 2 # NORMAL, TODO
353 if (ins_outmod
== "quarter"):
354 io_mode
= t6xx_reg_mode
["quarter"]
357 # TODO: half actually
365 (_
, halfmask
, _
, _
, _
) = decode_masked_reg(p
[1][0], False)
372 # Inline constant: lower 11 bits
374 i2block
= ((reg_in2
& 0xFF) << 3) |
((reg_in2
>> 8) & 0x7)
377 # TODO: replicate input 2 if half
380 # TODO: half selection
381 i2block
= upper2 |
(half2
<< 2)
383 i2block |
= swizzle2
<< 3
385 # Extra modifier for some special cased stuff
387 special
= ins
.split(".")[3]
390 output_override
= 0 # low
391 elif special
== "fulllow":
392 # TODO: Not really a special case, just a bug?
393 io_mode
= t6xx_reg_mode
["full"]
394 output_override
= 0 #low
399 instruction_word
= (op
<< 0) |
(io_mode
<< 8) |
(mod1
<< 10) |
(repsel
<< 12) |
(i1half
<< 14) |
(swizzle1
<< 15) |
(mod2
<< 23) |
(i2block
<< 25) |
(output_override
<< 36) |
(out_mod
<< 38) |
(wr_mask
<< 40)
400 elif ins_mod
in ["sadd", "smul"]:
401 # TODO: What are these?
408 i1comp_block
= swizzle1 |
(upper1
<< 2)
410 i1comp_block
= swizzle1
<< 1
415 # Inline constant is splattered in a... bizarre way
417 i2block
= (((reg_in2
>> 9) & 3) << 0) |
(((reg_in2
>> 8) & 1) << 2) |
(((reg_in2
>> 5) & 7) << 3) |
(((reg_in2
>> 0) & 15) << 6)
419 # TODO: half register
420 swizzle2
= (swizzle2
<< 1) & 0x1F
421 i2block
= (mod2
<< 0) |
((not half2
) << 2) |
(swizzle2
<< 3) |
(unknown2
<< 5)
426 outcomp_block
= out_component
<< 1
428 # TODO: half register
431 instruction_word
= (op
<< 0) |
(mod1
<< 8) |
((not half1
) << 10) |
(i1comp_block
<< 11) |
(i2block
<< 14) |
(unknown3
<< 25) |
(out_mod
<< 26) |
((not half0
) << 28) |
(outcomp_block
) << 29
434 instruction_word
= op
436 instruction_stream
+= [(ALU
, ins_mod
, register_word
, instruction_word
)]
437 elif family
== "texture":
438 # Texture ops use long series of modifiers to describe their needed
439 # capabilities, separated by dots. Decode them here
440 parts
= ins
.split(".")
442 # First few modifiers are fixed, like an instruction name
446 # The remaining are variable, but strictly ordered
449 op
= texture_op
[tex_op
]
451 # Some bits are defined directly in the modifier list
452 shadow
= "shadow" in parts
453 cont
= "cont" in parts
454 last
= "last" in parts
455 has_filter
= "raw" not in parts
457 # The remaining need order preserved since they have their own arguments
458 argument_parts
= [part
for part
in parts
if part
not in ["shadow", "cont", "last", "raw"]]
462 for argument
, part
in zip(argument_parts
, arguments
[4:]):
463 if argument
== "bias":
464 bias_lod
= int(float(part
) * 256)
466 print("Unknown argument: " + str(argument
))
468 fmt
= texture_fmt
[tex_fmt
]
472 magic2
= 2 # Where did this even come from?!
474 texture_handle
= int(arguments
[1][len("texture"):])
476 sampler_parts
= arguments
[2].split(".")
477 sampler_handle
= int(sampler_parts
[0][len("sampler"):])
478 swizzle0
= standard_swizzle_from_parts(sampler_parts
)
480 (full0
, select0
, upper0
, mask0
) = decode_texture_out_reg(arguments
[0])
481 (full1
, select1
, upper1
, swizzleR1
, swizzleL1
) = decode_texture_reg(arguments
[3])
483 tex
= (op
<< 0) |
(shadow
<< 6) |
(cont
<< 8) |
(last
<< 9) |
(fmt
<< 10) |
(has_offset
<< 15) |
(has_filter
<< 16) |
(select1
<< 17) |
(upper1
<< 18) |
(swizzleL1
<< 19) |
(swizzleR1
<< 21) |
(0 << 23) |
(magic2
<< 25) |
(full0
<< 29) |
(magic1
<< 30) |
(select0
<< 32) |
(upper0
<< 33) |
(mask0
<< 34) |
(swizzle0
<< 40) |
(bias_lod
<< 72) |
(texture_handle
<< 88) |
(sampler_handle
<< 104)
485 instruction_stream
+= [(TEXTURE
, tex
)]
487 cond
= ins
.split(".")[2]
488 condition
= branch_condition
[cond
]
489 bop
= compact_branch_op
[ins_op
]
491 offset
= int(arguments
[0].split("->")[0])
493 # 2's complement and chill
495 offset
= (1 << 7) - abs(offset
)
497 # Find where we're going
498 dest_tag
= int(arguments
[0].split("->")[1])
500 br
= (bop
<< 0) |
(dest_tag
<< 3) |
(offset
<< 7) |
(condition
<< 14)
502 # TODO: Unconditional branch encoding
504 instruction_stream
+= [(ALU
, "br", None, br
)]
505 elif ins
[1:] == "constants":
506 if ins
[0] not in constant_types
:
507 print("Unknown constant type " + str(constant_type
))
510 (fmt
, cast
) = constant_types
[ins
[0]]
512 encoded
= [struct
.pack(fmt
, cast(f
)) for f
in p
[1]]
518 # consts must be exactly 4 quadwords, so pad with zeroes if necessary
519 consts
+= bytes(4*4 - len(consts
))
521 instruction_stream
+= [(ALU
, "constants", consts
)]
523 # Emit from instruction stream
526 while index
< len(instruction_stream
):
527 output_stream
= bytearray()
528 ins
= instruction_stream
[index
]
531 can_prefetch
= index
+ 1 < len(instruction_stream
)
535 succeeding
= instruction_stream
[index
+ 1] if can_prefetch
else None
539 if succeeding
and succeeding
[0] == LDST
:
540 partb
= succeeding
[1]
544 parta
= t6xx_load_store_ops
["ld_st_noop"]
546 tag8
= t6xx_tag
["load_store"]
548 ins
= (partb
<< 68) |
(parta
<< 8) | tag8
549 output_stream
+= (ins
.to_bytes(16, "little"))
551 tag8
= t6xx_tag
["texture"]
552 ins
= (ins
[1] << 8) | tag8
554 output_stream
+= (ins
.to_bytes(16, "little"))
556 # TODO: Combining ALU ops
558 emit_size
= 4 # 32-bit tag always emitted
561 register_words
= bytearray()
562 body_words
= bytearray()
563 constant_words
= None
567 # Iterate through while there are ALU tags in strictly ascending order
568 while index
< len(instruction_stream
) and instruction_stream
[index
][0] == ALU
and t6xx_alu_bits
[instruction_stream
[index
][1]] > last_alu_bit
:
569 ins
= instruction_stream
[index
]
571 bit
= t6xx_alu_bits
[ins
[1]]
574 if ins
[1] == "constants":
575 constant_words
= ins
[2]
577 # Flag for the used part of the GPU
580 # 16-bit register word, if present
581 if ins
[2] is not None:
582 register_words
+= (ins
[2].to_bytes(2, "little"))
585 size
= int(t6xx_alu_size_bits
[ins
[1]] / 8)
586 body_words
+= (ins
[3].to_bytes(size
, "little"))
591 index
-= 1 # fix off by one, from later loop increment
593 # Pad to nearest multiple of 4 words
594 padding
= (16 - (emit_size
& 15)) if (emit_size
& 15) else 0
597 # emit_size includes constants
599 emit_size
+= len(constant_words
)
601 # Calculate tag given size
602 words
= emit_size
>> 2
603 tag |
= t6xx_tag
["alu" + str(words
)]
605 # Actually emit, now that we can
606 output_stream
+= tag
.to_bytes(4, "little")
607 output_stream
+= register_words
608 output_stream
+= body_words
609 output_stream
+= bytes(padding
)
612 output_stream
+= constant_words
614 instructions
+= [output_stream
]
617 # Assembly over; just emit tags at this point
620 for (idx
, ins
) in enumerate(instructions
):
621 # Instruction prefetch
624 if idx
+ 1 < len(instructions
):
625 tag
= instructions
[idx
+ 1][0] & 0xF
627 # Check for ALU special case
629 if is_tag_alu(tag
) and idx
+ 2 == len(instructions
):
632 # Instruction stream over
640 pprint
.pprint(program
)
642 with
open(sys
.argv
[2], "wb") as f
: