# Decoding variable-length instructions efficiently We can use a method similar to carry look-ahead to make decoding `N` instructions take time `O(log N)`: Using the encoding in: and [[demo]] See following code [on Compiler Explorer](https://gcc.godbolt.org/z/8e3WEn): ```C++ #include #include #include enum Mode { Standard, Compressed, StandardOnceThenCompressed, }; struct SizeAndMode { size_t size; // size in bytes; always even Mode mode; }; struct RangeSummary { // SizeAndMode::mode fields are the new mode after executing // the instructions summarized by this RangeSummary SizeAndMode mode_was_standard; SizeAndMode mode_was_compressed; SizeAndMode mode_was_standard_once_then_compressed; }; // predecodes a single uint16_t, treating it as if it were the first // uint16_t in an instruction RangeSummary predecode_one(uint16_t nth, uint16_t n_plus_1th) { uint16_t po_field = nth >> 10; uint16_t scm_field = nth & 0x1; uint16_t scmt_field = nth >> 15; uint16_t xo_field = n_plus_1th >> 1; SizeAndMode mode_was_standard = { .size = 4, // 32-bit .mode = Standard, }; SizeAndMode mode_was_standard_once_then_compressed = { .size = 4, // 32-bit .mode = Compressed, }; switch(po_field) { case 0: // 32/48-bits // check for bit set by "Support Processor Attention" instruction if((xo_field & 256) == 0) { mode_was_standard.size = 6; // 48-bit mode_was_standard_once_then_compressed.size = 6; } else { // SVP32 or "Support Processor Attention" // leave sizes as 32-bit } break; case 1: // PowerISA v3.1 prefix & SVP64 mode_was_standard.size = 8; // 64-bit mode_was_standard_once_then_compressed.size = 8; break; case 5: mode_was_standard.size = 2; // 16-bit mode_was_standard_once_then_compressed.size = 2; if(scm_field != 0) // swap to compressed mode mode_was_standard.mode = Compressed; break; default: // standard PowerISA instructions // leave sizes as 32-bit break; } SizeAndMode mode_was_compressed = { .size = 2, // 16-bit .mode = Compressed, }; if(scm_field) { // swap to standard mode if(scmt_field) // swap to standard mode temporarily mode_was_compressed.mode = StandardOnceThenCompressed; else mode_was_compressed.mode = Standard; } else mode_was_compressed.mode = Compressed; return RangeSummary{ .mode_was_standard = mode_was_standard, .mode_was_compressed = mode_was_compressed, .mode_was_standard_once_then_compressed = mode_was_standard_once_then_compressed, }; } SizeAndMode propagate_through_range(SizeAndMode in, RangeSummary range_summary) { SizeAndMode picked; switch(in.mode) { case Standard: picked = range_summary.mode_was_standard; break; case Compressed: picked = range_summary.mode_was_compressed; break; case StandardOnceThenCompressed: picked = range_summary.mode_was_standard_once_then_compressed; break; } return SizeAndMode{ .size = in.size + picked.size, .mode = picked.mode, }; } RangeSummary merge(RangeSummary first, RangeSummary second) { SizeAndMode s = propagate_through_range( first.mode_was_standard, second); SizeAndMode c = propagate_through_range( first.mode_was_compressed, second); SizeAndMode sc = propagate_through_range( first.mode_was_standard_once_then_compressed, second); return RangeSummary{ .mode_was_standard = s, .mode_was_compressed = c, .mode_was_standard_once_then_compressed = sc, }; } enum Endian { LittleEndian, BigEndian, }; const size_t INSTRUCTION_MEMORY_SIZE = 0x1000000; // just for demo purposes uint8_t instruction_memory[INSTRUCTION_MEMORY_SIZE]; // get the uint16_t corresponding to the instruction indicated by the PC uint16_t get_u16(Endian endian, size_t pc) { assert((pc & 1) == 0); // pc must be aligned to 2 bytes uint8_t lsb_byte; uint8_t msb_byte; switch(endian) { case LittleEndian: msb_byte = instruction_memory[(pc ^ 2) + 1]; lsb_byte = instruction_memory[pc ^ 2]; break; case BigEndian: msb_byte = instruction_memory[pc]; lsb_byte = instruction_memory[pc + 1]; break; } return (uint16_t)lsb_byte | ((uint16_t)msb_byte << 8); } struct PredecodedInstruction { Mode mode; // mode for this instruction bool is_start; // true if this is the initial uint16_t in an instruction }; // decode width in uint16_t units // can replace with whatever value you like const size_t DECODE_WIDTH = 8; void predecode_all(Endian endian, size_t pc, Mode starting_mode, PredecodedInstruction outputs[DECODE_WIDTH]) { RangeSummary range_summaries[DECODE_WIDTH]; for(size_t i = 0; i < DECODE_WIDTH; i++) { uint16_t nth = get_u16(endian, pc + i * 2); uint16_t n_plus_1th = get_u16(endian, pc + (i + 1) * 2); range_summaries[i] = predecode_one(nth, n_plus_1th); } // use a parallel prefix-sum algorithm, // using `merge()` as the sum's operation. // use a simple recursive algorithm for(size_t step = 1; step < DECODE_WIDTH; step *= 2) { // outer loop loops ceil(log2(DECODE_WIDTH)) times RangeSummary temp[DECODE_WIDTH]; for(size_t i = 0; i < DECODE_WIDTH; i++) { if(i < step) temp[i] = range_summaries[i]; else temp[i] = merge(range_summaries[i - step], range_summaries[i]); } for(size_t i = 0; i < DECODE_WIDTH; i++) { range_summaries[i] = temp[i]; } } for(size_t i = 0; i < DECODE_WIDTH; i++) { SizeAndMode size_and_mode{ .size = 0, .mode = starting_mode, }; if(i > 0) { size_and_mode = propagate_through_range(size_and_mode, range_summaries[i - 1]); } outputs[i] = PredecodedInstruction{ .mode = size_and_mode.mode, .is_start = (size_and_mode.size == pc + i * 2), }; } } ```