#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2022, Hongren (Zenithal) Zheng
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

################################################################################
# Utility functions to help with keeping track of which registers to stack/
# unstack when entering / exiting routines.
################################################################################
{
    # Callee-saved registers
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Caller-saved registers
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    my @must_save;
    sub use_reg {
        my $reg = shift;
        if (grep(/^$reg$/, @callee_saved)) {
            push(@must_save, $reg);
        } elsif (!grep(/^$reg$/, @caller_saved)) {
            # Register is not usable!
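            # (Neither list contains x0/zero, x3/gp or x4/tp: under the
            # standard RV32 calling convention those registers must not be
            # clobbered here, so asking for one of them is an error.)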
            die("Unusable register ".$reg);
        }
        return $reg;
    }
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }
    sub save_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        $ret.="    addi    sp,sp,-$stack_reservation\n";
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    sw      $_,$stack_offset(sp)\n";
        }
        return $ret;
    }
    sub load_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    lw      $_,$stack_offset(sp)\n";
        }
        $ret.="    addi    sp,sp,$stack_reservation\n";
        return $ret;
    }
    sub clear_regs {
        @must_save = ();
    }
}

################################################################################
# util for encoding scalar crypto extension instructions
################################################################################

my @regs = map("x$_",(0..31));
my %reglookup;
@reglookup{@regs} = @regs;

# Takes a register name, possibly an alias, and converts it to a register index
# from 0 to 31
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}

sub aes32dsi {
    # Encoding for aes32dsi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10101_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32dsmi {
    # Encoding for aes32dsmi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10111_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esi {
    # Encoding for aes32esi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10001_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub aes32esmi {
    # Encoding for aes32esmi rd, rs1, rs2, bs instruction on RV32
    #                bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b00_10011_00000_00000_000_00000_0110011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $rs2 = read_reg shift;
    my $bs = shift;
    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

sub rori {
    # Encoding for rori rd, rs1, shamt instruction on RV32 (Zbb/Zbkb)
    #                XXXXXXX_shamt_ rs1 _XXX_ rd  _XXXXXXX
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg shift;
    my $rs1 = read_reg shift;
    my $shamt = shift;
    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}
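
# The instructions above are emitted as raw .word values, so the generated
# file does not rely on the assembler knowing the Zkn/Zbkb mnemonics.
# Worked example (recomputed here for illustration only): aes32esmi with
# rd=rs1=x13 (a3), rs2=x6 (t1), bs=0, the first aes32esmi produced in the
# encrypt loop below, encodes to the 32-bit word 0x266686B3.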

################################################################################
# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# Registers initially to hold AES state (called s0-s3 or y0-y3 elsewhere)
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9);

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10..12);

# Registers initially to hold the key
my ($T0,$T1,$T2,$T3) = use_regs(13..16);

# Loop counter
my ($loopcntr) = use_regs(30);

################################################################################
# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# Apply $inst with bs = 0..3, folding one byte from each of the four state
# words into $key: $key starts out holding one round-key word and ends up
# holding one column of the new state.
sub outer {
    my $inst = shift;
    my $key = shift;
    # state 0 to 3
    my $s0 = shift;
    my $s1 = shift;
    my $s2 = shift;
    my $s3 = shift;
    my $ret = '';
    $ret .= <<___;
    @{[$inst->($key,$key,$s0,0)]}
    @{[$inst->($key,$key,$s1,1)]}
    @{[$inst->($key,$key,$s2,2)]}
    @{[$inst->($key,$key,$s3,3)]}
___
    return $ret;
}

sub aes32esmi4 { return outer(\&aes32esmi, @_) }
sub aes32esi4  { return outer(\&aes32esi, @_) }
sub aes32dsmi4 { return outer(\&aes32dsmi, @_) }
sub aes32dsi4  { return outer(\&aes32dsi, @_) }

################################################################################
# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
#                         const AES_KEY *key);
################################################################################
my $code .= <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds; each iteration
    # consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32esmi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esmi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esmi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esmi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32esi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
#                         const AES_KEY *key);
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

$code .= save_regs();
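
# rv32i_zknd_decrypt mirrors rv32i_zkne_encrypt above, with two differences:
# the key schedule is walked backwards (KEYP is first advanced to the last
# round key, then stepped back by 16 bytes per round key), and the state
# words are combined in the inverse ShiftRows order, e.g. the new column 0
# is built from columns 0, 3, 2, 1 of the previous state.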

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # Load the last key
    # use T0 as temporary now
    slli    $T0,$loopcntr,4
    add     $KEYP,$KEYP,$T0

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds; each iteration
    # consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32dsmi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsmi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsmi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsmi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

    # final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32dsi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Pointer to user key, number of bits in key, key pointer
my ($UKEY,$BITS,$KEYP) = use_regs(10..12);

# Temporaries
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13..17,28..31);

################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################
my @rcon = (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36);

# Apply the S-box to each of the 4 bytes of rs (and possibly MixColumns),
# XOR-accumulating the result into rd
sub sbox4 {
    my $inst = shift;
    my $rd = shift;
    my $rs = shift;
    my $ret = <<___;
    @{[$inst->($rd,$rd,$rs,0)]}
    @{[$inst->($rd,$rd,$rs,1)]}
    @{[$inst->($rd,$rd,$rs,2)]}
    @{[$inst->($rd,$rd,$rs,3)]}
___
    return $ret;
}

sub fwdsbox4 { return sbox4(\&aes32esi, @_); }
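
# ke128enc below implements the standard AES-128 key schedule,
#     w[i] = w[i-4] ^ SubWord(RotWord(w[i-1])) ^ rcon    (i % 4 == 0)
#     w[i] = w[i-4] ^ w[i-1]                             (otherwise)
# with T0~T3 holding the current four words and RotWord realised as a
# right-rotate of the little-endian word by 8 (rori when Zbkb is available,
# srli/slli/or otherwise).  ke192enc and ke256enc extend the same pattern to
# 6- and 8-word key blocks.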

sub ke128enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    while($rnum < 10) {
        $ret .= <<___;
    # use T4 to store rcon
    li      $T4,$rcon[$rnum]
    # as xor is associative and commutative
    # we first xor T0 with RCON, then use T0 to
    # accumulate the SBOX results of the bytes of T3
    xor     $T0,$T0,$T4
    # use T4 to store rotated T3
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    # update T0
    @{[fwdsbox4 $T0,$T4]}
    # update new T1~T3
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        $rnum++;
    }
    return $ret;
}

sub ke192enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
    while($rnum < 8) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
___
        if ($rnum != 7) {
            # note that (8+1)*24 = 216, (12+1)*16 = 208
            # thus the last 8 bytes can be dropped
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
___
        }
        $ret .= <<___;
    add     $KEYP,$KEYP,24
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 7) {
            $ret .= <<___;
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

sub ke256enc {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    while($rnum < 7) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,32
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 6) {
            # note that (7+1)*32 = 256, (14+1)*16 = 240
            # thus the last 16 bytes can be dropped
            $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

################################################################################
# void rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
#                                 AES_KEY *key)
################################################################################
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
    $ret .= <<___;
    bnez    $UKEY,1f        # if (!userKey || !key) return -1;
    bnez    $KEYP,1f
    li      a0,-1
    ret
1:
    # Determine number of rounds from key size in bits
    li      $T0,128
    bne     $BITS,$T0,1f
    li      $T1,10          # key->rounds = 10 if bits == 128
    sw      $T1,240($KEYP)  # store key->rounds
$ke128
    j       4f
1:
    li      $T0,192
    bne     $BITS,$T0,2f
    li      $T1,12          # key->rounds = 12 if bits == 192
    sw      $T1,240($KEYP)  # store key->rounds
$ke192
    j       4f
2:
    li      $T1,14          # key->rounds = 14 if bits == 256
    li      $T0,256
    beq     $BITS,$T0,3f
    li      a0,-2           # If bits != 128, 192, or 256, return -2
    j       5f
3:
    sw      $T1,240($KEYP)  # store key->rounds
$ke256
4:      # return 0
    li      a0,0
5:      # return a0
___
    return $ret;
}
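
# AES_set_common emits the skeleton shared by all of the set_*_key routines
# below: it checks the userKey/key pointers, selects 10/12/14 rounds from the
# key length in bits, stores the round count at byte offset 240 of AES_KEY
# (key->rounds), splices in the key-expansion body supplied by the caller
# ($ke128/$ke192/$ke256), and returns 0 on success or a negative value on
# error.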

$code .= <<___;
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type   rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___

$code .= save_regs();

$code .= AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0));

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
#                                      const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type   rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___

$code .= save_regs();

$code .= AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1));

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# utility functions for rv32i_zknd_zkne_set_decrypt_key
################################################################################

# Forward S-box, then inverse S-box, then InvMixColumns: the two S-box passes
# cancel out, so only InvMixColumns is applied to the word.  This simulates
# the RV64 aes64im instruction one 32-bit word at a time.
sub invm4 {
    my $rd = shift;
    my $tmp = shift;
    my $rs = shift;
    my $ret = <<___;
    li      $tmp,0
    li      $rd,0
    @{[fwdsbox4 $tmp,$rs]}
    @{[sbox4(\&aes32dsmi, $rd,$tmp)]}
___
    return $ret;
}

sub ke128dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    while($rnum < 10) {
        $ret .= <<___;
    # see comments in ke128enc
    li      $T4,$rcon[$rnum]
    xor     $T0,$T0,$T4
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T4]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
___
        # InvMixColumns is needed only for round keys [1:N-1].
        # aes32dsmi applies InvSubBytes before InvMixColumns so that it can
        # share the datapath of encryption (SubBytes then MixColumns), while
        # directly undoing an encryption round would require InvMixColumns
        # before InvSubBytes.  The equivalent inverse cipher compensates by
        # applying InvMixColumns to the round keys instead, which is done
        # here at key-setup time.
        if ($rnum < 9) {
            $ret .= <<___;
    # T4 and T5 are temp variables
    @{[invm4 $T5,$T4,$T0]}
    sw      $T5,0($KEYP)
    @{[invm4 $T5,$T4,$T1]}
    sw      $T5,4($KEYP)
    @{[invm4 $T5,$T4,$T2]}
    sw      $T5,8($KEYP)
    @{[invm4 $T5,$T4,$T3]}
    sw      $T5,12($KEYP)
___
        } else {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}

sub ke192dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
    while($rnum < 8) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,24
___
        if ($rnum < 7) {
            $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T0]}
    sw      $T7,0($KEYP)
    @{[invm4 $T7,$T6,$T1]}
    sw      $T7,4($KEYP)
    @{[invm4 $T7,$T6,$T2]}
    sw      $T7,8($KEYP)
    @{[invm4 $T7,$T6,$T3]}
    sw      $T7,12($KEYP)
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
        } else {
            # rnum == 7
            $ret .= <<___;
    # the reason for dropping T4/T5 is in ke192enc
    # the reason for not using invm4 here is in ke128dec
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
        $rnum++;
    }
    return $ret;
}
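
# ke256dec below follows the same pattern as ke128dec and ke192dec above: the
# key is expanded exactly as in ke256enc, but every round key except the first
# and the last is passed through invm4 (InvMixColumns) so that the decrypt
# loop can consume it directly with aes32dsmi.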

sub ke256dec {
    my $zbkb = shift;
    my $rnum = 0;
    my $ret = '';
    $ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # BITS and T8 are temp variables; BITS is no longer
    # needed at this point, so it can be clobbered
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
    while($rnum < 7) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,32
___
        if ($rnum < 6) {
            $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    # see the comment in ke128dec
    # T8 and BITS are temp variables
    @{[invm4 $T8,$BITS,$T0]}
    sw      $T8,0($KEYP)
    @{[invm4 $T8,$BITS,$T1]}
    sw      $T8,4($KEYP)
    @{[invm4 $T8,$BITS,$T2]}
    sw      $T8,8($KEYP)
    @{[invm4 $T8,$BITS,$T3]}
    sw      $T8,12($KEYP)
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
        } else {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # last 16 bytes are dropped
    # see the comment in ke256enc
___
        }
        $rnum++;
    }
    return $ret;
}

################################################################################
# void rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey, const int bits,
#                                      AES_KEY *key)
################################################################################
# A note on naming: set_decrypt_key also needs aes32esi (for the forward
# S-box), hence zkne appears in the name as well.
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type   rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___

$code .= save_regs();

$code .= AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0));

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#                                           const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type   rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___

$code .= save_regs();

$code .= AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1));

$code .= load_regs();

$code .= <<___;
    ret
___

print $code;

close STDOUT or die "error closing STDOUT: $!";