/*
;  riscv64-expand.S -- decompressors for riscv64
;
;  This file is part of the UPX executable compressor.
;
;  Copyright (C) 1996-2021 Markus Franz Xaver Johannes Oberhumer
;  Copyright (C) 1996-2021 Laszlo Molnar
;  Copyright (C) John F. Reiser
;  All Rights Reserved.
;
;  UPX and the UCL library are free software; you can redistribute them
;  and/or modify them under the terms of the GNU General Public License as
;  published by the Free Software Foundation; either version 2 of
;  the License, or (at your option) any later version.
;
;  This program is distributed in the hope that it will be useful,
;  but WITHOUT ANY WARRANTY; without even the implied warranty of
;  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
;  GNU General Public License for more details.
;
;  You should have received a copy of the GNU General Public License
;  along with this program; see the file COPYING.
;  If not, write to the Free Software Foundation, Inc.,
;  59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
;
;  Markus F.X.J. Oberhumer              Laszlo Molnar
;  <markus@oberhumer.com>               <ezerotven+github@gmail.com>
;
;  John F. Reiser
;  <jreiser@users.sourceforge.net>
;
*/

// Word size in bytes for riscv64.  It is assigned before the includes
// (presumably because macros.S refers to it -- verify) and is used below
// to scale the F_* stack-frame offsets.
NBPW = 8
#include "arch/riscv/64/macros.S"
#include "arch/riscv/64/regs.h"

// Thin aliases so that call sites can use the mnemonics call and callr:
//   call  subr   expands to   jal  subr
//   callr subr   expands to   jalr subr
.macro call subr
    jal \subr
.endm

.macro callr subr
    jalr \subr
.endm

  section EXP_HEAD

// Byte offsets of the fields of the b_info header that f_expand receives
// as its first argument:
//   sz_unc    read with lwu; added to dst to form the expected end of output
//   sz_cpr    read with lwu; length of the compressed data after the header
//   b_method  read with lbu; selects the decompressor in the daisy chain
//   b_ftid    read with lbu; nonzero means the unfilter code is run
//   b_cto8, b_extra   defined here but not referenced in the visible source
//   sz_binfo  size of the header; compressed data starts at b_info + sz_binfo
sz_unc= 0
sz_cpr= 4
b_method= 8
b_ftid=   9
b_cto8=  10
b_extra= 11
sz_binfo= 12

f_expand: .globl f_expand // f_expand(b_info *, dst, &dstlen)
// Supervisor for de-compression, un-filter, and sync_cache
// (a later note in this file says sync_cache is not needed because
// memfd_create + mmap must flush the icache).
//
// Flow: save registers and arguments on the stack, record the expected end
// of output and end of input in the frame, rearrange the arguments into the
// order the decompressors take, then jump to the decompress daisy chain.
// The exit paths (eof_lzma, eof_n2*, eof_all) finish the job and return.
//
// Input to supervisor:
#define fx_src arg1
#define fx_dst arg2
#define fx_dstlen arg3

// Stack frame after the two PUSH4 below (8 words = F_FRAME), as implied by
// the F_* offsets and by the reloads at eof_n2e / eof_all.  PUSH4 itself is
// defined in macros.S, which is not shown here.
//   0*NBPW  fx_src  (F_ADRX)      4*NBPW  saved s0
//   1*NBPW  fx_dst                5*NBPW  saved s1
//   2*NBPW  fx_dstlen             6*NBPW  F_EOFSRC (pushed as zero)
//   3*NBPW  ra                    7*NBPW  F_EOFDST (pushed as zero)
// F_ENTR and F_PMASK are aliases of F_EOFSRC; they are not referenced in
// the visible source.
F_FRAME= 8*NBPW
F_EOFDST=7*NBPW
F_EOFSRC=6*NBPW; F_ENTR= F_EOFSRC; F_PMASK= F_EOFSRC
//F_RDX=  5*NBPW
//F_LENU= 4*NBPW
//F_ADRU= 3*NBPW
//F_ELFA= 2*NBPW
//F_LENX= 1*NBPW  # 4 bytes only
//F_MFD=  1*NBPW + 4
  F_ADRX= 0*NBPW

    PUSH4 s0,s1,zero,zero  // MATCH_93  ABI callee saved; MATCH_92 F_EOFSRC, F_EOFDST
    PUSH4 fx_src,fx_dst,fx_dstlen,ra  // MATCH_95 params to unfilter and sync_cache

// Input to de-compressor:
#define meth arg5
#define tmp  arg6
// F_EOFDST = dst + sz_unc: where the output should end if decompression
// produces exactly the advertised number of bytes.
        lwu tmp,sz_unc(fx_src)
        add tmp,tmp,fx_dst
        sd  tmp,F_EOFDST(sp)
    lbu meth,b_method(fx_src)
// Rearrange (b_info, dst, &dstlen) into (src, cpr_len, dst, &dstlen).
// The order matters: each instruction reads a register that a later one
// overwrites (arg3 before arg3 is replaced, arg2 before arg2 is replaced,
// and arg1 is still the b_info pointer when sz_cpr is loaded).
    mv arg4,fx_dstlen
    mv arg3,fx_dst
    lwu arg2,sz_cpr(fx_src)
    addi arg1,fx_src,sz_binfo
// F_EOFSRC = src + cpr_len: end of the compressed input.
        add tmp,arg2,arg1
        sd  tmp,F_EOFSRC(sp)
#undef tmp

// The jal both skips over the getbit code included right after it and
// leaves the address of that code in x5 for the decompressors to use.
#include "riscv64-getbit.h"
    jal x5,decompress  # x5= t0= &getbit  Note: x5 is same register as t0
#include "riscv64-getbit.S"

decompress:  // (src *, cpr_len, dst *, &dstlen, method);  daisy chain of decompressors
// Reached through the jal x5,decompress in f_expand, so x5 (t0) holds the
// address of the getbit code on entry.  Each section below is one link of
// the chain: it loads its own method id into pre8, compares it with meth,
// and on mismatch branches to the not_* label that ends the link, which
// falls into the next section.  A matching link runs the decompressor
// pulled in by its #include.  If no link matches, control reaches EXP_TAIL.

// Presumably consulted by the included decompressor sources -- verify there.
#define NO_METHOD_CHECK 1  /* subsumed here by daisy chain */

// Method ids compared against the b_method byte.  Only M_NRV2E_LE32,
// M_NRV2D_LE32, M_NRV2B_LE32 and M_LZMA are tested in this chain; the
// remaining values are defined but not referenced in the visible source.
#define M_NRV2B_LE32    2
#define M_NRV2B_8    3
#define M_NRV2D_LE32    5
#define M_NRV2D_8    6
#define M_NRV2E_LE32    8
#define M_NRV2E_8    9
#define M_CL1B_LE32     11
#define M_LZMA          14

// Remember that register t0 is the same as register x5, which has &getbit.
// Also: 4 arguments in a0-a3 (x10-x13: rbit, bits, ta, disp)
// In the three NRV links the output and input pointers are copied from
// arg3 and arg1 into the registers named dst and src before the included
// decompressor body starts.
  section NRV2E
    li pre8,M_NRV2E_LE32; bne meth,pre8,not_nrv2e
    mv dst,arg3
    mv src,arg1
#include "arch/riscv/64/nrv2e_d.S"
not_nrv2e:

  section NRV2D
    li pre8,M_NRV2D_LE32; bne meth,pre8,not_nrv2d
    mv dst,arg3
    mv src,arg1
#include "arch/riscv/64/nrv2d_d.S"
not_nrv2d:

  section NRV2B
    li pre8,M_NRV2B_LE32; bne meth,pre8,not_nrv2b
    mv dst,arg3
    mv src,arg1
#include "arch/riscv/64/nrv2b_d.S"
not_nrv2b:

// The LZMA link does no register copies: the included code is entered with
// the argument registers exactly as f_expand set them up.
  section LZMA_DAISY
    li pre8,M_LZMA; bne meth,pre8,not_lzma
#include "arch/riscv/64/lzma_d.S"

not_lzma:

  section EXP_TAIL
// Fall through: daisy chain had no matching method
    ebreak  // EXP_TAIL
// If execution resumes after the ebreak, these two instructions run and
// control falls into eof_lzma (in source order) with a0=-1 and a1=meth.
        li a0,-1
        mv a1,meth

// NOTE(review): no label named eof is defined in the visible source;
// confirm whether this .globl is still needed.
  .globl eof

// LZMA exit path: a1 (outSizeProcessed) is recorded both in the F_EOFDST
// slot and in s1 as the produced length, and rv is recorded in the
// F_EOFSRC slot as the result of decompression.
eof_lzma: .globl eof_lzma
    sd a1,F_EOFDST(sp)  # outSizeProcessed
    mv s1,a1
    sd rv,F_EOFSRC(sp)  # rv of de-compression
    j eof_all
// NRV2B / NRV2D / NRV2E exit path (three labels, one body; presumably the
// included NRV decompressors jump here -- verify in those files).
// Computes  (src - b_info - sz_binfo) - sz_cpr,  that is, bytes of input
// actually consumed minus bytes expected, and stores the difference in the
// F_EOFSRC slot as the result (zero when the two agree).
eof_n2b:
eof_n2d:
eof_n2e:
// must agree with src/riscv64-getbit.h
#define ta  x12
#define src x14
    ld fx_src,F_ADRX(sp)
    lwu ta,sz_cpr(fx_src)  # expected consumption of src
    sub src,src,fx_src
    addi src,src,-sz_binfo  # actual consumption of src
    sub src,src,ta  # actual - expected
    sd src,F_EOFSRC(sp)  # rv of decompress
#undef src
#undef ta
// Reload the original dst from frame slot 1 so that the produced length
// can be formed as (final dst) - (original dst).
    ld fx_dst,NBPW(sp)
    sub s1,dst,fx_dst  # length generated by de-compress

// Common tail for all methods.  On entry s1 = number of bytes produced and
// the F_EOFSRC slot holds the decompression result.  Restores the three
// f_expand arguments and ra, stores s1 through the dstlen pointer (8-byte
// store), then runs the unfilter code when the b_ftid byte is nonzero.
eof_all:
    POP4 fx_src,fx_dst,fx_dstlen,ra  // MATCH_95  fx_src,fx_dst,fx_dstlen,ra
    mv  s0,fx_dst
#undef fx_dst
    sd  s1,0(fx_dstlen)  # register s1 needed by unfilter and icache sync
#undef fx_dstlen

    lbu arg3,b_ftid(fx_src)  # filter
#undef fx_src
    beqz arg3,no_unf
      mv arg1,s0
      mv arg2,s1  // actual dstlen
#include "arch/riscv/64/bxx.S"  // unfilter code; args in registers, fall-through return
no_unf:
// NOTE(review): if arg1/arg2 are the same registers as rv/a1 (regs.h is not
// visible here), these two moves are overwritten by the POP4 below; they
// look like leftover argument setup for a sync_cache call -- confirm.
    mv arg1,s0
    mv arg2,s1
//memfd_create + mmap must flush icache, so sync_cache not needed
// Final pop: restores the saved s0 and s1, then loads rv from the F_EOFSRC
// slot and a1 from the F_EOFDST slot as the values handed back to the
// caller.  The uncfail label is not referenced in the visible source.
uncfail:
    POP4 s0,s1,rv,a1  # MATCH_93 ABI; MATCH_92 F_EOFSRC
    ret

// Exported label marking the spot where, per the note below, the UMF_LINUX
// code is placed.  No instructions follow the label in the visible source.
upx_mmap_and_fd: .globl upx_mmap_and_fd
    // UMF_LINUX goes here

// Disabled reference code for a byte-copy helper: every line is both inside
// the #if 0 region and commented out, so nothing here is assembled.  The
// author marks it BUGGY and notes that it clobbers t0 (x5), the register
// that holds the address of getbit while a decompressor is running.
#if 0  // {BUGGY, and clobbers t0 === x5
// Instead: use pre8, rbit, ta (x9, x10, x12)  [len is val(x8)]
//copy:  // In: len, dst, disp;  Out:  dst;  trashes len, pre8
//        add rax,dst,disp; li t0,5; sgtu t0,t0,len  // <=3 is forced
//        lbu rdx,0(rax); bnez t0,copy1  // <=5 for better branch predict
//        li t0,-4; sgtu t0,disp,t0; bnez t0,copy1  // 4-byte chunks would overlap
//        addi len,len,-4  // adjust for termination cases
//        add t0,len,dst
//copy4:
//        lwu rdx,0(rax); addi rax,rax,4
//        sw  rdx,0(dst); addi dst,dst,4; bgeu dst,len,copy4
//        addi len,len,4; lbu rdx,0(rax); beqz len,copy0
//copy1:
//        addi rax,rax,1; lbu rdx,0(dst); addi len,len,-1
//            lbu rdx,0(rax)
//                addi dst,dst,1;  bnez len,copy1
//copy0:
//        ret
#endif  //}

