//
// Syd: rock-solid application kernel
// src/kernel/mod.rs: Secure computing hooks
//
// Copyright (c) 2025 Ali Polatel <alip@chesswob.org>
//
// SPDX-License-Identifier: GPL-3.0

// Run a system call handler closure and turn its outcome into a
// `ScmpNotifResp`.
//
// `$request` is handed to `$body` as its only argument. `$body` must
// evaluate to `Result<ScmpNotifResp, Errno>`:
//
// - `Ok(response)` is returned unchanged.
// - `Err(Errno::ECANCELED)` becomes a response with error 0 (no-op).
// - `Err(Errno::UnknownErrno)` becomes a response with `-ENOSYS`.
// - Any other `Err(errno)` becomes a response with the negated errno.
macro_rules! syscall_handler {
    ($request:ident, $body:expr) => {{
        // Read the request ID first, `$request` is passed on to `$body` below.
        let request_id = $request.scmpreq.id;

        match $body($request) {
            Ok(response) => response,
            Err(errno) => {
                let error = match errno {
                    // SAFETY: ECANCELED is used by Syd internally to denote
                    // requests that should be turned into no-op.
                    Errno::ECANCELED => 0,
                    // SAFETY: Harden against UnknownErrno so as not to
                    // confuse the Linux API from returning no-op.
                    Errno::UnknownErrno => -libc::ENOSYS,
                    // `ScmpNotifResp` expects negated errno.
                    other => (other as i32).checked_neg().unwrap_or(-libc::ENOSYS),
                };
                ScmpNotifResp::new(request_id, 0, error, 0)
            }
        }
    }};
}

// System call handler submodules.
//
// NOTE: Keep these declarations below the `syscall_handler!` definition:
// `macro_rules!` macros are scoped textually, so the macro is only
// visible to modules declared after it (presumably the handlers rely
// on this -- verify before reordering).

/// access(2), faccessat(2) and faccessat2(2) handlers
pub(crate) mod access;

/// chdir(2) and fchdir(2) handlers
pub(crate) mod chdir;

/// chmod(2), fchmod(2), fchmodat(2), and fchmodat2(2) handlers
pub(crate) mod chmod;

/// chown(2), lchown(2), fchown(2), and fchownat(2) handlers
pub(crate) mod chown;

/// chroot(2) handler
pub(crate) mod chroot;

/// exec(3) handlers
pub(crate) mod exec;

/// fanotify_mark(2) handler
pub(crate) mod fanotify;

/// fcntl{,64}(2) handlers
pub(crate) mod fcntl;

/// getdents64(2) handler
pub(crate) mod getdents;

/// inotify_add_watch(2) handler
pub(crate) mod inotify;

/// ioctl(2) handlers
pub(crate) mod ioctl;

/// link(2) and linkat(2) handlers
pub(crate) mod link;

/// Memory syscall handlers
pub(crate) mod mem;

/// memfd_create(2) handler
pub(crate) mod memfd;

/// mkdir(2) and mkdirat(2) handlers
pub(crate) mod mkdir;

/// mknod(2) and mknodat(2) handlers
pub(crate) mod mknod;

/// Network syscall handlers
pub(crate) mod net;

/// creat(2), open(2), openat(2), and openat2(2) handlers
pub(crate) mod open;

/// prctl(2) handler
pub(crate) mod prctl;

/// readlink(2) and readlinkat(2) handlers
pub(crate) mod readlink;

/// rename(2), renameat(2) and renameat2(2) handlers
pub(crate) mod rename;

/// Set UID/GID syscall handlers
pub(crate) mod setid;

/// Shared memory syscall handlers
pub(crate) mod shm;

/// {,rt_}sigaction(2) handler
pub(crate) mod sigaction;

/// Signal syscall handlers
pub(crate) mod signal;

/// stat syscall handlers
pub(crate) mod stat;

/// statfs syscall handlers
pub(crate) mod statfs;

/// symlink(2) and symlinkat(2) handlers
pub(crate) mod symlink;

/// sysinfo(2) handler
pub(crate) mod sysinfo;

/// syslog(2) handler
pub(crate) mod syslog;

/// truncate and allocate handlers
pub(crate) mod truncate;

/// uname(2) handler
pub(crate) mod uname;

/// utime handlers
pub(crate) mod utime;

/// rmdir(2), unlink(2) and unlinkat(2) handlers
pub(crate) mod unlink;

/// xattr handlers
pub(crate) mod xattr;

/// ptrace(2) hooks
pub(crate) mod ptrace;

/// ptrace(2) syscall handler
pub(crate) mod sys_ptrace;

use libseccomp::ScmpNotifResp;
use nix::{
    errno::Errno,
    fcntl::{AtFlags, OFlag},
    sys::{
        signal::{kill, Signal},
        stat::{Mode, SFlag},
    },
    unistd::Pid,
};

use crate::{
    fs::{fd_status_flags, to_valid_fd},
    log::log_is_main,
    log_enabled,
    lookup::{CanonicalPath, FileInfo, FileType},
    notice,
    path::XPath,
    req::{PathArgs, SysArg, UNotifyEventRequest},
    sandbox::{Action, Capability, Sandbox, SandboxGuard},
    syslog::LogLevel,
    warn,
};

/// Process the given path argument.
///
/// Checks `path` against the sandbox for the capabilities in `caps`,
/// logs the access violation if applicable, and carries out the
/// resulting sandbox action.
///
/// # Parameters
///
/// - `request`: Seccomp notify request, if any. When present, it is
///   used for logging and for signaling the process with `pidfd_kill`;
///   otherwise `pid` is logged and signaled with `kill`.
/// - `sandbox`: Sandbox to check the path against.
/// - `pid`: Process ID, used to convert `/proc/$pid` to `/proc/self`
///   and as a fallback for logging and signaling when `request` is
///   `None`.
/// - `path`: Path to check.
/// - `caps`: Capabilities requested by the system call. Only the subset
///   returned by `sandbox.getcaps(caps)` is checked.
/// - `hide`: Whether a denial may be reported as `ENOENT` rather than
///   `EACCES` based on an additional Stat check.
/// - `syscall_name`: Name of the system call, used for logging only.
///
/// # Returns
///
/// - `Ok(())` if the action is `Allow` or `Warn`.
/// - `Err(Errno::ECANCELED)` if access would otherwise be granted but
///   the capabilities include write access and the path is append-only.
/// - `Err(Errno::ENOENT)` or `Err(Errno::EACCES)` if access is denied.
///
/// # Panics
///
/// Panics on `Action::Panic` unless `log_is_main` returns true for the
/// current thread. On `Action::Exit` the process exits with the denial
/// errno as exit code, so the function does not return.
#[expect(clippy::cognitive_complexity)]
pub(crate) fn sandbox_path(
    request: Option<&UNotifyEventRequest>,
    sandbox: &Sandbox,
    pid: Pid,
    path: &XPath,
    caps: Capability,
    hide: bool,
    syscall_name: &str,
) -> Result<(), Errno> {
    // Check for chroot.
    // Access is denied unconditionally, only the errno depends on `hide`.
    if sandbox.is_chroot() {
        return Err(if hide { Errno::ENOENT } else { Errno::EACCES });
    }

    // Convert /proc/$pid to /proc/self as necessary.
    let path = path.replace_proc_self(pid);

    // Check enabled capabilities.
    // Keep the requested set around in `caps_old`: the append-only
    // check below must look at what was requested, not at what remains
    // after `getcaps`.
    let caps_old = caps;
    let mut caps = sandbox.getcaps(caps);
    let stat = sandbox.enabled(Capability::CAP_STAT);
    // Nothing to check and no hidden-path check to perform either.
    if caps.is_empty() && (!hide || !stat) {
        return if caps_old.can_write() && sandbox.is_append(&path) {
            // SAFETY: Protect append-only paths against writes.
            // We use ECANCELED which will result in a no-op.
            Err(Errno::ECANCELED)
        } else {
            Ok(())
        };
    }

    let mut action = Action::Allow;
    let mut filter = false;
    // Only the capabilities within CAP_PATH are checked in the loop below.
    let check_caps = caps & Capability::CAP_PATH;
    // Default errno on denial: ENOENT if Walk is the only capability to
    // check, EACCES otherwise. May be corrected by the Stat check below.
    let mut deny_errno = if check_caps == Capability::CAP_WALK {
        Errno::ENOENT
    } else {
        Errno::EACCES
    };

    // Sandboxing.
    // Keep the highest action among all checked capabilities;
    // on a tie the later capability's filter flag wins.
    for cap in check_caps {
        let (new_action, new_filter) = sandbox.check_path(cap, &path);

        if new_action >= action {
            action = new_action;
            filter = new_filter;
        }
    }

    // SAFETY: Do an additional stat check to correct errno to ENOENT,
    // for sandboxing types other than Stat.
    let check_hidden = stat && hide && (caps.is_empty() || action.is_denying());
    if check_hidden || caps.contains(Capability::CAP_STAT) {
        let (new_action, new_filter) = sandbox.check_path(Capability::CAP_STAT, &path);

        if !check_hidden {
            // CAP_STAT is among the capabilities to check and no hidden
            // check applies: the Stat result replaces action and filter.
            deny_errno = Errno::ENOENT;
            action = new_action;
            filter = new_filter;
        } else if new_action.is_denying() {
            // Path is hidden: report ENOENT for the denial.
            deny_errno = Errno::ENOENT;
            if caps.is_empty() {
                // No other capability was checked, so the Stat result
                // is the decision; record CAP_STAT for logging below.
                action = new_action;
                filter = new_filter;
                caps.insert(Capability::CAP_STAT);
            }
        }

        if path.is_rootfs() && deny_errno == Errno::ENOENT {
            // SAFETY: No point in hiding `/`.
            deny_errno = Errno::EACCES;
        }
    }

    // Log the access violation unless the matching rule asked to
    // filter it. The six calls below differ only in log level
    // (`warn!` vs. `notice!`) and in how the process is identified:
    // 1. Request available and `sandbox.log_scmp()`: log the request.
    // 2. Request available otherwise: log the PID of the request.
    // 3. No request: log the `pid` argument, without syscall arguments.
    if !filter && action >= Action::Warn && log_enabled!(LogLevel::Warn) {
        // Log warn for normal cases.
        // Log info for path hiding/walking unless explicitly specified to warn.
        let is_warn = match caps {
            Capability::CAP_STAT => !matches!(
                sandbox.default_action(Capability::CAP_STAT),
                Action::Filter | Action::Deny
            ),
            Capability::CAP_WALK => !matches!(
                sandbox.default_action(Capability::CAP_WALK),
                Action::Filter | Action::Deny
            ),
            _ => true,
        };

        if let Some(request) = request {
            let args = request.scmpreq.data.args;
            if sandbox.log_scmp() {
                if is_warn {
                    warn!("ctx": "access", "cap": caps, "act": action,
                        "sys": syscall_name,
                        "path": &path, "args": args,
                        "tip": format!("configure `allow/{}+{}'",
                            caps.to_string().to_ascii_lowercase(),
                            path),
                        "req": request);
                } else {
                    notice!("ctx": "access", "cap": caps, "act": action,
                        "sys": syscall_name,
                        "path": &path, "args": args,
                        "tip": format!("configure `allow/{}+{}'",
                            caps.to_string().to_ascii_lowercase(),
                            path),
                        "req": request);
                }
            } else if is_warn {
                warn!("ctx": "access", "cap": caps, "act": action,
                    "sys": syscall_name,
                    "path": &path, "args": args,
                    "tip": format!("configure `allow/{}+{}'",
                        caps.to_string().to_ascii_lowercase(),
                        path),
                    "pid": request.scmpreq.pid);
            } else {
                notice!("ctx": "access", "cap": caps, "act": action,
                    "sys": syscall_name,
                    "path": &path, "args": args,
                    "tip": format!("configure `allow/{}+{}'",
                        caps.to_string().to_ascii_lowercase(),
                        path),
                    "pid": request.scmpreq.pid);
            }
        } else if is_warn {
            warn!("ctx": "access", "cap": caps, "act": action,
                "sys": syscall_name, "path": &path,
                "tip": format!("configure `allow/{}+{}'",
                    caps.to_string().to_ascii_lowercase(),
                    path),
                "pid": pid.as_raw());
        } else {
            notice!("ctx": "access", "cap": caps, "act": action,
                "sys": syscall_name, "path": &path,
                "tip": format!("configure `allow/{}+{}'",
                    caps.to_string().to_ascii_lowercase(),
                    path),
                "pid": pid.as_raw());
        }
    }

    // Carry out the action.
    match action {
        Action::Allow | Action::Warn => {
            if caps.can_write() && sandbox.is_append(&path) {
                // SAFETY: Protect append-only paths against writes.
                // We use ECANCELED which will result in a no-op.
                Err(Errno::ECANCELED)
            } else {
                Ok(())
            }
        }
        Action::Deny | Action::Filter => Err(deny_errno),
        //Do NOT panic the main thread!
        Action::Panic if log_is_main(std::thread::current().id()) => Err(deny_errno),
        Action::Panic => panic!(),
        // Exit the process with the denial errno as exit code.
        Action::Exit => std::process::exit(deny_errno as i32),
        // For Stop, Abort and Kill: signal the process, then deny the
        // access. Failure to deliver the signal is ignored on purpose.
        Action::Stop => {
            if let Some(request) = request {
                let _ = request.pidfd_kill(libc::SIGSTOP);
            } else {
                let _ = kill(pid, Some(Signal::SIGSTOP));
            }
            Err(deny_errno)
        }
        Action::Abort => {
            if let Some(request) = request {
                let _ = request.pidfd_kill(libc::SIGABRT);
            } else {
                let _ = kill(pid, Some(Signal::SIGABRT));
            }
            Err(deny_errno)
        }
        Action::Kill => {
            if let Some(request) = request {
                let _ = request.pidfd_kill(libc::SIGKILL);
            } else {
                let _ = kill(pid, Some(Signal::SIGKILL));
            }
            Err(deny_errno)
        }
    }
}

/// Handles syscalls related to paths, reducing code redundancy and
/// ensuring a uniform way of dealing with paths.
///
/// Resolves the path arguments described by `path_argv`, checks them
/// against the sandbox with `sandbox_path`, and finally calls `handler`
/// with the resolved paths.
///
/// # Parameters
///
/// - `request`: User notification request from seccomp.
/// - `syscall_name`: The name of the syscall being handled, used to determine the capabilities and for logging.
/// - `path_argv`: List of one or two argument mappings containing dirfd and path indexes, if applicable.
/// - `handler`: Closure that processes the constructed canonical paths and performs additional syscall-specific operations.
///
/// # Returns
///
/// - `ScmpNotifResp`: Response indicating the result of the syscall handling.
///   Errors are converted into a response by `syscall_handler!`.
///
/// # Panics
///
/// Panics if a `SysArg` has neither `path` nor `dirfd` set, if no path
/// argument was resolved for a non-magic path, or if `path_argv` has
/// more than two entries.
#[expect(clippy::cognitive_complexity)]
pub(crate) fn syscall_path_handler<H>(
    request: UNotifyEventRequest,
    syscall_name: &str,
    path_argv: &[SysArg],
    handler: H,
) -> ScmpNotifResp
where
    H: Fn(PathArgs, &UNotifyEventRequest, SandboxGuard) -> Result<ScmpNotifResp, Errno>,
{
    syscall_handler!(request, |request: UNotifyEventRequest| {
        let req = request.scmpreq;

        // Determine system call capabilities.
        let mut caps = Capability::try_from((req, syscall_name))?;

        // Check if system call is FD-only.
        let is_fd = path_argv.iter().all(|arg| arg.path.is_none());

        // Check for chroot:
        //
        // Delay Chdir to allow the common `cd /` use case right after chroot(2).
        let sandbox = request.get_sandbox();
        if sandbox.is_chroot() && !is_fd && !caps.contains(Capability::CAP_CHDIR) {
            return Err(Errno::ENOENT);
        }

        // Query the sandbox state needed below:
        // 1. Whether Crypt sandboxing is enabled.
        // 2. Whether path hiding applies, i.e. Stat sandboxing is
        //    enabled and the system call has a path argument.
        let crypt = sandbox.enabled(Capability::CAP_CRYPT);
        let hide = !is_fd && sandbox.enabled(Capability::CAP_STAT);

        let mut magic = false;
        // At most two path arguments are supported.
        let mut paths: [Option<CanonicalPath>; 2] = [None, None];
        for (idx, arg) in path_argv.iter().enumerate() {
            // Arguments with a path are read and canonicalized.
            // Arguments without a path are handled in the FD-only
            // branch below, which covers system calls such as
            // fchmod, fchown, fallocate, ftruncate, fgetxattr and
            // fsetxattr safely and efficiently.
            if arg.path.is_some() {
                let (path, is_magic, _) = request.read_path(&sandbox, *arg)?;
                // NOTE(review): This is an assignment, not an OR. With
                // two path arguments the flag of the last one wins --
                // confirm this is intended.
                magic = is_magic;

                if sandbox.is_chroot() {
                    return if caps.contains(Capability::CAP_CHDIR) && path.abs().is_rootfs() {
                        // SAFETY: Allow `cd /` after chroot.
                        Ok(unsafe { request.continue_syscall() })
                    } else {
                        // arg.path.is_some() -> is_fd = false: Return ENOENT.
                        Err(Errno::ENOENT)
                    };
                }

                paths[idx] = Some(path);
            } else if let Some(arg_idx) = arg.dirfd {
                // Validate FD argument.
                let dirfd = to_valid_fd(req.data.args[arg_idx])?;

                if sandbox.is_chroot() {
                    // Return EACCES for FD-only system calls such as fchdir.
                    return Err(if is_fd { Errno::EACCES } else { Errno::ENOENT });
                }

                if dirfd != libc::AT_FDCWD {
                    // SAFETY: Get the file descriptor before access check
                    // as it may change after which is a TOCTOU vector.
                    let fd = request.get_fd(dirfd)?;

                    // Validate WANT_READ against O_PATH.
                    if arg.fsflags.want_read() && fd_status_flags(&fd)?.contains(OFlag::O_PATH) {
                        return Err(Errno::EBADF);
                    }

                    // Handle ftruncate etc. for files with encryption in progress.
                    // Look the file up in the crypt map by its file
                    // information to find the path it is known under.
                    let crypt_path = if crypt {
                        // The map is presumably always set when Crypt
                        // sandboxing is enabled -- the unwrap panics
                        // otherwise.
                        #[expect(clippy::disallowed_methods)]
                        let files = request.cache.crypt_map.as_ref().unwrap();
                        if let Ok(info) = FileInfo::from_fd(&fd) {
                            // A poisoned lock is recovered rather than propagated.
                            let files = files.0.lock().unwrap_or_else(|e| e.into_inner());
                            files
                                .iter()
                                .find_map(|(path, map)| (map.info == info).then(|| path.clone()))
                            // Lock is released here.
                        } else {
                            None
                        }
                    } else {
                        None
                    };

                    let path = if let Some(crypt_path) = crypt_path {
                        CanonicalPath::new_crypt(fd.into(), crypt_path)
                    } else {
                        CanonicalPath::new_fd(fd.into(), req.pid())?
                    };

                    paths[idx] = Some(path);
                } else {
                    let path = CanonicalPath::new_fd(libc::AT_FDCWD.into(), req.pid())?;

                    paths[idx] = Some(path);
                }
            } else {
                unreachable!("BUG: Both dirfd and path are None in SysArg!");
            }
        }

        if magic && sandbox.locked_for(req.pid()) {
            // Sandbox is locked, access denied.
            return Err(Errno::ENOENT);
        }

        if !magic {
            // Call sandbox access checker, skip magic paths.
            match (&paths[0], &paths[1]) {
                (Some(path), None) => {
                    // Adjust capabilities.
                    // Drop capabilities that do not apply given whether
                    // the path exists and what type of file it is.
                    if caps.contains(Capability::CAP_CREATE) && path.typ.is_some() {
                        caps.remove(Capability::CAP_CREATE);
                    }
                    if caps.contains(Capability::CAP_DELETE) && path.typ.is_none() {
                        caps.remove(Capability::CAP_DELETE);
                    }
                    if caps.contains(Capability::CAP_CHDIR) && path.typ != Some(FileType::Dir) {
                        caps.remove(Capability::CAP_CHDIR);
                    }
                    if caps.contains(Capability::CAP_MKDIR) && path.typ.is_some() {
                        caps.remove(Capability::CAP_MKDIR);
                    }

                    sandbox_path(
                        Some(&request),
                        &sandbox,
                        request.scmpreq.pid(), // Still used for the /proc/self conversion.
                        path.abs(),
                        caps,
                        hide,
                        syscall_name,
                    )?
                }
                (Some(path_0), Some(path_1)) => {
                    // link, linkat, rename, renameat, renameat2.
                    // All of which have RENAME capability.
                    // It's the second argument that is being
                    // created.
                    sandbox_path(
                        Some(&request),
                        &sandbox,
                        request.scmpreq.pid(), // Still used for the /proc/self conversion.
                        path_0.abs(),
                        Capability::CAP_RENAME,
                        hide,
                        syscall_name,
                    )?;

                    // Careful, rename* may overwrite, link* must create.
                    // The Create check is skipped only if the target
                    // exists and the argument requires it to be missing,
                    // in which case EEXIST is returned below.
                    if path_1.typ.is_none() || !path_argv[1].fsflags.missing() {
                        sandbox_path(
                            Some(&request),
                            &sandbox,
                            request.scmpreq.pid(), // Still used for the /proc/self conversion.
                            path_1.abs(),
                            Capability::CAP_CREATE,
                            hide,
                            syscall_name,
                        )?;
                    }
                }
                _ => unreachable!("BUG: number of path arguments is not 1 or 2!"),
            }
        }

        // SAFETY: Path hiding is done, now it is safe to:
        //
        // 1. Return EEXIST if options had MISS_LAST.
        // 2. Return ENOTDIR for non-directories with trailing slash.
        for (idx, path) in paths.iter_mut().enumerate() {
            if let Some(path) = path {
                // Stop at the first path without a matching argument.
                let arg = if let Some(arg) = path_argv.get(idx) {
                    arg
                } else {
                    break;
                };

                if arg.fsflags.missing() && path.typ.is_some() {
                    return Err(Errno::EEXIST);
                }

                if let Some(file_type) = &path.typ {
                    if !matches!(file_type, FileType::Dir | FileType::MagicLnk(_))
                        && path.abs().last() == Some(b'/')
                    {
                        return Err(Errno::ENOTDIR);
                    }
                }
            }
        }

        // Call the system call handler.
        // Ownership of the paths and the sandbox guard moves to the handler.
        handler(
            PathArgs(paths[0].take(), paths[1].take()),
            &request,
            sandbox,
        )
    })
}

/// Convert a raw system call argument to `AtFlags` safely.
///
/// `valid` is the set of flags the caller is willing to accept.
///
/// # Errors
///
/// Returns `EINVAL` if `arg` does not fit into the integer type
/// underlying `AtFlags`, or if it has any bit set outside of `valid`.
#[inline]
pub(crate) fn to_atflags(arg: u64, valid: AtFlags) -> Result<AtFlags, Errno> {
    // SAFETY: An argument that is out of range cannot be a flag set.
    let bits = arg.try_into().map_err(|_| Errno::EINVAL)?;

    // SAFETY: Retain the bits `AtFlags` has no name for, so that they
    // are caught by the check below rather than silently dropped.
    let flags = AtFlags::from_bits_retain(bits);

    // SAFETY: Accept only if every bit set is part of `valid`.
    if valid.contains(flags) {
        Ok(flags)
    } else {
        Err(Errno::EINVAL)
    }
}

/// Convert a raw system call argument to `Mode`, stripping unknown bits.
///
/// This conversion never fails: every bit outside of the chmod bits is
/// discarded.
#[inline]
pub(crate) fn to_mode(arg: u64) -> Mode {
    // Linux VFS only honors these chmod bits (07777):
    // rwx for user, group and other plus setuid, setgid and sticky.
    const CHMOD_MASK: libc::mode_t = libc::S_IRWXU
        | libc::S_IRWXG
        | libc::S_IRWXO
        | libc::S_ISUID
        | libc::S_ISGID
        | libc::S_ISVTX;

    // Truncation is harmless here, the high bits are masked off below.
    #[expect(clippy::cast_possible_truncation)]
    let raw = arg as libc::mode_t;

    Mode::from_bits_truncate(raw & CHMOD_MASK)
}

/// Convert a raw system call argument to `Mode`, rejecting unknown bits.
///
/// Unlike `to_mode`, nothing is stripped.
///
/// # Errors
///
/// Returns `EINVAL` if `arg` does not fit into the integer type
/// underlying `Mode`, or if it has a bit set which `Mode` does not know.
#[inline]
pub(crate) fn to_mode2(arg: u64) -> Result<Mode, Errno> {
    // Both failures, out of range and unknown bits, map to EINVAL.
    arg.try_into()
        .ok()
        .and_then(Mode::from_bits)
        .ok_or(Errno::EINVAL)
}

/// Convert a raw system call argument to the file type it encodes.
///
/// Only the bits covered by `SFlag::S_IFMT` are considered, all other
/// bits are ignored. A file type of zero yields `SFlag::S_IFREG`.
///
/// # Errors
///
/// Returns `EINVAL` if `arg` does not fit into `libc::mode_t`, or if
/// `SFlag::from_bits` rejects the file type bits.
#[inline]
pub(crate) fn to_sflag(arg: u64) -> Result<SFlag, Errno> {
    let mode: libc::mode_t = arg.try_into().map_err(|_| Errno::EINVAL)?;

    match mode & SFlag::S_IFMT.bits() {
        // Careful here, zero file type is equivalent to S_IFREG.
        0 => Ok(SFlag::S_IFREG),
        kind => SFlag::from_bits(kind).ok_or(Errno::EINVAL),
    }
}

#[cfg(test)]
mod tests {
    use super::*;
    use crate::fs::AT_EXECVE_CHECK;

    // Check that `to_atflags` accepts a flag set if and only if
    // all of its flags are part of the valid set.
    #[test]
    fn test_to_atflags() {
        let allowed = AtFlags::AT_SYMLINK_NOFOLLOW | AtFlags::AT_EMPTY_PATH | AT_EXECVE_CHECK;
        let rejected = AtFlags::AT_REMOVEDIR;
        let combined = allowed | rejected;

        // (flags passed as argument, valid set, expected result)
        let cases = [
            (allowed, allowed, Ok(allowed)),
            (rejected, allowed, Err(Errno::EINVAL)),
            (combined, allowed, Err(Errno::EINVAL)),
            (combined, combined, Ok(combined)),
        ];

        for (flags, valid, expected) in cases {
            assert_eq!(to_atflags(flags.bits() as u64, valid), expected);
        }
    }
}
