Re: VFS layers...

From: Robert Watson (rwatson_at_freebsd.org)
Date: 01/22/04

  • Next message: Bob Pickles: "Device Driver Deleopment"
    Date: Thu, 22 Jan 2004 13:11:32 -0500 (EST)
    To: Steven Woolgar <woolie@mac.com>
    
    

    On Wed, 21 Jan 2004, Steven Woolgar wrote:

    > I am studying the source organization and do not currently have access
    > to a BSD machine I can rebuild to include kernel debugging.
    >
    > I was wondering if someone could explain (or illustrate) the back trace
    > (call stack) of a call from int open( const char*, int ) all the way
    > through to the UFS layer.
    >
    > I am looking at the source code and am making progress, but it is long
    > and tedious work without a compiling and step-able code.
    >
    > Thanks for any help.
    >
    > Oh, and if this isn't an appropriate request for this list please tell
    > me.

    You probably want freebsd-fs, but here are two paths to ufs_open() that
    might be of interest to you:

    The first path is via the open() system call. The "missing" bit that
    isn't visible in DDB is the "macro/inline magic" that occurs in the
    context of a compiled kernel.

    db> trace
    ufs_open(cd631a70,cd631b2c,c06e40c3,cd631a70,c2364c6c) at ufs_open
    ufs_vnoperate(cd631a70,c2364c6c,4000,100,c20ce780) at ufs_vnoperate+0x18
    vn_open_cred(cd631bdc,cd631cdc,0,c2362980,3) at vn_open_cred+0x3f3
    vn_open(cd631bdc,cd631cdc,0,3,9) at vn_open+0x33
    kern_open(c20ce780,28064e9b,0,1,0) at kern_open+0xc8
    open(c20ce780,cd631d14,c,cd631d40,3) at open+0x30
    syscall(2f,2f,2f,80484b9,80484b9) at syscall+0x310
    Xint0x80_syscall() at Xint0x80_syscall+0x1d
    --- syscall (5, FreeBSD ELF32, open), eip = 0x280540af, esp = 0xbfbfe9dc,
    ebp = 0xbfbfea88 ---

    db> trace
    ufs_open(cd631a58,cd631ac4,c065bb07,cd631a58,c094c280) at ufs_open
    ufs_vnoperate(cd631a58,c094c280,c2877618,1,c2362980) at ufs_vnoperate+0x18
    exec_check_permissions(cd631b8c,11000,11000,cd631aec,c07f498d) at
    exec_check_permissions+0x107
    kern_execve(c20ce780,80c8200,80c3df0,80c8000,0) at kern_execve+0x339
    execve(c20ce780,cd631d14,c,0,3) at execve+0x30
    syscall(2f,2f,2f,80c3d70,80c8200) at syscall+0x310
    Xint0x80_syscall() at Xint0x80_syscall+0x1d
    --- syscall (59, FreeBSD ELF32, execve), eip = 0x2815fbaf, esp =
    0xbfbfa6bc, ebp = 0xbfbfa6d8 ---

    The macro/inline magic is built from src/sys/kern/vnode_if.src when the
    kernel is compiled, and is stored in vnode_if.[hc] in the kernel compile
    directory. Here's a sample for VOP_OPEN:

    struct vop_open_args {
            struct vnodeop_desc *a_desc;
            struct vnode *a_vp;
            int a_mode;
            struct ucred *a_cred;
            struct thread *a_td;
            int a_fdidx;
    };
    extern struct vnodeop_desc vop_open_desc;
    static __inline int VOP_OPEN(
            struct vnode *vp,
            int mode,
            struct ucred *cred,
            struct thread *td,
            int fdidx)
    {
            struct vop_open_args a;
            int rc;
            a.a_desc = VDESC(vop_open);
            a.a_vp = vp;
            a.a_mode = mode;
            a.a_cred = cred;
            a.a_td = td;
            a.a_fdidx = fdidx;
            ASSERT_VI_UNLOCKED(vp, "VOP_OPEN");
            ASSERT_VOP_LOCKED(vp, "VOP_OPEN");
            rc = VCALL(vp, VOFFSET(vop_open), &a);
            CTR5(KTR_VOP, "VOP_OPEN(vp 0x%lX, mode %ld, cred 0x%lX, td 0x%lX, fdidx %ld)", vp, mode, cred, td, fdidx);
    if (rc == 0) {
            ASSERT_VI_UNLOCKED(vp, "VOP_OPEN");
            ASSERT_VOP_LOCKED(vp, "VOP_OPEN");
    } else {
            ASSERT_VI_UNLOCKED(vp, "VOP_OPEN");
            ASSERT_VOP_LOCKED(vp, "VOP_OPEN");
    }
            return (rc);
    }

    FYI, VCALL() is implemented in src/sys/sys/vnode.h:

    #define VDESC(OP) (& __CONCAT(OP,_desc))
    ...
    #define VOFFSET(OP) (VDESC(OP)->vdesc_offset)
    ...
    #define VOCALL(OPSV,OFF,AP) (( *((OPSV)[(OFF)])) (AP))
    ...
    #define VCALL(VP,OFF,AP) VOCALL((VP)->v_op,(OFF),(AP))

    So the basic idea is that the set of operations for a vnode is compiled
    from the operation definition in vnode_if.src into a function pointer
    vector. However, all this evaluation happens inline, so in the stack
    trace you just see a call from the consumer of the VOP straight to the
    file system.

    A descriptor for each method is defined in vnode_if.c as a result of the
    compilation, and looks something like the following:

    static int vop_open_vp_offsets[] = {
            VOPARG_OFFSETOF(struct vop_open_args,a_vp),
            VDESC_NO_OFFSET
    };
    struct vnodeop_desc vop_open_desc = {
            0,
            "vop_open",
            0,
            vop_open_vp_offsets,
            VDESC_NO_OFFSET,
            VOPARG_OFFSETOF(struct vop_open_args,a_cred),
            VOPARG_OFFSETOF(struct vop_open_args,a_td),
            VDESC_NO_OFFSET,
            NULL,
    };

    This somewhat obfuscated approach makes up for the lack of object-oriented
    programming support by allowing the method set to be expanded at runtime
    while remaining opaque to the caller.

    Robert N M Watson FreeBSD Core Team, TrustedBSD Projects
    robert@fledge.watson.org Senior Research Scientist, McAfee Research

    _______________________________________________
    freebsd-arch@freebsd.org mailing list
    http://lists.freebsd.org/mailman/listinfo/freebsd-arch
    To unsubscribe, send any mail to "freebsd-arch-unsubscribe@freebsd.org"


  • Next message: Bob Pickles: "Device Driver Deleopment"

    Relevant Pages